File: | arch/amd64/amd64/vmm.c |
Warning: | line 5614, column 2 Value stored to 'ret' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: vmm.c,v 1.301 2022/01/11 20:34:22 tobhe Exp $ */ |
2 | /* |
3 | * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> |
4 | * |
5 | * Permission to use, copy, modify, and distribute this software for any |
6 | * purpose with or without fee is hereby granted, provided that the above |
7 | * copyright notice and this permission notice appear in all copies. |
8 | * |
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
16 | */ |
17 | |
18 | #include <sys/param.h> |
19 | #include <sys/systm.h> |
20 | #include <sys/signalvar.h> |
21 | #include <sys/malloc.h> |
22 | #include <sys/device.h> |
23 | #include <sys/pool.h> |
24 | #include <sys/proc.h> |
25 | #include <sys/user.h> |
26 | #include <sys/ioctl.h> |
27 | #include <sys/queue.h> |
28 | #include <sys/rwlock.h> |
29 | #include <sys/pledge.h> |
30 | #include <sys/memrange.h> |
31 | #include <sys/tracepoint.h> |
32 | |
33 | #include <uvm/uvm_extern.h> |
34 | |
35 | #include <machine/fpu.h> |
36 | #include <machine/pmap.h> |
37 | #include <machine/biosvar.h> |
38 | #include <machine/segments.h> |
39 | #include <machine/cpufunc.h> |
40 | #include <machine/vmmvar.h> |
41 | |
42 | #include <dev/isa/isareg.h> |
43 | #include <dev/pv/pvreg.h> |
44 | |
45 | /* #define VMM_DEBUG */ |
46 | |
47 | void *l1tf_flush_region; |
48 | |
49 | #ifdef VMM_DEBUG |
50 | #define DPRINTF(x...) do { printf(x); } while(0) |
51 | #else |
52 | #define DPRINTF(x...) |
53 | #endif /* VMM_DEBUG */ |
54 | |
/* Shorthand for a softc's autoconf device name. */
#define DEVNAME(s)  ((s)->sc_dev.dv_xname)

/* Debug helper: report whether a VMX control bit can be set/cleared. */
#define CTRL_DUMP(x,y,z) printf("     %s: Can set:%s Can clear:%s\n", #z , \
				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
				IA32_VMX_##z, 1) ? "Yes" : "No", \
				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
				IA32_VMX_##z, 0) ? "Yes" : "No");

/* Flags tracking which pieces of VM-exit info have been read. */
#define VMX_EXIT_INFO_HAVE_RIP		0x1
#define VMX_EXIT_INFO_HAVE_REASON	0x2
#define VMX_EXIT_INFO_COMPLETE				\
    (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON)
67 | |
68 | struct vm { |
69 | struct vmspace *vm_vmspace; |
70 | vm_map_t vm_map; |
71 | uint32_t vm_id; |
72 | pid_t vm_creator_pid; |
73 | size_t vm_nmemranges; |
74 | size_t vm_memory_size; |
75 | char vm_name[VMM_MAX_NAME_LEN64]; |
76 | struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES16]; |
77 | |
78 | struct vcpu_head vm_vcpu_list; |
79 | uint32_t vm_vcpu_ct; |
80 | u_int vm_vcpus_running; |
81 | struct rwlock vm_vcpu_lock; |
82 | |
83 | SLIST_ENTRY(vm)struct { struct vm *sle_next; } vm_link; |
84 | }; |
85 | |
86 | SLIST_HEAD(vmlist_head, vm)struct vmlist_head { struct vm *slh_first; }; |
87 | |
88 | struct vmm_softc { |
89 | struct device sc_dev; |
90 | |
91 | /* Capabilities */ |
92 | uint32_t nr_vmx_cpus; |
93 | uint32_t nr_svm_cpus; |
94 | uint32_t nr_rvi_cpus; |
95 | uint32_t nr_ept_cpus; |
96 | |
97 | /* Managed VMs */ |
98 | struct vmlist_head vm_list; |
99 | |
100 | int mode; |
101 | |
102 | size_t vcpu_ct; |
103 | size_t vcpu_max; |
104 | |
105 | struct rwlock vm_lock; |
106 | size_t vm_ct; /* number of in-memory VMs */ |
107 | size_t vm_idx; /* next unique VM index */ |
108 | |
109 | struct rwlock vpid_lock; |
110 | uint16_t max_vpid; |
111 | uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */ |
112 | }; |
113 | |
114 | void vmx_dump_vmcs_field(uint16_t, const char *); |
115 | int vmm_enabled(void); |
116 | int vmm_probe(struct device *, void *, void *); |
117 | void vmm_attach(struct device *, struct device *, void *); |
118 | int vmmopen(dev_t, int, int, struct proc *); |
119 | int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *); |
120 | int vmmclose(dev_t, int, int, struct proc *); |
121 | int vmm_start(void); |
122 | int vmm_stop(void); |
123 | size_t vm_create_check_mem_ranges(struct vm_create_params *); |
124 | int vm_create(struct vm_create_params *, struct proc *); |
125 | int vm_run(struct vm_run_params *); |
126 | int vm_terminate(struct vm_terminate_params *); |
127 | int vm_get_info(struct vm_info_params *); |
128 | int vm_resetcpu(struct vm_resetcpu_params *); |
129 | int vm_intr_pending(struct vm_intr_params *); |
130 | int vm_rwregs(struct vm_rwregs_params *, int); |
131 | int vm_mprotect_ept(struct vm_mprotect_ept_params *); |
132 | int vm_rwvmparams(struct vm_rwvmparams_params *, int); |
133 | int vm_find(uint32_t, struct vm **); |
134 | int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
135 | int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
136 | int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *); |
137 | int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
138 | int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); |
139 | int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *); |
140 | int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *); |
141 | int vcpu_reload_vmcs_vmx(struct vcpu *); |
142 | int vcpu_init(struct vcpu *); |
143 | int vcpu_init_vmx(struct vcpu *); |
144 | int vcpu_init_svm(struct vcpu *); |
145 | int vcpu_must_stop(struct vcpu *); |
146 | int vcpu_run_vmx(struct vcpu *, struct vm_run_params *); |
147 | int vcpu_run_svm(struct vcpu *, struct vm_run_params *); |
148 | void vcpu_deinit(struct vcpu *); |
149 | void vcpu_deinit_vmx(struct vcpu *); |
150 | void vcpu_deinit_svm(struct vcpu *); |
151 | int vm_impl_init(struct vm *, struct proc *); |
152 | int vm_impl_init_vmx(struct vm *, struct proc *); |
153 | int vm_impl_init_svm(struct vm *, struct proc *); |
154 | void vm_impl_deinit(struct vm *); |
155 | void vm_impl_deinit_vmx(struct vm *); |
156 | void vm_impl_deinit_svm(struct vm *); |
157 | void vm_teardown(struct vm *); |
158 | int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int); |
159 | int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *); |
160 | int vmx_get_exit_info(uint64_t *, uint64_t *); |
161 | int vmx_load_pdptes(struct vcpu *); |
162 | int vmx_handle_exit(struct vcpu *); |
163 | int svm_handle_exit(struct vcpu *); |
164 | int svm_handle_msr(struct vcpu *); |
165 | int vmm_handle_xsetbv(struct vcpu *, uint64_t *); |
166 | int vmx_handle_xsetbv(struct vcpu *); |
167 | int svm_handle_xsetbv(struct vcpu *); |
168 | int vmm_handle_cpuid(struct vcpu *); |
169 | int vmx_handle_rdmsr(struct vcpu *); |
170 | int vmx_handle_wrmsr(struct vcpu *); |
171 | int vmx_handle_cr0_write(struct vcpu *, uint64_t); |
172 | int vmx_handle_cr4_write(struct vcpu *, uint64_t); |
173 | int vmx_handle_cr(struct vcpu *); |
174 | int svm_handle_inout(struct vcpu *); |
175 | int vmx_handle_inout(struct vcpu *); |
176 | int svm_handle_hlt(struct vcpu *); |
177 | int vmx_handle_hlt(struct vcpu *); |
178 | int vmm_inject_ud(struct vcpu *); |
179 | int vmm_inject_gp(struct vcpu *); |
180 | int vmm_inject_db(struct vcpu *); |
181 | void vmx_handle_intr(struct vcpu *); |
182 | void vmx_handle_intwin(struct vcpu *); |
183 | void vmx_handle_misc_enable_msr(struct vcpu *); |
184 | int vmm_get_guest_memtype(struct vm *, paddr_t); |
185 | int vmx_get_guest_faulttype(void); |
186 | int svm_get_guest_faulttype(struct vmcb *); |
187 | int vmx_get_exit_qualification(uint64_t *); |
188 | int vmm_get_guest_cpu_cpl(struct vcpu *); |
189 | int vmm_get_guest_cpu_mode(struct vcpu *); |
190 | int svm_fault_page(struct vcpu *, paddr_t); |
191 | int vmx_fault_page(struct vcpu *, paddr_t); |
192 | int vmx_handle_np_fault(struct vcpu *); |
193 | int svm_handle_np_fault(struct vcpu *); |
194 | int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int); |
195 | pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t); |
196 | int vmm_alloc_vpid(uint16_t *); |
197 | void vmm_free_vpid(uint16_t); |
198 | const char *vcpu_state_decode(u_int); |
199 | const char *vmx_exit_reason_decode(uint32_t); |
200 | const char *svm_exit_reason_decode(uint32_t); |
201 | const char *vmx_instruction_error_decode(uint32_t); |
202 | void svm_setmsrbr(struct vcpu *, uint32_t); |
203 | void svm_setmsrbw(struct vcpu *, uint32_t); |
204 | void svm_setmsrbrw(struct vcpu *, uint32_t); |
205 | void vmx_setmsrbr(struct vcpu *, uint32_t); |
206 | void vmx_setmsrbw(struct vcpu *, uint32_t); |
207 | void vmx_setmsrbrw(struct vcpu *, uint32_t); |
208 | void svm_set_clean(struct vcpu *, uint32_t); |
209 | void svm_set_dirty(struct vcpu *, uint32_t); |
210 | |
211 | int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size); |
212 | void vmm_init_pvclock(struct vcpu *, paddr_t); |
213 | int vmm_update_pvclock(struct vcpu *); |
214 | int vmm_pat_is_valid(uint64_t); |
215 | |
#ifdef MULTIPROCESSOR
static int vmx_remote_vmclear(struct cpu_info *, struct vcpu *);
#endif
219 | |
220 | #ifdef VMM_DEBUG |
221 | void dump_vcpu(struct vcpu *); |
222 | void vmx_vcpu_dump_regs(struct vcpu *); |
223 | void vmx_dump_vmcs(struct vcpu *); |
224 | const char *msr_name_decode(uint32_t); |
225 | void vmm_segment_desc_decode(uint64_t); |
226 | void vmm_decode_cr0(uint64_t); |
227 | void vmm_decode_cr3(uint64_t); |
228 | void vmm_decode_cr4(uint64_t); |
229 | void vmm_decode_msr_value(uint64_t, uint64_t); |
230 | void vmm_decode_apicbase_msr_value(uint64_t); |
231 | void vmm_decode_ia32_fc_value(uint64_t); |
232 | void vmm_decode_mtrrcap_value(uint64_t); |
233 | void vmm_decode_perf_status_value(uint64_t); |
234 | void vmm_decode_perf_ctl_value(uint64_t); |
235 | void vmm_decode_mtrrdeftype_value(uint64_t); |
236 | void vmm_decode_efer_value(uint64_t); |
237 | void vmm_decode_rflags(uint64_t); |
238 | void vmm_decode_misc_enable_value(uint64_t); |
239 | const char *vmm_decode_cpu_mode(struct vcpu *); |
240 | |
241 | extern int mtrr2mrt(int); |
242 | |
243 | struct vmm_reg_debug_info { |
244 | uint64_t vrdi_bit; |
245 | const char *vrdi_present; |
246 | const char *vrdi_absent; |
247 | }; |
248 | #endif /* VMM_DEBUG */ |
249 | |
250 | extern uint64_t tsc_frequency; |
251 | extern int tsc_is_invariant; |
252 | |
253 | const char *vmm_hv_signature = VMM_HV_SIGNATURE"OpenBSDVMM58"; |
254 | |
255 | const struct kmem_pa_mode vmm_kp_contig = { |
256 | .kp_constraint = &no_constraint, |
257 | .kp_maxseg = 1, |
258 | .kp_align = 4096, |
259 | .kp_zero = 1, |
260 | }; |
261 | |
262 | struct cfdriver vmm_cd = { |
263 | NULL((void *)0), "vmm", DV_DULL, CD_SKIPHIBERNATE2 |
264 | }; |
265 | |
266 | const struct cfattach vmm_ca = { |
267 | sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL((void *)0), NULL((void *)0) |
268 | }; |
269 | |
270 | /* |
271 | * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite |
272 | * to access the individual fields of the guest segment registers. This |
273 | * struct is indexed by VCPU_REGS_* id. |
274 | */ |
275 | const struct { |
276 | uint64_t selid; |
277 | uint64_t limitid; |
278 | uint64_t arid; |
279 | uint64_t baseid; |
280 | } vmm_vmx_sreg_vmcs_fields[] = { |
281 | { VMCS_GUEST_IA32_CS_SEL0x0802, VMCS_GUEST_IA32_CS_LIMIT0x4802, |
282 | VMCS_GUEST_IA32_CS_AR0x4816, VMCS_GUEST_IA32_CS_BASE0x6808 }, |
283 | { VMCS_GUEST_IA32_DS_SEL0x0806, VMCS_GUEST_IA32_DS_LIMIT0x4806, |
284 | VMCS_GUEST_IA32_DS_AR0x481A, VMCS_GUEST_IA32_DS_BASE0x680C }, |
285 | { VMCS_GUEST_IA32_ES_SEL0x0800, VMCS_GUEST_IA32_ES_LIMIT0x4800, |
286 | VMCS_GUEST_IA32_ES_AR0x4814, VMCS_GUEST_IA32_ES_BASE0x6806 }, |
287 | { VMCS_GUEST_IA32_FS_SEL0x0808, VMCS_GUEST_IA32_FS_LIMIT0x4808, |
288 | VMCS_GUEST_IA32_FS_AR0x481C, VMCS_GUEST_IA32_FS_BASE0x680E }, |
289 | { VMCS_GUEST_IA32_GS_SEL0x080A, VMCS_GUEST_IA32_GS_LIMIT0x480A, |
290 | VMCS_GUEST_IA32_GS_AR0x481E, VMCS_GUEST_IA32_GS_BASE0x6810 }, |
291 | { VMCS_GUEST_IA32_SS_SEL0x0804, VMCS_GUEST_IA32_SS_LIMIT0x4804, |
292 | VMCS_GUEST_IA32_SS_AR0x4818, VMCS_GUEST_IA32_SS_BASE0x680A }, |
293 | { VMCS_GUEST_IA32_LDTR_SEL0x080C, VMCS_GUEST_IA32_LDTR_LIMIT0x480C, |
294 | VMCS_GUEST_IA32_LDTR_AR0x4820, VMCS_GUEST_IA32_LDTR_BASE0x6812 }, |
295 | { VMCS_GUEST_IA32_TR_SEL0x080E, VMCS_GUEST_IA32_TR_LIMIT0x480E, |
296 | VMCS_GUEST_IA32_TR_AR0x4822, VMCS_GUEST_IA32_TR_BASE0x6814 } |
297 | }; |
298 | |
299 | /* Pools for VMs and VCPUs */ |
300 | struct pool vm_pool; |
301 | struct pool vcpu_pool; |
302 | |
303 | struct vmm_softc *vmm_softc; |
304 | |
305 | /* IDT information used when populating host state area */ |
306 | extern vaddr_t idt_vaddr; |
307 | extern struct gate_descriptor *idt; |
308 | |
309 | /* Constants used in "CR access exit" */ |
310 | #define CR_WRITE0 0 |
311 | #define CR_READ1 1 |
312 | #define CR_CLTS2 2 |
313 | #define CR_LMSW3 3 |
314 | |
315 | /* |
316 | * vmm_enabled |
317 | * |
318 | * Checks if we have at least one CPU with either VMX or SVM. |
319 | * Returns 1 if we have at least one of either type, but not both, 0 otherwise. |
320 | */ |
321 | int |
322 | vmm_enabled(void) |
323 | { |
324 | struct cpu_info *ci; |
325 | CPU_INFO_ITERATORint cii; |
326 | int found_vmx = 0, found_svm = 0; |
327 | |
328 | /* Check if we have at least one CPU with either VMX or SVM */ |
329 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { |
330 | if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) |
331 | found_vmx = 1; |
332 | if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) |
333 | found_svm = 1; |
334 | } |
335 | |
336 | /* Don't support both SVM and VMX at the same time */ |
337 | if (found_vmx && found_svm) |
338 | return (0); |
339 | |
340 | if (found_vmx || found_svm) |
341 | return 1; |
342 | |
343 | return 0; |
344 | } |
345 | |
346 | int |
347 | vmm_probe(struct device *parent, void *match, void *aux) |
348 | { |
349 | const char **busname = (const char **)aux; |
350 | |
351 | if (strcmp(*busname, vmm_cd.cd_name) != 0) |
352 | return (0); |
353 | return (1); |
354 | } |
355 | |
356 | /* |
357 | * vmm_attach |
358 | * |
359 | * Calculates how many of each type of CPU we have, prints this into dmesg |
360 | * during attach. Initializes various locks, pools, and list structures for the |
361 | * VMM. |
362 | */ |
363 | void |
364 | vmm_attach(struct device *parent, struct device *self, void *aux) |
365 | { |
366 | struct vmm_softc *sc = (struct vmm_softc *)self; |
367 | struct cpu_info *ci; |
368 | CPU_INFO_ITERATORint cii; |
369 | |
370 | sc->nr_vmx_cpus = 0; |
371 | sc->nr_svm_cpus = 0; |
372 | sc->nr_rvi_cpus = 0; |
373 | sc->nr_ept_cpus = 0; |
374 | sc->vcpu_ct = 0; |
375 | sc->vm_ct = 0; |
376 | sc->vm_idx = 0; |
377 | |
378 | /* Calculate CPU features */ |
379 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { |
380 | if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) |
381 | sc->nr_vmx_cpus++; |
382 | if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) |
383 | sc->nr_svm_cpus++; |
384 | if (ci->ci_vmm_flags & CI_VMM_RVI(1 << 2)) |
385 | sc->nr_rvi_cpus++; |
386 | if (ci->ci_vmm_flags & CI_VMM_EPT(1 << 3)) |
387 | sc->nr_ept_cpus++; |
388 | } |
389 | |
390 | SLIST_INIT(&sc->vm_list){ ((&sc->vm_list)->slh_first) = ((void *)0); }; |
391 | rw_init(&sc->vm_lock, "vm_list")_rw_init_flags(&sc->vm_lock, "vm_list", 0, ((void *)0) ); |
392 | |
393 | if (sc->nr_ept_cpus) { |
394 | printf(": VMX/EPT"); |
395 | sc->mode = VMM_MODE_EPT; |
396 | } else if (sc->nr_vmx_cpus) { |
397 | printf(": VMX"); |
398 | sc->mode = VMM_MODE_VMX; |
399 | } else if (sc->nr_rvi_cpus) { |
400 | printf(": SVM/RVI"); |
401 | sc->mode = VMM_MODE_RVI; |
402 | } else if (sc->nr_svm_cpus) { |
403 | printf(": SVM"); |
404 | sc->mode = VMM_MODE_SVM; |
405 | } else { |
406 | printf(": unknown"); |
407 | sc->mode = VMM_MODE_UNKNOWN; |
408 | } |
409 | |
410 | if (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) { |
411 | if (!(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) { |
412 | l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE(64 * 1024), |
413 | &kv_any, &vmm_kp_contig, &kd_waitok); |
414 | if (!l1tf_flush_region) { |
415 | printf(" (failing, no memory)"); |
416 | sc->mode = VMM_MODE_UNKNOWN; |
417 | } else { |
418 | printf(" (using slow L1TF mitigation)"); |
419 | memset(l1tf_flush_region, 0xcc,__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024))) |
420 | VMX_L1D_FLUSH_SIZE)__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024))); |
421 | } |
422 | } |
423 | } |
424 | printf("\n"); |
425 | |
426 | if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) { |
427 | sc->max_vpid = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_svm.svm_max_asid; |
428 | } else { |
429 | sc->max_vpid = 0xFFF; |
430 | } |
431 | |
432 | bzero(&sc->vpids, sizeof(sc->vpids))__builtin_bzero((&sc->vpids), (sizeof(sc->vpids))); |
433 | rw_init(&sc->vpid_lock, "vpid")_rw_init_flags(&sc->vpid_lock, "vpid", 0, ((void *)0)); |
434 | |
435 | pool_init(&vm_pool, sizeof(struct vm), 0, IPL_MPFLOOR0x9, PR_WAITOK0x0001, |
436 | "vmpool", NULL((void *)0)); |
437 | pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_MPFLOOR0x9, PR_WAITOK0x0001, |
438 | "vcpupl", NULL((void *)0)); |
439 | |
440 | vmm_softc = sc; |
441 | } |
442 | |
443 | /* |
444 | * vmmopen |
445 | * |
446 | * Called during open of /dev/vmm. |
447 | * |
448 | * Parameters: |
449 | * dev, flag, mode, p: These come from the character device and are |
450 | * all unused for this function |
451 | * |
452 | * Return values: |
453 | * ENODEV: if vmm(4) didn't attach or no supported CPUs detected |
454 | * 0: successful open |
455 | */ |
456 | int |
457 | vmmopen(dev_t dev, int flag, int mode, struct proc *p) |
458 | { |
459 | /* Don't allow open if we didn't attach */ |
460 | if (vmm_softc == NULL((void *)0)) |
461 | return (ENODEV19); |
462 | |
463 | /* Don't allow open if we didn't detect any supported CPUs */ |
464 | if (vmm_softc->mode != VMM_MODE_EPT && vmm_softc->mode != VMM_MODE_RVI) |
465 | return (ENODEV19); |
466 | |
467 | return 0; |
468 | } |
469 | |
470 | /* |
471 | * vmmioctl |
472 | * |
473 | * Main ioctl dispatch routine for /dev/vmm. Parses ioctl type and calls |
474 | * appropriate lower level handler routine. Returns result to ioctl caller. |
475 | */ |
476 | int |
477 | vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) |
478 | { |
479 | int ret; |
480 | |
481 | KERNEL_UNLOCK()_kernel_unlock(); |
482 | |
483 | switch (cmd) { |
484 | case VMM_IOC_CREATE(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_create_params) & 0x1fff) << 16) | ((('V' )) << 8) | ((1))): |
485 | if ((ret = vmm_start()) != 0) { |
486 | vmm_stop(); |
487 | break; |
488 | } |
489 | ret = vm_create((struct vm_create_params *)data, p); |
490 | break; |
491 | case VMM_IOC_RUN(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_run_params) & 0x1fff) << 16) | ((('V')) << 8) | ((2))): |
492 | ret = vm_run((struct vm_run_params *)data); |
493 | break; |
494 | case VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_info_params) & 0x1fff) << 16) | ((('V')) << 8) | ((3))): |
495 | ret = vm_get_info((struct vm_info_params *)data); |
496 | break; |
497 | case VMM_IOC_TERM((unsigned long)0x80000000 | ((sizeof(struct vm_terminate_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((4))): |
498 | ret = vm_terminate((struct vm_terminate_params *)data); |
499 | break; |
500 | case VMM_IOC_RESETCPU((unsigned long)0x80000000 | ((sizeof(struct vm_resetcpu_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((5))): |
501 | ret = vm_resetcpu((struct vm_resetcpu_params *)data); |
502 | break; |
503 | case VMM_IOC_INTR((unsigned long)0x80000000 | ((sizeof(struct vm_intr_params) & 0x1fff) << 16) | ((('V')) << 8) | ((6))): |
504 | ret = vm_intr_pending((struct vm_intr_params *)data); |
505 | break; |
506 | case VMM_IOC_READREGS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_rwregs_params) & 0x1fff) << 16) | ((('V' )) << 8) | ((7))): |
507 | ret = vm_rwregs((struct vm_rwregs_params *)data, 0); |
508 | break; |
509 | case VMM_IOC_WRITEREGS((unsigned long)0x80000000 | ((sizeof(struct vm_rwregs_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((8))): |
510 | ret = vm_rwregs((struct vm_rwregs_params *)data, 1); |
511 | break; |
512 | case VMM_IOC_MPROTECT_EPT((unsigned long)0x80000000 | ((sizeof(struct vm_mprotect_ept_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((11))): |
513 | ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data); |
514 | break; |
515 | case VMM_IOC_READVMPARAMS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_rwvmparams_params) & 0x1fff) << 16) | (( ('V')) << 8) | ((9))): |
516 | ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0); |
517 | break; |
518 | case VMM_IOC_WRITEVMPARAMS((unsigned long)0x80000000 | ((sizeof(struct vm_rwvmparams_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((10))): |
519 | ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1); |
520 | break; |
521 | |
522 | default: |
523 | DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd); |
524 | ret = ENOTTY25; |
525 | } |
526 | |
527 | KERNEL_LOCK()_kernel_lock(); |
528 | |
529 | return (ret); |
530 | } |
531 | |
532 | /* |
533 | * pledge_ioctl_vmm |
534 | * |
535 | * Restrict the allowed ioctls in a pledged process context. |
536 | * Is called from pledge_ioctl(). |
537 | */ |
538 | int |
539 | pledge_ioctl_vmm(struct proc *p, long com) |
540 | { |
541 | switch (com) { |
542 | case VMM_IOC_CREATE(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_create_params) & 0x1fff) << 16) | ((('V' )) << 8) | ((1))): |
543 | case VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_info_params) & 0x1fff) << 16) | ((('V')) << 8) | ((3))): |
544 | /* The "parent" process in vmd forks and manages VMs */ |
545 | if (p->p_p->ps_pledge & PLEDGE_PROC0x0000000000001000ULL) |
546 | return (0); |
547 | break; |
548 | case VMM_IOC_TERM((unsigned long)0x80000000 | ((sizeof(struct vm_terminate_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((4))): |
549 | /* XXX VM processes should only terminate themselves */ |
550 | case VMM_IOC_RUN(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_run_params) & 0x1fff) << 16) | ((('V')) << 8) | ((2))): |
551 | case VMM_IOC_RESETCPU((unsigned long)0x80000000 | ((sizeof(struct vm_resetcpu_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((5))): |
552 | case VMM_IOC_INTR((unsigned long)0x80000000 | ((sizeof(struct vm_intr_params) & 0x1fff) << 16) | ((('V')) << 8) | ((6))): |
553 | case VMM_IOC_READREGS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_rwregs_params) & 0x1fff) << 16) | ((('V' )) << 8) | ((7))): |
554 | case VMM_IOC_WRITEREGS((unsigned long)0x80000000 | ((sizeof(struct vm_rwregs_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((8))): |
555 | case VMM_IOC_MPROTECT_EPT((unsigned long)0x80000000 | ((sizeof(struct vm_mprotect_ept_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((11))): |
556 | case VMM_IOC_READVMPARAMS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_rwvmparams_params) & 0x1fff) << 16) | (( ('V')) << 8) | ((9))): |
557 | case VMM_IOC_WRITEVMPARAMS((unsigned long)0x80000000 | ((sizeof(struct vm_rwvmparams_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((10))): |
558 | return (0); |
559 | } |
560 | |
561 | return (EPERM1); |
562 | } |
563 | |
564 | /* |
565 | * vmmclose |
566 | * |
567 | * Called when /dev/vmm is closed. Presently unused. |
568 | */ |
569 | int |
570 | vmmclose(dev_t dev, int flag, int mode, struct proc *p) |
571 | { |
572 | return 0; |
573 | } |
574 | |
575 | /* |
576 | * vm_find_vcpu |
577 | * |
578 | * Lookup VMM VCPU by ID number |
579 | * |
580 | * Parameters: |
581 | * vm: vm structure |
582 | * id: index id of vcpu |
583 | * |
584 | * Returns pointer to vcpu structure if successful, NULL otherwise |
585 | */ |
586 | static struct vcpu * |
587 | vm_find_vcpu(struct vm *vm, uint32_t id) |
588 | { |
589 | struct vcpu *vcpu; |
590 | |
591 | if (vm == NULL((void *)0)) |
592 | return NULL((void *)0); |
593 | rw_enter_read(&vm->vm_vcpu_lock); |
594 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
595 | if (vcpu->vc_id == id) |
596 | break; |
597 | } |
598 | rw_exit_read(&vm->vm_vcpu_lock); |
599 | return vcpu; |
600 | } |
601 | |
602 | |
603 | /* |
604 | * vm_resetcpu |
605 | * |
606 | * Resets the vcpu defined in 'vrp' to power-on-init register state |
607 | * |
608 | * Parameters: |
609 | * vrp: ioctl structure defining the vcpu to reset (see vmmvar.h) |
610 | * |
611 | * Returns 0 if successful, or various error codes on failure: |
612 | * ENOENT if the VM id contained in 'vrp' refers to an unknown VM or |
613 | * if vrp describes an unknown vcpu for this VM |
614 | * EBUSY if the indicated VCPU is not stopped |
615 | * EIO if the indicated VCPU failed to reset |
616 | */ |
617 | int |
618 | vm_resetcpu(struct vm_resetcpu_params *vrp) |
619 | { |
620 | struct vm *vm; |
621 | struct vcpu *vcpu; |
622 | int error; |
623 | |
624 | /* Find the desired VM */ |
625 | rw_enter_read(&vmm_softc->vm_lock); |
626 | error = vm_find(vrp->vrp_vm_id, &vm); |
627 | rw_exit_read(&vmm_softc->vm_lock); |
628 | |
629 | /* Not found? exit. */ |
630 | if (error != 0) { |
631 | DPRINTF("%s: vm id %u not found\n", __func__, |
632 | vrp->vrp_vm_id); |
633 | return (error); |
634 | } |
635 | |
636 | vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id); |
637 | |
638 | if (vcpu == NULL((void *)0)) { |
639 | DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__, |
640 | vrp->vrp_vcpu_id, vrp->vrp_vm_id); |
641 | return (ENOENT2); |
642 | } |
643 | |
644 | rw_enter_write(&vcpu->vc_lock); |
645 | |
646 | if (vcpu->vc_state != VCPU_STATE_STOPPED) { |
647 | DPRINTF("%s: reset of vcpu %u on vm %u attempted " |
648 | "while vcpu was in state %u (%s)\n", __func__, |
649 | vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state, |
650 | vcpu_state_decode(vcpu->vc_state)); |
651 | |
652 | rw_exit_write(&vcpu->vc_lock); |
653 | return (EBUSY16); |
654 | } |
655 | |
656 | DPRINTF("%s: resetting vm %d vcpu %d to power on defaults\n", __func__, |
657 | vm->vm_id, vcpu->vc_id); |
658 | |
659 | if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) { |
660 | printf("%s: failed\n", __func__); |
661 | #ifdef VMM_DEBUG |
662 | dump_vcpu(vcpu); |
663 | #endif /* VMM_DEBUG */ |
664 | rw_exit_write(&vcpu->vc_lock); |
665 | return (EIO5); |
666 | } |
667 | |
668 | rw_exit_write(&vcpu->vc_lock); |
669 | return (0); |
670 | } |
671 | |
672 | /* |
673 | * vm_intr_pending |
674 | * |
675 | * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an |
676 | * interrupt is pending and needs acknowledgment |
677 | * |
678 | * Parameters: |
679 | * vip: Describes the vm/vcpu for which the interrupt is pending |
680 | * |
681 | * Return values: |
682 | * 0: if successful |
683 | * ENOENT: if the VM/VCPU defined by 'vip' cannot be found |
684 | */ |
685 | int |
686 | vm_intr_pending(struct vm_intr_params *vip) |
687 | { |
688 | struct vm *vm; |
689 | struct vcpu *vcpu; |
690 | int error; |
691 | |
692 | /* Find the desired VM */ |
693 | rw_enter_read(&vmm_softc->vm_lock); |
694 | error = vm_find(vip->vip_vm_id, &vm); |
695 | |
696 | /* Not found? exit. */ |
697 | if (error != 0) { |
698 | rw_exit_read(&vmm_softc->vm_lock); |
699 | return (error); |
700 | } |
701 | |
702 | vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id); |
703 | rw_exit_read(&vmm_softc->vm_lock); |
704 | |
705 | if (vcpu == NULL((void *)0)) |
706 | return (ENOENT2); |
707 | |
708 | rw_enter_write(&vcpu->vc_lock); |
709 | vcpu->vc_intr = vip->vip_intr; |
710 | rw_exit_write(&vcpu->vc_lock); |
711 | |
712 | return (0); |
713 | } |
714 | |
715 | /* |
716 | * vm_rwvmparams |
717 | * |
718 | * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock |
719 | * version, etc. |
720 | * |
721 | * Parameters: |
722 | * vrwp: Describes the VM and VCPU to get/set the params from |
723 | * dir: 0 for reading, 1 for writing |
724 | * |
725 | * Return values: |
726 | * 0: if successful |
727 | * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found |
728 | * EINVAL: if an error occurred reading the registers of the guest |
729 | */ |
730 | int |
731 | vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) { |
732 | struct vm *vm; |
733 | struct vcpu *vcpu; |
734 | int error; |
735 | |
736 | /* Find the desired VM */ |
737 | rw_enter_read(&vmm_softc->vm_lock); |
738 | error = vm_find(vpp->vpp_vm_id, &vm); |
739 | |
740 | /* Not found? exit. */ |
741 | if (error != 0) { |
742 | rw_exit_read(&vmm_softc->vm_lock); |
743 | return (error); |
744 | } |
745 | |
746 | vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id); |
747 | rw_exit_read(&vmm_softc->vm_lock); |
748 | |
749 | if (vcpu == NULL((void *)0)) |
750 | return (ENOENT2); |
751 | |
752 | if (dir == 0) { |
753 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2) |
754 | vpp->vpp_pvclock_version = vcpu->vc_pvclock_version; |
755 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1) |
756 | vpp->vpp_pvclock_system_gpa = \ |
757 | vcpu->vc_pvclock_system_gpa; |
758 | return (0); |
759 | } |
760 | |
761 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2) |
762 | vcpu->vc_pvclock_version = vpp->vpp_pvclock_version; |
763 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1) { |
764 | vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa); |
765 | } |
766 | return (0); |
767 | |
768 | } |
769 | |
770 | /* |
771 | * vm_readregs |
772 | * |
773 | * IOCTL handler to read/write the current register values of a guest VCPU. |
774 | * The VCPU must not be running. |
775 | * |
776 | * Parameters: |
777 | * vrwp: Describes the VM and VCPU to get/set the registers from. The |
778 | * register values are returned here as well. |
779 | * dir: 0 for reading, 1 for writing |
780 | * |
781 | * Return values: |
782 | * 0: if successful |
783 | * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found |
784 | * EINVAL: if an error occurred accessing the registers of the guest |
785 | * EPERM: if the vm cannot be accessed from the calling process |
786 | */ |
787 | int |
788 | vm_rwregs(struct vm_rwregs_params *vrwp, int dir) |
789 | { |
790 | struct vm *vm; |
791 | struct vcpu *vcpu; |
792 | struct vcpu_reg_state *vrs = &vrwp->vrwp_regs; |
793 | int error, ret; |
794 | |
795 | /* Find the desired VM */ |
796 | rw_enter_read(&vmm_softc->vm_lock); |
797 | error = vm_find(vrwp->vrwp_vm_id, &vm); |
798 | |
799 | /* Not found? exit. */ |
800 | if (error != 0) { |
801 | rw_exit_read(&vmm_softc->vm_lock); |
802 | return (error); |
803 | } |
804 | |
805 | vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id); |
806 | rw_exit_read(&vmm_softc->vm_lock); |
807 | |
808 | if (vcpu == NULL((void *)0)) |
809 | return (ENOENT2); |
810 | |
811 | rw_enter_write(&vcpu->vc_lock); |
812 | if (vmm_softc->mode == VMM_MODE_VMX || |
813 | vmm_softc->mode == VMM_MODE_EPT) |
814 | ret = (dir == 0) ? |
815 | vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, vrs) : |
816 | vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs); |
817 | else if (vmm_softc->mode == VMM_MODE_SVM || |
818 | vmm_softc->mode == VMM_MODE_RVI) |
819 | ret = (dir == 0) ? |
820 | vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) : |
821 | vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs); |
822 | else { |
823 | DPRINTF("%s: unknown vmm mode", __func__); |
824 | ret = EINVAL22; |
825 | } |
826 | rw_exit_write(&vcpu->vc_lock); |
827 | |
828 | return (ret); |
829 | } |
830 | |
831 | /* |
832 | * vm_mprotect_ept |
833 | * |
834 | * IOCTL handler to sets the access protections of the ept |
835 | * |
836 | * Parameters: |
837 | * vmep: describes the memory for which the protect will be applied.. |
838 | * |
839 | * Return values: |
840 | * 0: if successful |
841 | * ENOENT: if the VM defined by 'vmep' cannot be found |
842 | * EINVAL: if the sgpa or size is not page aligned, the prot is invalid, |
843 | * size is too large (512GB), there is wraparound |
844 | * (like start = 512GB-1 and end = 512GB-2), |
845 | * the address specified is not within the vm's mem range |
846 | * or the address lies inside reserved (MMIO) memory |
847 | */ |
848 | int |
849 | vm_mprotect_ept(struct vm_mprotect_ept_params *vmep) |
850 | { |
851 | struct vm *vm; |
852 | struct vcpu *vcpu; |
853 | vaddr_t sgpa; |
854 | size_t size; |
855 | vm_prot_t prot; |
856 | uint64_t msr; |
857 | int ret, memtype; |
858 | |
859 | /* If not EPT or RVI, nothing to do here */ |
860 | if (!(vmm_softc->mode == VMM_MODE_EPT |
861 | || vmm_softc->mode == VMM_MODE_RVI)) |
862 | return (0); |
863 | |
864 | /* Find the desired VM */ |
865 | rw_enter_read(&vmm_softc->vm_lock); |
866 | ret = vm_find(vmep->vmep_vm_id, &vm); |
867 | rw_exit_read(&vmm_softc->vm_lock); |
868 | |
869 | /* Not found? exit. */ |
870 | if (ret != 0) { |
871 | DPRINTF("%s: vm id %u not found\n", __func__, |
872 | vmep->vmep_vm_id); |
873 | return (ret); |
874 | } |
875 | |
876 | vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id); |
877 | |
878 | if (vcpu == NULL((void *)0)) { |
879 | DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__, |
880 | vmep->vmep_vcpu_id, vmep->vmep_vm_id); |
881 | return (ENOENT2); |
882 | } |
883 | |
884 | if (vcpu->vc_state != VCPU_STATE_STOPPED) { |
885 | DPRINTF("%s: mprotect_ept %u on vm %u attempted " |
886 | "while vcpu was in state %u (%s)\n", __func__, |
887 | vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state, |
888 | vcpu_state_decode(vcpu->vc_state)); |
889 | |
890 | return (EBUSY16); |
891 | } |
892 | |
893 | /* Only proceed if the pmap is in the correct mode */ |
894 | KASSERT((vmm_softc->mode == VMM_MODE_EPT &&(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map ->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == 3)) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c" , 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)" )) |
895 | vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map ->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == 3)) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c" , 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)" )) |
896 | (vmm_softc->mode == VMM_MODE_RVI &&(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map ->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == 3)) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c" , 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)" )) |
897 | vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI))(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map ->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == 3)) ? (void )0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c" , 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)" )); |
898 | |
899 | sgpa = vmep->vmep_sgpa; |
900 | size = vmep->vmep_size; |
901 | prot = vmep->vmep_prot; |
902 | |
903 | /* No W^X permissions */ |
904 | if ((prot & PROT_MASK(0x01 | 0x02 | 0x04)) != prot && |
905 | (prot & (PROT_WRITE0x02 | PROT_EXEC0x04)) == (PROT_WRITE0x02 | PROT_EXEC0x04)) { |
906 | DPRINTF("%s: W+X permission requested\n", __func__); |
907 | return (EINVAL22); |
908 | } |
909 | |
910 | /* No Write only permissions */ |
911 | if ((prot & (PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04)) == PROT_WRITE0x02) { |
912 | DPRINTF("%s: No Write only permissions\n", __func__); |
913 | return (EINVAL22); |
914 | } |
915 | |
916 | /* No empty permissions */ |
917 | if (prot == 0) { |
918 | DPRINTF("%s: No empty permissions\n", __func__); |
919 | return (EINVAL22); |
920 | } |
921 | |
922 | /* No execute only on EPT CPUs that don't have that capability */ |
923 | if (vmm_softc->mode == VMM_MODE_EPT) { |
924 | msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C); |
925 | if (prot == PROT_EXEC0x04 && |
926 | (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS(1ULL << 0)) == 0) { |
927 | DPRINTF("%s: Execute only permissions unsupported," |
928 | " adding read permission\n", __func__); |
929 | |
930 | prot |= PROT_READ0x01; |
931 | } |
932 | } |
933 | |
934 | /* Must be page aligned */ |
935 | if ((sgpa & PAGE_MASK((1 << 12) - 1)) || (size & PAGE_MASK((1 << 12) - 1)) || size == 0) |
936 | return (EINVAL22); |
937 | |
938 | /* size must be less then 512GB */ |
939 | if (size >= NBPD_L4(1ULL << 39)) |
940 | return (EINVAL22); |
941 | |
942 | /* no wraparound */ |
943 | if (sgpa + size < sgpa) |
944 | return (EINVAL22); |
945 | |
946 | /* |
947 | * Specifying addresses within the PCI MMIO space is forbidden. |
948 | * Disallow addresses that start inside the MMIO space: |
949 | * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
950 | */ |
951 | if (sgpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && sgpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) |
952 | return (EINVAL22); |
953 | |
954 | /* |
955 | * ... and disallow addresses that end inside the MMIO space: |
956 | * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
957 | */ |
958 | if (sgpa + size > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && |
959 | sgpa + size <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) |
960 | return (EINVAL22); |
961 | |
962 | memtype = vmm_get_guest_memtype(vm, sgpa); |
963 | if (memtype == VMM_MEM_TYPE_UNKNOWN) |
964 | return (EINVAL22); |
965 | |
966 | if (vmm_softc->mode == VMM_MODE_EPT) |
967 | ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot); |
968 | else if (vmm_softc->mode == VMM_MODE_RVI) { |
969 | pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot); |
970 | /* XXX requires a invlpga */ |
971 | ret = 0; |
972 | } else |
973 | return (EINVAL22); |
974 | |
975 | return (ret); |
976 | } |
977 | |
978 | /* |
979 | * vmx_mprotect_ept |
980 | * |
981 | * apply the ept protections to the requested pages, faulting in the page if |
982 | * required. |
983 | */ |
984 | int |
985 | vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot) |
986 | { |
987 | struct vmx_invept_descriptor vid; |
988 | pmap_t pmap; |
989 | pt_entry_t *pte; |
990 | paddr_t addr; |
991 | int ret = 0; |
992 | |
993 | pmap = vm_map->pmap; |
994 | |
995 | KERNEL_LOCK()_kernel_lock(); |
996 | |
997 | for (addr = sgpa; addr < egpa; addr += PAGE_SIZE(1 << 12)) { |
998 | pte = vmx_pmap_find_pte_ept(pmap, addr); |
999 | if (pte == NULL((void *)0)) { |
1000 | ret = uvm_fault(vm_map, addr, VM_FAULT_WIRE((vm_fault_t) 0x2), |
1001 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04); |
1002 | if (ret) |
1003 | printf("%s: uvm_fault returns %d, GPA=0x%llx\n", |
1004 | __func__, ret, (uint64_t)addr); |
1005 | |
1006 | pte = vmx_pmap_find_pte_ept(pmap, addr); |
1007 | if (pte == NULL((void *)0)) { |
1008 | KERNEL_UNLOCK()_kernel_unlock(); |
1009 | return EFAULT14; |
1010 | } |
1011 | } |
1012 | |
1013 | if (prot & PROT_READ0x01) |
1014 | *pte |= EPT_R(1ULL << 0); |
1015 | else |
1016 | *pte &= ~EPT_R(1ULL << 0); |
1017 | |
1018 | if (prot & PROT_WRITE0x02) |
1019 | *pte |= EPT_W(1ULL << 1); |
1020 | else |
1021 | *pte &= ~EPT_W(1ULL << 1); |
1022 | |
1023 | if (prot & PROT_EXEC0x04) |
1024 | *pte |= EPT_X(1ULL << 2); |
1025 | else |
1026 | *pte &= ~EPT_X(1ULL << 2); |
1027 | } |
1028 | |
1029 | /* |
1030 | * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction |
1031 | * the first bullet point seems to say we should call invept. |
1032 | * |
1033 | * Software should use the INVEPT instruction with the “single-context” |
1034 | * INVEPT type after making any of the following changes to an EPT |
1035 | * paging-structure entry (the INVEPT descriptor should contain an |
1036 | * EPTP value that references — directly or indirectly |
1037 | * — the modified EPT paging structure): |
1038 | * — Changing any of the privilege bits 2:0 from 1 to 0. |
1039 | * */ |
1040 | if (pmap->eptp != 0) { |
1041 | memset(&vid, 0, sizeof(vid))__builtin_memset((&vid), (0), (sizeof(vid))); |
1042 | vid.vid_eptp = pmap->eptp; |
1043 | DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__, |
1044 | vid.vid_eptp); |
1045 | invept(IA32_VMX_INVEPT_SINGLE_CTX0x1, &vid); |
1046 | } |
1047 | |
1048 | KERNEL_UNLOCK()_kernel_unlock(); |
1049 | |
1050 | return ret; |
1051 | } |
1052 | |
1053 | /* |
1054 | * vmx_pmap_find_pte_ept |
1055 | * |
1056 | * find the page table entry specified by addr in the pmap supplied. |
1057 | */ |
1058 | pt_entry_t * |
1059 | vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr) |
1060 | { |
1061 | int l4idx, l3idx, l2idx, l1idx; |
1062 | pd_entry_t *pd; |
1063 | paddr_t pdppa; |
1064 | pt_entry_t *ptes, *pte; |
1065 | |
1066 | l4idx = (addr & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */ |
1067 | l3idx = (addr & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */ |
1068 | l2idx = (addr & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */ |
1069 | l1idx = (addr & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */ |
1070 | |
1071 | pd = (pd_entry_t *)pmap->pm_pdir; |
1072 | if (pd == NULL((void *)0)) |
1073 | return NULL((void *)0); |
1074 | |
1075 | /* |
1076 | * l4idx should always be 0 since we don't support more than 512GB |
1077 | * guest physical memory. |
1078 | */ |
1079 | if (l4idx > 0) |
1080 | return NULL((void *)0); |
1081 | |
1082 | /* |
1083 | * l3idx should always be < MAXDSIZ/1GB because we don't support more |
1084 | * than MAXDSIZ guest phys mem. |
1085 | */ |
1086 | if (l3idx >= MAXDSIZ((paddr_t)32*1024*1024*1024) / ((paddr_t)1024 * 1024 * 1024)) |
1087 | return NULL((void *)0); |
1088 | |
1089 | pdppa = pd[l4idx] & PG_FRAME0x000ffffffffff000UL; |
1090 | if (pdppa == 0) |
1091 | return NULL((void *)0); |
1092 | |
1093 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (pdppa)); |
1094 | |
1095 | pdppa = ptes[l3idx] & PG_FRAME0x000ffffffffff000UL; |
1096 | if (pdppa == 0) |
1097 | return NULL((void *)0); |
1098 | |
1099 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (pdppa)); |
1100 | |
1101 | pdppa = ptes[l2idx] & PG_FRAME0x000ffffffffff000UL; |
1102 | if (pdppa == 0) |
1103 | return NULL((void *)0); |
1104 | |
1105 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (pdppa)); |
1106 | |
1107 | pte = &ptes[l1idx]; |
1108 | if (*pte == 0) |
1109 | return NULL((void *)0); |
1110 | |
1111 | return pte; |
1112 | } |
1113 | |
1114 | /* |
1115 | * vm_find |
1116 | * |
1117 | * Function to find an existing VM by its identifier. |
1118 | * Must be called under the global vm_lock. |
1119 | * |
1120 | * Parameters: |
1121 | * id: The VM identifier. |
1122 | * *res: A pointer to the VM or NULL if not found |
1123 | * |
1124 | * Return values: |
1125 | * 0: if successful |
1126 | * ENOENT: if the VM defined by 'id' cannot be found |
1127 | * EPERM: if the VM cannot be accessed by the current process |
1128 | */ |
1129 | int |
1130 | vm_find(uint32_t id, struct vm **res) |
1131 | { |
1132 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; |
1133 | struct vm *vm; |
1134 | |
1135 | *res = NULL((void *)0); |
1136 | SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link)for((vm) = ((&vmm_softc->vm_list)->slh_first); (vm) != ((void *)0); (vm) = ((vm)->vm_link.sle_next)) { |
1137 | if (vm->vm_id == id) { |
1138 | /* |
1139 | * In the pledged VM process, only allow to find |
1140 | * the VM that is running in the current process. |
1141 | * The managing vmm parent process can lookup all |
1142 | * all VMs and is indicated by PLEDGE_PROC. |
1143 | */ |
1144 | if (((p->p_p->ps_pledge & |
1145 | (PLEDGE_VMM0x0000000040000000ULL | PLEDGE_PROC0x0000000000001000ULL)) == PLEDGE_VMM0x0000000040000000ULL) && |
1146 | (vm->vm_creator_pid != p->p_p->ps_pid)) |
1147 | return (pledge_fail(p, EPERM1, PLEDGE_VMM0x0000000040000000ULL)); |
1148 | *res = vm; |
1149 | return (0); |
1150 | } |
1151 | } |
1152 | |
1153 | return (ENOENT2); |
1154 | } |
1155 | |
1156 | /* |
1157 | * vmm_start |
1158 | * |
1159 | * Starts VMM mode on the system |
1160 | */ |
1161 | int |
1162 | vmm_start(void) |
1163 | { |
1164 | struct cpu_info *self = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
1165 | int ret = 0; |
1166 | #ifdef MULTIPROCESSOR1 |
1167 | struct cpu_info *ci; |
1168 | CPU_INFO_ITERATORint cii; |
1169 | int i; |
1170 | #endif |
1171 | |
1172 | /* VMM is already running */ |
1173 | if (self->ci_flags & CPUF_VMM0x20000) |
1174 | return (0); |
1175 | |
1176 | #ifdef MULTIPROCESSOR1 |
1177 | /* Broadcast start VMM IPI */ |
1178 | x86_broadcast_ipi(X86_IPI_START_VMM0x00000100); |
1179 | |
1180 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { |
1181 | if (ci == self) |
1182 | continue; |
1183 | for (i = 100000; (!(ci->ci_flags & CPUF_VMM0x20000)) && i>0;i--) |
1184 | delay(10)(*delay_func)(10); |
1185 | if (!(ci->ci_flags & CPUF_VMM0x20000)) { |
1186 | printf("%s: failed to enter VMM mode\n", |
1187 | ci->ci_dev->dv_xname); |
1188 | ret = EIO5; |
1189 | } |
1190 | } |
1191 | #endif /* MULTIPROCESSOR */ |
1192 | |
1193 | /* Start VMM on this CPU */ |
1194 | start_vmm_on_cpu(self); |
1195 | if (!(self->ci_flags & CPUF_VMM0x20000)) { |
1196 | printf("%s: failed to enter VMM mode\n", |
1197 | self->ci_dev->dv_xname); |
1198 | ret = EIO5; |
1199 | } |
1200 | |
1201 | return (ret); |
1202 | } |
1203 | |
1204 | /* |
1205 | * vmm_stop |
1206 | * |
1207 | * Stops VMM mode on the system |
1208 | */ |
1209 | int |
1210 | vmm_stop(void) |
1211 | { |
1212 | struct cpu_info *self = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
1213 | int ret = 0; |
1214 | #ifdef MULTIPROCESSOR1 |
1215 | struct cpu_info *ci; |
1216 | CPU_INFO_ITERATORint cii; |
1217 | int i; |
1218 | #endif |
1219 | |
1220 | /* VMM is not running */ |
1221 | if (!(self->ci_flags & CPUF_VMM0x20000)) |
1222 | return (0); |
1223 | |
1224 | #ifdef MULTIPROCESSOR1 |
1225 | /* Stop VMM on other CPUs */ |
1226 | x86_broadcast_ipi(X86_IPI_STOP_VMM0x00000200); |
1227 | |
1228 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { |
1229 | if (ci == self) |
1230 | continue; |
1231 | for (i = 100000; (ci->ci_flags & CPUF_VMM0x20000) && i>0 ;i--) |
1232 | delay(10)(*delay_func)(10); |
1233 | if (ci->ci_flags & CPUF_VMM0x20000) { |
1234 | printf("%s: failed to exit VMM mode\n", |
1235 | ci->ci_dev->dv_xname); |
1236 | ret = EIO5; |
1237 | } |
1238 | } |
1239 | #endif /* MULTIPROCESSOR */ |
1240 | |
1241 | /* Stop VMM on this CPU */ |
1242 | stop_vmm_on_cpu(self); |
1243 | if (self->ci_flags & CPUF_VMM0x20000) { |
1244 | printf("%s: failed to exit VMM mode\n", |
1245 | self->ci_dev->dv_xname); |
1246 | ret = EIO5; |
1247 | } |
1248 | |
1249 | return (ret); |
1250 | } |
1251 | |
1252 | /* |
1253 | * start_vmm_on_cpu |
1254 | * |
1255 | * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn |
1256 | * sequence to enter VMM mode (eg, VMXON) |
1257 | */ |
1258 | void |
1259 | start_vmm_on_cpu(struct cpu_info *ci) |
1260 | { |
1261 | uint64_t msr; |
1262 | uint32_t cr4; |
1263 | |
1264 | /* No VMM mode? exit. */ |
1265 | if ((ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) == 0 && |
1266 | (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) == 0) |
1267 | return; |
1268 | |
1269 | /* |
1270 | * AMD SVM |
1271 | */ |
1272 | if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) { |
1273 | msr = rdmsr(MSR_EFER0xc0000080); |
1274 | msr |= EFER_SVME0x00001000; |
1275 | wrmsr(MSR_EFER0xc0000080, msr); |
1276 | } |
1277 | |
1278 | /* |
1279 | * Intel VMX |
1280 | */ |
1281 | if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) { |
1282 | if (ci->ci_vmxon_region == 0) |
1283 | return; |
1284 | else { |
1285 | bzero(ci->ci_vmxon_region, PAGE_SIZE)__builtin_bzero((ci->ci_vmxon_region), ((1 << 12))); |
1286 | ci->ci_vmxon_region->vr_revision = |
1287 | ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; |
1288 | |
1289 | /* Set CR4.VMXE */ |
1290 | cr4 = rcr4(); |
1291 | cr4 |= CR4_VMXE0x00002000; |
1292 | lcr4(cr4); |
1293 | |
1294 | /* Enable VMX */ |
1295 | msr = rdmsr(MSR_IA32_FEATURE_CONTROL0x03a); |
1296 | if (msr & IA32_FEATURE_CONTROL_LOCK0x01) { |
1297 | if (!(msr & IA32_FEATURE_CONTROL_VMX_EN0x04)) |
1298 | return; |
1299 | } else { |
1300 | msr |= IA32_FEATURE_CONTROL_VMX_EN0x04 | |
1301 | IA32_FEATURE_CONTROL_LOCK0x01; |
1302 | wrmsr(MSR_IA32_FEATURE_CONTROL0x03a, msr); |
1303 | } |
1304 | |
1305 | /* Enter VMX mode */ |
1306 | if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa)) |
1307 | return; |
1308 | } |
1309 | } |
1310 | |
1311 | ci->ci_flags |= CPUF_VMM0x20000; |
1312 | } |
1313 | |
1314 | /* |
1315 | * stop_vmm_on_cpu |
1316 | * |
1317 | * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn |
1318 | * sequence to exit VMM mode (eg, VMXOFF) |
1319 | */ |
1320 | void |
1321 | stop_vmm_on_cpu(struct cpu_info *ci) |
1322 | { |
1323 | uint64_t msr; |
1324 | uint32_t cr4; |
1325 | |
1326 | if (!(ci->ci_flags & CPUF_VMM0x20000)) |
1327 | return; |
1328 | |
1329 | /* |
1330 | * AMD SVM |
1331 | */ |
1332 | if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) { |
1333 | msr = rdmsr(MSR_EFER0xc0000080); |
1334 | msr &= ~EFER_SVME0x00001000; |
1335 | wrmsr(MSR_EFER0xc0000080, msr); |
1336 | } |
1337 | |
1338 | /* |
1339 | * Intel VMX |
1340 | */ |
1341 | if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) { |
1342 | if (vmxoff()) |
1343 | panic("VMXOFF failed"); |
1344 | |
1345 | cr4 = rcr4(); |
1346 | cr4 &= ~CR4_VMXE0x00002000; |
1347 | lcr4(cr4); |
1348 | } |
1349 | |
1350 | ci->ci_flags &= ~CPUF_VMM0x20000; |
1351 | } |
1352 | |
1353 | /* |
1354 | * vmclear_on_cpu |
1355 | * |
1356 | * Flush and clear VMCS on 'ci' by executing vmclear. |
1357 | * |
1358 | */ |
1359 | void |
1360 | vmclear_on_cpu(struct cpu_info *ci) |
1361 | { |
1362 | if ((ci->ci_flags & CPUF_VMM0x20000) && (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))) { |
1363 | if (vmclear(&ci->ci_vmcs_pa)) |
1364 | panic("VMCLEAR ipi failed"); |
1365 | atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR)_atomic_swap_ulong((&ci->ci_vmcs_pa), (0xFFFFFFFFFFFFFFFFUL )); |
1366 | } |
1367 | } |
1368 | |
1369 | #ifdef MULTIPROCESSOR1 |
1370 | static int |
1371 | vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu) |
1372 | { |
1373 | int ret = 0, nticks = 200000000; |
1374 | |
1375 | rw_enter_write(&ci->ci_vmcs_lock); |
1376 | atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa)_atomic_swap_ulong((&ci->ci_vmcs_pa), (vcpu->vc_control_pa )); |
1377 | x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM0x00000004); |
1378 | |
1379 | while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL) { |
1380 | CPU_BUSY_CYCLE()__asm volatile("pause": : : "memory"); |
1381 | if (--nticks <= 0) { |
1382 | printf("%s: spun out\n", __func__); |
1383 | ret = 1; |
1384 | break; |
1385 | } |
1386 | } |
1387 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0)); |
1388 | rw_exit_write(&ci->ci_vmcs_lock); |
1389 | |
1390 | return (ret); |
1391 | } |
1392 | #endif /* MULTIPROCESSOR */ |
1393 | |
1394 | /* |
1395 | * vm_create_check_mem_ranges |
1396 | * |
1397 | * Make sure that the guest physical memory ranges given by the user process |
1398 | * do not overlap and are in ascending order. |
1399 | * |
1400 | * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE. |
1401 | * |
1402 | * Return Values: |
1403 | * The total memory size in MB if the checks were successful |
1404 | * 0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was |
1405 | * exceeded |
1406 | */ |
1407 | size_t |
1408 | vm_create_check_mem_ranges(struct vm_create_params *vcp) |
1409 | { |
1410 | size_t i, memsize = 0; |
1411 | struct vm_mem_range *vmr, *pvmr; |
1412 | const paddr_t maxgpa = (uint64_t)VMM_MAX_VM_MEM_SIZE32768 * 1024 * 1024; |
1413 | |
1414 | if (vcp->vcp_nmemranges == 0 || |
1415 | vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES16) |
1416 | return (0); |
1417 | |
1418 | for (i = 0; i < vcp->vcp_nmemranges; i++) { |
1419 | vmr = &vcp->vcp_memranges[i]; |
1420 | |
1421 | /* Only page-aligned addresses and sizes are permitted */ |
1422 | if ((vmr->vmr_gpa & PAGE_MASK((1 << 12) - 1)) || (vmr->vmr_va & PAGE_MASK((1 << 12) - 1)) || |
1423 | (vmr->vmr_size & PAGE_MASK((1 << 12) - 1)) || vmr->vmr_size == 0) |
1424 | return (0); |
1425 | |
1426 | /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */ |
1427 | if (vmr->vmr_gpa >= maxgpa || |
1428 | vmr->vmr_size > maxgpa - vmr->vmr_gpa) |
1429 | return (0); |
1430 | |
1431 | /* |
1432 | * Make sure that all virtual addresses are within the address |
1433 | * space of the process and that they do not wrap around. |
1434 | * Calling uvm_share() when creating the VM will take care of |
1435 | * further checks. |
1436 | */ |
1437 | if (vmr->vmr_va < VM_MIN_ADDRESS(1 << 12) || |
1438 | vmr->vmr_va >= VM_MAXUSER_ADDRESS0x00007f7fffffc000 || |
1439 | vmr->vmr_size >= VM_MAXUSER_ADDRESS0x00007f7fffffc000 - vmr->vmr_va) |
1440 | return (0); |
1441 | |
1442 | /* |
1443 | * Specifying ranges within the PCI MMIO space is forbidden. |
1444 | * Disallow ranges that start inside the MMIO space: |
1445 | * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
1446 | */ |
1447 | if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && |
1448 | vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) |
1449 | return (0); |
1450 | |
1451 | /* |
1452 | * ... and disallow ranges that end inside the MMIO space: |
1453 | * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
1454 | */ |
1455 | if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && |
1456 | vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) |
1457 | return (0); |
1458 | |
1459 | /* |
1460 | * Make sure that guest physical memory ranges do not overlap |
1461 | * and that they are ascending. |
1462 | */ |
1463 | if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) |
1464 | return (0); |
1465 | |
1466 | memsize += vmr->vmr_size; |
1467 | pvmr = vmr; |
1468 | } |
1469 | |
1470 | if (memsize % (1024 * 1024) != 0) |
1471 | return (0); |
1472 | memsize /= 1024 * 1024; |
1473 | return (memsize); |
1474 | } |
1475 | |
1476 | /* |
1477 | * vm_create |
1478 | * |
1479 | * Creates the in-memory VMM structures for the VM defined by 'vcp'. The |
1480 | * parent of this VM shall be the process defined by 'p'. |
1481 | * This function does not start the VCPU(s) - see vm_start. |
1482 | * |
1483 | * Return Values: |
1484 | * 0: the create operation was successful |
1485 | * ENOMEM: out of memory |
1486 | * various other errors from vcpu_init/vm_impl_init |
1487 | */ |
1488 | int |
1489 | vm_create(struct vm_create_params *vcp, struct proc *p) |
1490 | { |
1491 | int i, ret; |
1492 | size_t memsize; |
1493 | struct vm *vm; |
1494 | struct vcpu *vcpu; |
1495 | |
1496 | if (!(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_flags & CPUF_VMM0x20000)) |
1497 | return (EINVAL22); |
1498 | |
1499 | memsize = vm_create_check_mem_ranges(vcp); |
1500 | if (memsize == 0) |
1501 | return (EINVAL22); |
1502 | |
1503 | /* XXX - support UP only (for now) */ |
1504 | if (vcp->vcp_ncpus != 1) |
1505 | return (EINVAL22); |
1506 | |
1507 | rw_enter_write(&vmm_softc->vm_lock); |
1508 | if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > VMM_MAX_VCPUS512) { |
1509 | DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__, |
1510 | vmm_softc->vcpu_max); |
1511 | rw_exit_write(&vmm_softc->vm_lock); |
1512 | return (ENOMEM12); |
1513 | } |
1514 | vmm_softc->vcpu_ct += vcp->vcp_ncpus; |
1515 | |
1516 | vm = pool_get(&vm_pool, PR_WAITOK0x0001 | PR_ZERO0x0008); |
1517 | SLIST_INIT(&vm->vm_vcpu_list){ ((&vm->vm_vcpu_list)->slh_first) = ((void *)0); }; |
1518 | rw_init(&vm->vm_vcpu_lock, "vcpu_list")_rw_init_flags(&vm->vm_vcpu_lock, "vcpu_list", 0, ((void *)0)); |
1519 | |
1520 | vm->vm_creator_pid = p->p_p->ps_pid; |
1521 | vm->vm_nmemranges = vcp->vcp_nmemranges; |
1522 | memcpy(vm->vm_memranges, vcp->vcp_memranges,__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges ), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))) |
1523 | vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges ), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))); |
1524 | vm->vm_memory_size = memsize; |
1525 | strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN64 - 1); |
1526 | |
1527 | if (vm_impl_init(vm, p)) { |
1528 | printf("failed to init arch-specific features for vm %p\n", vm); |
1529 | vm_teardown(vm); |
1530 | rw_exit_write(&vmm_softc->vm_lock); |
1531 | return (ENOMEM12); |
1532 | } |
1533 | |
1534 | vmm_softc->vm_ct++; |
1535 | vmm_softc->vm_idx++; |
1536 | |
1537 | vm->vm_id = vmm_softc->vm_idx; |
1538 | vm->vm_vcpu_ct = 0; |
1539 | vm->vm_vcpus_running = 0; |
1540 | |
1541 | /* Initialize each VCPU defined in 'vcp' */ |
1542 | for (i = 0; i < vcp->vcp_ncpus; i++) { |
1543 | vcpu = pool_get(&vcpu_pool, PR_WAITOK0x0001 | PR_ZERO0x0008); |
1544 | vcpu->vc_parent = vm; |
1545 | if ((ret = vcpu_init(vcpu)) != 0) { |
1546 | printf("failed to init vcpu %d for vm %p\n", i, vm); |
1547 | vm_teardown(vm); |
1548 | vmm_softc->vm_idx--; |
1549 | rw_exit_write(&vmm_softc->vm_lock); |
1550 | return (ret); |
1551 | } |
1552 | rw_enter_write(&vm->vm_vcpu_lock); |
1553 | vcpu->vc_id = vm->vm_vcpu_ct; |
1554 | vm->vm_vcpu_ct++; |
1555 | SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link)do { (vcpu)->vc_vcpu_link.sle_next = (&vm->vm_vcpu_list )->slh_first; (&vm->vm_vcpu_list)->slh_first = ( vcpu); } while (0); |
1556 | rw_exit_write(&vm->vm_vcpu_lock); |
1557 | } |
1558 | |
1559 | /* XXX init various other hardware parts (vlapic, vioapic, etc) */ |
1560 | |
1561 | SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link)do { (vm)->vm_link.sle_next = (&vmm_softc->vm_list) ->slh_first; (&vmm_softc->vm_list)->slh_first = ( vm); } while (0); |
1562 | rw_exit_write(&vmm_softc->vm_lock); |
1563 | |
1564 | vcp->vcp_id = vm->vm_id; |
1565 | |
1566 | return (0); |
1567 | } |
1568 | |
1569 | /* |
1570 | * vm_impl_init_vmx |
1571 | * |
1572 | * Intel VMX specific VM initialization routine |
1573 | * |
1574 | * Parameters: |
1575 | * vm: the VM being initialized |
1576 | * p: vmd process owning the VM |
1577 | * |
1578 | * Return values: |
1579 | * 0: the initialization was successful |
1580 | * ENOMEM: the initialization failed (lack of resources) |
1581 | */ |
1582 | int |
1583 | vm_impl_init_vmx(struct vm *vm, struct proc *p) |
1584 | { |
1585 | int i, ret; |
1586 | vaddr_t mingpa, maxgpa; |
1587 | struct vm_mem_range *vmr; |
1588 | |
1589 | /* If not EPT, nothing to do here */ |
1590 | if (vmm_softc->mode != VMM_MODE_EPT) |
1591 | return (0); |
1592 | |
1593 | vmr = &vm->vm_memranges[0]; |
1594 | mingpa = vmr->vmr_gpa; |
1595 | vmr = &vm->vm_memranges[vm->vm_nmemranges - 1]; |
1596 | maxgpa = vmr->vmr_gpa + vmr->vmr_size; |
1597 | |
1598 | /* |
1599 | * uvmspace_alloc (currently) always returns a valid vmspace |
1600 | */ |
1601 | vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0); |
1602 | vm->vm_map = &vm->vm_vmspace->vm_map; |
1603 | |
1604 | /* Map the new map with an anon */ |
1605 | DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map); |
1606 | for (i = 0; i < vm->vm_nmemranges; i++) { |
1607 | vmr = &vm->vm_memranges[i]; |
1608 | ret = uvm_share(vm->vm_map, vmr->vmr_gpa, |
1609 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04, |
1610 | &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size); |
1611 | if (ret) { |
1612 | printf("%s: uvm_share failed (%d)\n", __func__, ret); |
1613 | /* uvmspace_free calls pmap_destroy for us */ |
1614 | uvmspace_free(vm->vm_vmspace); |
1615 | vm->vm_vmspace = NULL((void *)0); |
1616 | return (ENOMEM12); |
1617 | } |
1618 | } |
1619 | |
1620 | ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_EPT2); |
1621 | if (ret) { |
1622 | printf("%s: pmap_convert failed\n", __func__); |
1623 | /* uvmspace_free calls pmap_destroy for us */ |
1624 | uvmspace_free(vm->vm_vmspace); |
1625 | vm->vm_vmspace = NULL((void *)0); |
1626 | return (ENOMEM12); |
1627 | } |
1628 | |
1629 | return (0); |
1630 | } |
1631 | |
1632 | /* |
1633 | * vm_impl_init_svm |
1634 | * |
1635 | * AMD SVM specific VM initialization routine |
1636 | * |
1637 | * Parameters: |
1638 | * vm: the VM being initialized |
1639 | * p: vmd process owning the VM |
1640 | * |
1641 | * Return values: |
1642 | * 0: the initialization was successful |
1643 | * ENOMEM: the initialization failed (lack of resources) |
1644 | */ |
1645 | int |
1646 | vm_impl_init_svm(struct vm *vm, struct proc *p) |
1647 | { |
1648 | int i, ret; |
1649 | vaddr_t mingpa, maxgpa; |
1650 | struct vm_mem_range *vmr; |
1651 | |
1652 | /* If not RVI, nothing to do here */ |
1653 | if (vmm_softc->mode != VMM_MODE_RVI) |
1654 | return (0); |
1655 | |
1656 | vmr = &vm->vm_memranges[0]; |
1657 | mingpa = vmr->vmr_gpa; |
1658 | vmr = &vm->vm_memranges[vm->vm_nmemranges - 1]; |
1659 | maxgpa = vmr->vmr_gpa + vmr->vmr_size; |
1660 | |
1661 | /* |
1662 | * uvmspace_alloc (currently) always returns a valid vmspace |
1663 | */ |
1664 | vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0); |
1665 | vm->vm_map = &vm->vm_vmspace->vm_map; |
1666 | |
1667 | /* Map the new map with an anon */ |
1668 | DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map); |
1669 | for (i = 0; i < vm->vm_nmemranges; i++) { |
1670 | vmr = &vm->vm_memranges[i]; |
1671 | ret = uvm_share(vm->vm_map, vmr->vmr_gpa, |
1672 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04, |
1673 | &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size); |
1674 | if (ret) { |
1675 | printf("%s: uvm_share failed (%d)\n", __func__, ret); |
1676 | /* uvmspace_free calls pmap_destroy for us */ |
1677 | uvmspace_free(vm->vm_vmspace); |
1678 | vm->vm_vmspace = NULL((void *)0); |
1679 | return (ENOMEM12); |
1680 | } |
1681 | } |
1682 | |
1683 | /* Convert pmap to RVI */ |
1684 | ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_RVI3); |
1685 | |
1686 | return (ret); |
1687 | } |
1688 | |
1689 | /* |
1690 | * vm_impl_init |
1691 | * |
1692 | * Calls the architecture-specific VM init routine |
1693 | * |
1694 | * Parameters: |
1695 | * vm: the VM being initialized |
1696 | * p: vmd process owning the VM |
1697 | * |
1698 | * Return values (from architecture-specific init routines): |
1699 | * 0: the initialization was successful |
1700 | * ENOMEM: the initialization failed (lack of resources) |
1701 | */ |
int
vm_impl_init(struct vm *vm, struct proc *p)
{
	int ret;

	/*
	 * Dispatch on vmm_softc->mode; the whole init runs under the
	 * kernel lock. An unrecognized mode is a programming error and
	 * panics rather than failing softly.
	 */
	KERNEL_LOCK()_kernel_lock();
	if (vmm_softc->mode == VMM_MODE_VMX ||
	    vmm_softc->mode == VMM_MODE_EPT)
		ret = vm_impl_init_vmx(vm, p);
	else if (vmm_softc->mode == VMM_MODE_SVM ||
	    vmm_softc->mode == VMM_MODE_RVI)
		ret = vm_impl_init_svm(vm, p);
	else
		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
	KERNEL_UNLOCK()_kernel_unlock();

	return (ret);
}
1720 | |
1721 | /* |
1722 | * vm_impl_deinit_vmx |
1723 | * |
1724 | * Intel VMX specific VM deinitialization routine |
1725 | * |
1726 | * Parameters: |
1727 | * vm: VM to deinit |
1728 | */ |
void
vm_impl_deinit_vmx(struct vm *vm)
{
	/* Unused */
	/* Intentionally empty; exists so vm_impl_deinit() can dispatch. */
}
1734 | |
1735 | /* |
1736 | * vm_impl_deinit_svm |
1737 | * |
1738 | * AMD SVM specific VM deinitialization routine |
1739 | * |
1740 | * Parameters: |
1741 | * vm: VM to deinit |
1742 | */ |
void
vm_impl_deinit_svm(struct vm *vm)
{
	/* Unused */
	/* Intentionally empty; exists so vm_impl_deinit() can dispatch. */
}
1748 | |
1749 | /* |
1750 | * vm_impl_deinit |
1751 | * |
1752 | * Calls the architecture-specific VM init routine |
1753 | * |
1754 | * Parameters: |
1755 | * vm: VM to deinit |
1756 | */ |
void
vm_impl_deinit(struct vm *vm)
{
	/*
	 * Dispatch to the architecture-specific deinit (both are
	 * currently no-ops); panic on an unrecognized mode, mirroring
	 * vm_impl_init().
	 */
	if (vmm_softc->mode == VMM_MODE_VMX ||
	    vmm_softc->mode == VMM_MODE_EPT)
		vm_impl_deinit_vmx(vm);
	else if (vmm_softc->mode == VMM_MODE_SVM ||
	    vmm_softc->mode == VMM_MODE_RVI)
		vm_impl_deinit_svm(vm);
	else
		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
}
1769 | |
1770 | /* |
1771 | * vcpu_reload_vmcs_vmx |
1772 | * |
1773 | * (Re)load the VMCS on the current cpu. Must be called with the VMCS write |
1774 | * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an |
1775 | * ipi will be used to remotely flush it before loading the VMCS locally. |
1776 | * |
1777 | * Parameters: |
1778 | * vcpu: Pointer to the vcpu needing its VMCS |
1779 | * |
1780 | * Return values: |
1781 | * 0: if successful |
1782 | * EINVAL: an error occurred during flush or reload |
1783 | */ |
int
vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
{
	struct cpu_info *ci, *last_ci;

	/* Caller must hold the vcpu write lock. */
	rw_assert_wrlock(&vcpu->vc_lock);

	ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;});
	last_ci = vcpu->vc_last_pcpu;

	/*
	 * Three cases: never launched (local VMCLEAR to initialize the
	 * VMCS state), launched on another cpu (remote VMCLEAR via IPI
	 * so the other cpu flushes its cached copy), or already on this
	 * cpu (nothing to flush).
	 */
	if (last_ci == NULL((void *)0)) {
		/* First launch */
		if (vmclear(&vcpu->vc_control_pa))
			return (EINVAL22);
		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
#ifdef MULTIPROCESSOR1
	} else if (last_ci != ci) {
		/* We've moved CPUs at some point, so remote VMCLEAR */
		if (vmx_remote_vmclear(last_ci, vcpu))
			return (EINVAL22);
		KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED)((vcpu->vc_vmx_vmcs_state == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 1804, "vcpu->vc_vmx_vmcs_state == VMCS_CLEARED" ));
#endif /* MULTIPROCESSOR */
	}

	/* Make this vcpu's VMCS current on the local cpu. */
	if (vmptrld(&vcpu->vc_control_pa)) {
		printf("%s: vmptrld\n", __func__);
		return (EINVAL22);
	}

	return (0);
}
1815 | |
1816 | /* |
1817 | * vcpu_readregs_vmx |
1818 | * |
1819 | * Reads 'vcpu's registers |
1820 | * |
1821 | * Parameters: |
1822 | * vcpu: the vcpu to read register values from |
1823 | * regmask: the types of registers to read |
1824 | * vrs: output parameter where register values are stored |
1825 | * |
1826 | * Return values: |
1827 | * 0: if successful |
1828 | * EINVAL: an error reading registers occurred |
1829 | */ |
int
vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask,
    struct vcpu_reg_state *vrs)
{
	int i, ret = 0;
	uint64_t sel, limit, ar;
	uint64_t *gprs = vrs->vrs_gprs;
	uint64_t *crs = vrs->vrs_crs;
	uint64_t *msrs = vrs->vrs_msrs;
	uint64_t *drs = vrs->vrs_drs;
	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
	struct vmx_msr_store *msr_store;

#ifdef VMM_DEBUG
	/* VMCS should be loaded... */
	paddr_t pa = 0ULL;
	if (vmptrst(&pa))
		panic("%s: vmptrst", __func__);
	KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 1848, "pa == vcpu->vc_control_pa" ));
#endif /* VMM_DEBUG */

	/*
	 * GPRs: most live in the software guest state; RSP and RFLAGS
	 * live in the VMCS and must be vmread.
	 */
	if (regmask & VM_RWREGS_GPRS0x1) {
		gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax;
		gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx;
		gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx;
		gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx;
		gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi;
		gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi;
		gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8;
		gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9;
		gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10;
		gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11;
		gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12;
		gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13;
		gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14;
		gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15;
		gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp;
		gprs[VCPU_REGS_RIP16] = vcpu->vc_gueststate.vg_rip;
		if (vmread(VMCS_GUEST_IA32_RSP0x681C, &gprs[VCPU_REGS_RSP14]))
			goto errout;
		if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &gprs[VCPU_REGS_RFLAGS17]))
			goto errout;
	}

	/*
	 * Segment registers: iterate the selector/limit/ar/base VMCS
	 * field table, then read GDTR/IDTR (limit + base only).
	 */
	if (regmask & VM_RWREGS_SREGS0x2) {
		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields )[0])); i++) {
			if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
				goto errout;
			if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
				goto errout;
			if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
				goto errout;
			if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
			    &sregs[i].vsi_base))
				goto errout;

			sregs[i].vsi_sel = sel;
			sregs[i].vsi_limit = limit;
			sregs[i].vsi_ar = ar;
		}

		if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &limit))
			goto errout;
		if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816,
		    &vrs->vrs_gdtr.vsi_base))
			goto errout;
		vrs->vrs_gdtr.vsi_limit = limit;

		if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &limit))
			goto errout;
		if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818,
		    &vrs->vrs_idtr.vsi_base))
			goto errout;
		vrs->vrs_idtr.vsi_limit = limit;
	}

	/*
	 * Control registers: CR2/XCR0 are kept in software state;
	 * CR0/CR3/CR4 and the PDPTEs come from the VMCS.
	 */
	if (regmask & VM_RWREGS_CRS0x4) {
		crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
		crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
		if (vmread(VMCS_GUEST_IA32_CR00x6800, &crs[VCPU_REGS_CR00]))
			goto errout;
		if (vmread(VMCS_GUEST_IA32_CR30x6802, &crs[VCPU_REGS_CR32]))
			goto errout;
		if (vmread(VMCS_GUEST_IA32_CR40x6804, &crs[VCPU_REGS_CR43]))
			goto errout;
		if (vmread(VMCS_GUEST_PDPTE00x280A, &crs[VCPU_REGS_PDPTE06]))
			goto errout;
		if (vmread(VMCS_GUEST_PDPTE10x280C, &crs[VCPU_REGS_PDPTE17]))
			goto errout;
		if (vmread(VMCS_GUEST_PDPTE20x280E, &crs[VCPU_REGS_PDPTE28]))
			goto errout;
		if (vmread(VMCS_GUEST_PDPTE30x2810, &crs[VCPU_REGS_PDPTE39]))
			goto errout;
	}

	/* MSRs come from the VM-exit MSR-store area, not the VMCS. */
	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;

	if (regmask & VM_RWREGS_MSRS0x8) {
		for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
			msrs[i] = msr_store[i].vms_data;
		}
	}

	/* Debug registers: only DR7 lives in the VMCS. */
	if (regmask & VM_RWREGS_DRS0x10) {
		drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
		drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
		drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
		drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
		drs[VCPU_REGS_DR64] = vcpu->vc_gueststate.vg_dr6;
		if (vmread(VMCS_GUEST_IA32_DR70x681A, &drs[VCPU_REGS_DR75]))
			goto errout;
	}

	goto out;

errout:
	ret = EINVAL22;
out:
	return (ret);
}
1950 | |
1951 | /* |
1952 | * vcpu_readregs_svm |
1953 | * |
1954 | * Reads 'vcpu's registers |
1955 | * |
1956 | * Parameters: |
1957 | * vcpu: the vcpu to read register values from |
1958 | * regmask: the types of registers to read |
1959 | * vrs: output parameter where register values are stored |
1960 | * |
1961 | * Return values: |
1962 | * 0: if successful |
1963 | */ |
int
vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
    struct vcpu_reg_state *vrs)
{
	uint64_t *gprs = vrs->vrs_gprs;
	uint64_t *crs = vrs->vrs_crs;
	uint64_t *msrs = vrs->vrs_msrs;
	uint64_t *drs = vrs->vrs_drs;
	uint32_t attr;
	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

	/*
	 * GPRs: most from the software guest state; RIP/RSP/RFLAGS are
	 * saved by hardware into the VMCB.
	 */
	if (regmask & VM_RWREGS_GPRS0x1) {
		gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax;
		gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx;
		gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx;
		gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx;
		gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi;
		gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi;
		gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8;
		gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9;
		gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10;
		gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11;
		gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12;
		gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13;
		gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14;
		gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15;
		gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp;
		gprs[VCPU_REGS_RIP16] = vmcb->v_rip;
		gprs[VCPU_REGS_RSP14] = vmcb->v_rsp;
		gprs[VCPU_REGS_RFLAGS17] = vmcb->v_rflags;
	}

	/*
	 * Segment registers: for each, copy sel/limit/base and expand
	 * the VMCB's packed attribute word — keep the low byte and move
	 * bits 8-11 up to bits 12-15 (the VMX-style access-rights
	 * layout used by struct vcpu_segment_info).
	 */
	if (regmask & VM_RWREGS_SREGS0x2) {
		sregs[VCPU_REGS_CS0].vsi_sel = vmcb->v_cs.vs_sel;
		sregs[VCPU_REGS_CS0].vsi_limit = vmcb->v_cs.vs_lim;
		attr = vmcb->v_cs.vs_attr;
		sregs[VCPU_REGS_CS0].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_CS0].vsi_base = vmcb->v_cs.vs_base;

		sregs[VCPU_REGS_DS1].vsi_sel = vmcb->v_ds.vs_sel;
		sregs[VCPU_REGS_DS1].vsi_limit = vmcb->v_ds.vs_lim;
		attr = vmcb->v_ds.vs_attr;
		sregs[VCPU_REGS_DS1].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_DS1].vsi_base = vmcb->v_ds.vs_base;

		sregs[VCPU_REGS_ES2].vsi_sel = vmcb->v_es.vs_sel;
		sregs[VCPU_REGS_ES2].vsi_limit = vmcb->v_es.vs_lim;
		attr = vmcb->v_es.vs_attr;
		sregs[VCPU_REGS_ES2].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_ES2].vsi_base = vmcb->v_es.vs_base;

		sregs[VCPU_REGS_FS3].vsi_sel = vmcb->v_fs.vs_sel;
		sregs[VCPU_REGS_FS3].vsi_limit = vmcb->v_fs.vs_lim;
		attr = vmcb->v_fs.vs_attr;
		sregs[VCPU_REGS_FS3].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_FS3].vsi_base = vmcb->v_fs.vs_base;

		sregs[VCPU_REGS_GS4].vsi_sel = vmcb->v_gs.vs_sel;
		sregs[VCPU_REGS_GS4].vsi_limit = vmcb->v_gs.vs_lim;
		attr = vmcb->v_gs.vs_attr;
		sregs[VCPU_REGS_GS4].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_GS4].vsi_base = vmcb->v_gs.vs_base;

		sregs[VCPU_REGS_SS5].vsi_sel = vmcb->v_ss.vs_sel;
		sregs[VCPU_REGS_SS5].vsi_limit = vmcb->v_ss.vs_lim;
		attr = vmcb->v_ss.vs_attr;
		sregs[VCPU_REGS_SS5].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_SS5].vsi_base = vmcb->v_ss.vs_base;

		sregs[VCPU_REGS_LDTR6].vsi_sel = vmcb->v_ldtr.vs_sel;
		sregs[VCPU_REGS_LDTR6].vsi_limit = vmcb->v_ldtr.vs_lim;
		attr = vmcb->v_ldtr.vs_attr;
		sregs[VCPU_REGS_LDTR6].vsi_ar = (attr & 0xff) | ((attr << 4)
		    & 0xf000);
		sregs[VCPU_REGS_LDTR6].vsi_base = vmcb->v_ldtr.vs_base;

		sregs[VCPU_REGS_TR7].vsi_sel = vmcb->v_tr.vs_sel;
		sregs[VCPU_REGS_TR7].vsi_limit = vmcb->v_tr.vs_lim;
		attr = vmcb->v_tr.vs_attr;
		sregs[VCPU_REGS_TR7].vsi_ar = (attr & 0xff) | ((attr << 4) &
		    0xf000);
		sregs[VCPU_REGS_TR7].vsi_base = vmcb->v_tr.vs_base;

		vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
		vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
		vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
		vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
	}

	/* CR0/CR3/CR4 from the VMCB; CR2/XCR0 from software state. */
	if (regmask & VM_RWREGS_CRS0x4) {
		crs[VCPU_REGS_CR00] = vmcb->v_cr0;
		crs[VCPU_REGS_CR32] = vmcb->v_cr3;
		crs[VCPU_REGS_CR43] = vmcb->v_cr4;
		crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
		crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
	}

	/* MSRs saved/restored by hardware in the VMCB save area. */
	if (regmask & VM_RWREGS_MSRS0x8) {
		msrs[VCPU_REGS_EFER0] = vmcb->v_efer;
		msrs[VCPU_REGS_STAR1] = vmcb->v_star;
		msrs[VCPU_REGS_LSTAR2] = vmcb->v_lstar;
		msrs[VCPU_REGS_CSTAR3] = vmcb->v_cstar;
		msrs[VCPU_REGS_SFMASK4] = vmcb->v_sfmask;
		msrs[VCPU_REGS_KGSBASE5] = vmcb->v_kgsbase;
	}

	/* Debug registers: DR6/DR7 from the VMCB, the rest from software. */
	if (regmask & VM_RWREGS_DRS0x10) {
		drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
		drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
		drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
		drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
		drs[VCPU_REGS_DR64] = vmcb->v_dr6;
		drs[VCPU_REGS_DR75] = vmcb->v_dr7;
	}

	return (0);
}
2088 | |
2089 | /* |
2090 | * vcpu_writeregs_vmx |
2091 | * |
2092 | * Writes VCPU registers |
2093 | * |
2094 | * Parameters: |
2095 | * vcpu: the vcpu that has to get its registers written to |
2096 | * regmask: the types of registers to write |
2097 | * loadvmcs: bit to indicate whether the VMCS has to be loaded first |
2098 | * vrs: the register values to write |
2099 | * |
2100 | * Return values: |
2101 | * 0: if successful |
2102 | * EINVAL an error writing registers occurred |
2103 | */ |
int
vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
    struct vcpu_reg_state *vrs)
{
	int i, ret = 0;
	uint16_t sel;
	uint64_t limit, ar;
	uint64_t *gprs = vrs->vrs_gprs;
	uint64_t *crs = vrs->vrs_crs;
	uint64_t *msrs = vrs->vrs_msrs;
	uint64_t *drs = vrs->vrs_drs;
	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
	struct vmx_msr_store *msr_store;

	/*
	 * Optionally make the VMCS current first; if the caller already
	 * loaded it (loadvmcs == 0) we write into the current VMCS.
	 */
	if (loadvmcs) {
		if (vcpu_reload_vmcs_vmx(vcpu))
			return (EINVAL22);
	}

#ifdef VMM_DEBUG
	/* VMCS should be loaded... */
	paddr_t pa = 0ULL;
	if (vmptrst(&pa))
		panic("%s: vmptrst", __func__);
	KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 2128, "pa == vcpu->vc_control_pa" ));
#endif /* VMM_DEBUG */

	/*
	 * GPRs: mirror into software guest state; RIP/RSP/RFLAGS must
	 * also be vmwritten into the VMCS (RIP is kept in both places).
	 */
	if (regmask & VM_RWREGS_GPRS0x1) {
		vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
		vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1];
		vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2];
		vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3];
		vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4];
		vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5];
		vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86];
		vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97];
		vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108];
		vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119];
		vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210];
		vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311];
		vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412];
		vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513];
		vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15];
		vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];
		if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, gprs[VCPU_REGS_RIP16]))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_RSP0x681C, gprs[VCPU_REGS_RSP14]))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_RFLAGS0x6820, gprs[VCPU_REGS_RFLAGS17]))
			goto errout;
	}

	/*
	 * Segment registers via the same VMCS field table used by
	 * vcpu_readregs_vmx, plus GDTR/IDTR limit and base.
	 */
	if (regmask & VM_RWREGS_SREGS0x2) {
		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields )[0])); i++) {
			sel = sregs[i].vsi_sel;
			limit = sregs[i].vsi_limit;
			ar = sregs[i].vsi_ar;

			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
				goto errout;
			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
				goto errout;
			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
				goto errout;
			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
			    sregs[i].vsi_base))
				goto errout;
		}

		if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT0x4810,
		    vrs->vrs_gdtr.vsi_limit))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE0x6816,
		    vrs->vrs_gdtr.vsi_base))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT0x4812,
		    vrs->vrs_idtr.vsi_limit))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE0x6818,
		    vrs->vrs_idtr.vsi_base))
			goto errout;
	}

	/*
	 * Control registers: XCR0 goes to software state only;
	 * CR0/CR3/CR4 and PDPTEs are vmwritten. Note CR2 is not
	 * written here (it is maintained in software state).
	 */
	if (regmask & VM_RWREGS_CRS0x4) {
		vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
		if (vmwrite(VMCS_GUEST_IA32_CR00x6800, crs[VCPU_REGS_CR00]))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_CR30x6802, crs[VCPU_REGS_CR32]))
			goto errout;
		if (vmwrite(VMCS_GUEST_IA32_CR40x6804, crs[VCPU_REGS_CR43]))
			goto errout;
		if (vmwrite(VMCS_GUEST_PDPTE00x280A, crs[VCPU_REGS_PDPTE06]))
			goto errout;
		if (vmwrite(VMCS_GUEST_PDPTE10x280C, crs[VCPU_REGS_PDPTE17]))
			goto errout;
		if (vmwrite(VMCS_GUEST_PDPTE20x280E, crs[VCPU_REGS_PDPTE28]))
			goto errout;
		if (vmwrite(VMCS_GUEST_PDPTE30x2810, crs[VCPU_REGS_PDPTE39]))
			goto errout;
	}

	/* MSRs go into the VM-exit MSR-store area. */
	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;

	if (regmask & VM_RWREGS_MSRS0x8) {
		for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
			msr_store[i].vms_data = msrs[i];
		}
	}

	/* Debug registers: only DR7 is vmwritten. */
	if (regmask & VM_RWREGS_DRS0x10) {
		vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
		vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
		vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
		vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
		vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR64];
		if (vmwrite(VMCS_GUEST_IA32_DR70x681A, drs[VCPU_REGS_DR75]))
			goto errout;
	}

	goto out;

errout:
	ret = EINVAL22;
out:
	/* If we loaded the VMCS here, vmclear it again on the way out. */
	if (loadvmcs) {
		if (vmclear(&vcpu->vc_control_pa))
			ret = EINVAL22;
	}
	return (ret);
}
2234 | |
2235 | /* |
2236 | * vcpu_writeregs_svm |
2237 | * |
2238 | * Writes 'vcpu's registers |
2239 | * |
2240 | * Parameters: |
2241 | * vcpu: the vcpu that has to get its registers written to |
2242 | * regmask: the types of registers to write |
2243 | * vrs: the register values to write |
2244 | * |
2245 | * Return values: |
2246 | * 0: if successful |
2247 | * EINVAL an error writing registers occurred |
2248 | */ |
int
vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
    struct vcpu_reg_state *vrs)
{
	uint64_t *gprs = vrs->vrs_gprs;
	uint64_t *crs = vrs->vrs_crs;
	uint16_t attr;
	uint64_t *msrs = vrs->vrs_msrs;
	uint64_t *drs = vrs->vrs_drs;
	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

	/*
	 * GPRs: mirror into software guest state; RIP/RSP/RFLAGS also
	 * go into the VMCB so hardware restores them on VMRUN.
	 */
	if (regmask & VM_RWREGS_GPRS0x1) {
		vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
		vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1];
		vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2];
		vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3];
		vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4];
		vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5];
		vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86];
		vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97];
		vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108];
		vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119];
		vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210];
		vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311];
		vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412];
		vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513];
		vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15];
		vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];

		vmcb->v_rip = gprs[VCPU_REGS_RIP16];
		vmcb->v_rsp = gprs[VCPU_REGS_RSP14];
		vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS17];
	}

	/*
	 * Segment registers: the inverse of vcpu_readregs_svm — pack
	 * the VMX-style access rights back into the VMCB attribute
	 * format (low byte kept, bits 12-15 moved down to bits 8-11).
	 */
	if (regmask & VM_RWREGS_SREGS0x2) {
		vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS0].vsi_sel;
		vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS0].vsi_limit;
		attr = sregs[VCPU_REGS_CS0].vsi_ar;
		vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS0].vsi_base;
		vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS1].vsi_sel;
		vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS1].vsi_limit;
		attr = sregs[VCPU_REGS_DS1].vsi_ar;
		vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS1].vsi_base;
		vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES2].vsi_sel;
		vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES2].vsi_limit;
		attr = sregs[VCPU_REGS_ES2].vsi_ar;
		vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_es.vs_base = sregs[VCPU_REGS_ES2].vsi_base;
		vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS3].vsi_sel;
		vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS3].vsi_limit;
		attr = sregs[VCPU_REGS_FS3].vsi_ar;
		vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS3].vsi_base;
		vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS4].vsi_sel;
		vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS4].vsi_limit;
		attr = sregs[VCPU_REGS_GS4].vsi_ar;
		vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS4].vsi_base;
		vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS5].vsi_sel;
		vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS5].vsi_limit;
		attr = sregs[VCPU_REGS_SS5].vsi_ar;
		vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS5].vsi_base;
		vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR6].vsi_sel;
		vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR6].vsi_limit;
		attr = sregs[VCPU_REGS_LDTR6].vsi_ar;
		vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR6].vsi_base;
		vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR7].vsi_sel;
		vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR7].vsi_limit;
		attr = sregs[VCPU_REGS_TR7].vsi_ar;
		vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
		vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR7].vsi_base;
		vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
		vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
		vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
		vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
	}

	/* CR0/CR3/CR4 into the VMCB; CR2/XCR0 into software state. */
	if (regmask & VM_RWREGS_CRS0x4) {
		vmcb->v_cr0 = crs[VCPU_REGS_CR00];
		vmcb->v_cr3 = crs[VCPU_REGS_CR32];
		vmcb->v_cr4 = crs[VCPU_REGS_CR43];
		vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR21];
		vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
	}

	if (regmask & VM_RWREGS_MSRS0x8) {
		/*
		 * EFER is OR'ed (not assigned) so bits already set in
		 * the VMCB — notably SVME, which must always remain on
		 * (see vcpu_reset_regs_svm) — cannot be cleared by the
		 * caller-supplied value.
		 */
		vmcb->v_efer |= msrs[VCPU_REGS_EFER0];
		vmcb->v_star = msrs[VCPU_REGS_STAR1];
		vmcb->v_lstar = msrs[VCPU_REGS_LSTAR2];
		vmcb->v_cstar = msrs[VCPU_REGS_CSTAR3];
		vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK4];
		vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE5];
	}

	/* Debug registers: DR6/DR7 into the VMCB, the rest into software. */
	if (regmask & VM_RWREGS_DRS0x10) {
		vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
		vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
		vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
		vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
		vmcb->v_dr6 = drs[VCPU_REGS_DR64];
		vmcb->v_dr7 = drs[VCPU_REGS_DR75];
	}

	return (0);
}
2359 | |
2360 | /* |
2361 | * vcpu_reset_regs_svm |
2362 | * |
2363 | * Initializes 'vcpu's registers to supplied state |
2364 | * |
2365 | * Parameters: |
2366 | * vcpu: the vcpu whose register state is to be initialized |
2367 | * vrs: the register state to set |
2368 | * |
2369 | * Return values: |
2370 | * 0: registers init'ed successfully |
2371 | * EINVAL: an error occurred setting register state |
2372 | */ |
int
vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
{
	struct vmcb *vmcb;
	int ret;
	uint16_t asid;

	vmcb = (struct vmcb *)vcpu->vc_control_va;

	/*
	 * Intercept controls
	 *
	 * External Interrupt exiting (SVM_INTERCEPT_INTR)
	 * External NMI exiting (SVM_INTERCEPT_NMI)
	 * CPUID instruction (SVM_INTERCEPT_CPUID)
	 * HLT instruction (SVM_INTERCEPT_HLT)
	 * I/O instructions (SVM_INTERCEPT_INOUT)
	 * MSR access (SVM_INTERCEPT_MSR)
	 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
	 *
	 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
	 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
	 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
	 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
	 * STGI instruction (SVM_INTERCEPT_STGI)
	 * CLGI instruction (SVM_INTERCEPT_CLGI)
	 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
	 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
	 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
	 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
	 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
	 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
	 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
	 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
	 */
	vmcb->v_intercept1 = SVM_INTERCEPT_INTR(1UL << 0) | SVM_INTERCEPT_NMI(1UL << 1) |
	    SVM_INTERCEPT_CPUID(1UL << 18) | SVM_INTERCEPT_HLT(1UL << 24) | SVM_INTERCEPT_INOUT(1UL << 27) |
	    SVM_INTERCEPT_MSR(1UL << 28) | SVM_INTERCEPT_SHUTDOWN(1UL << 31);

	vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN(1UL << 0) | SVM_INTERCEPT_VMMCALL(1UL << 1) |
	    SVM_INTERCEPT_VMLOAD(1UL << 2) | SVM_INTERCEPT_VMSAVE(1UL << 3) | SVM_INTERCEPT_STGI(1UL << 4) |
	    SVM_INTERCEPT_CLGI(1UL << 5) | SVM_INTERCEPT_SKINIT(1UL << 6) | SVM_INTERCEPT_ICEBP(1UL << 8) |
	    SVM_INTERCEPT_MWAIT_UNCOND(1UL << 11) | SVM_INTERCEPT_MONITOR(1UL << 10) |
	    SVM_INTERCEPT_MWAIT_COND(1UL << 12) | SVM_INTERCEPT_RDTSCP(1UL << 7) |
	    SVM_INTERCEPT_INVLPGA(1UL << 26);

	/* Only intercept XSETBV when xsave is in use on the host. */
	if (xsave_mask)
		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV(1UL << 13);

	/* Setup I/O bitmap */
	/* All ones = every port intercepted. */
	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_svm_ioio_va), (0xFF) , (3 * (1 << 12)));
	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);

	/* Setup MSR bitmap */
	/* All ones = every MSR intercepted; then punch holes below. */
	memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF ), (2 * (1 << 12)));
	vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
	svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
	svm_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
	svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
	svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
	svm_setmsrbrw(vcpu, MSR_STAR0xc0000081);
	svm_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
	svm_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
	svm_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
	svm_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
	svm_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);

	/* EFER is R/O so we can ensure the guest always has SVME */
	svm_setmsrbr(vcpu, MSR_EFER0xc0000080);

	/* allow reading TSC */
	svm_setmsrbr(vcpu, MSR_TSC0x010);

	/* Guest VCPU ASID */
	if (vmm_alloc_vpid(&asid)) {
		DPRINTF("%s: could not allocate asid\n", __func__);
		ret = EINVAL22;
		goto exit;
	}

	vmcb->v_asid = asid;
	vcpu->vc_vpid = asid;

	/* TLB Control - First time in, flush all*/
	vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL1;

	/* INTR masking */
	vmcb->v_intr_masking = 1;

	/* PAT */
	vmcb->v_g_pat = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) |
	    PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) |
	    PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) |
	    PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8));

	/* NPT */
	if (vmm_softc->mode == VMM_MODE_RVI) {
		vmcb->v_np_enable = 1;
		vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
	}

	/* Enable SVME in EFER (must always be set) */
	vmcb->v_efer |= EFER_SVME0x00001000;

	/* Load the caller-supplied initial register state. */
	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), vrs);

	/* xcr0 power on default sets bit 0 (x87 state) */
	vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask;

	/* NOTE(review): eptp is a VMX concept; cleared here on SVM too. */
	vcpu->vc_parent->vm_map->pmap->eptp = 0;

exit:
	return ret;
}
2488 | |
2489 | /* |
2490 | * svm_setmsrbr |
2491 | * |
2492 | * Allow read access to the specified msr on the supplied vcpu. |
2493 | * |
2494 | * Parameters: |
2495 | * vcpu: the VCPU to allow access |
2496 | * msr: the MSR number to allow access to |
2497 | */ |
2498 | void
2499 | svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2500 | {
2501 | 	uint8_t *msrs;	/* base of this vcpu's SVM MSR permission map */
2502 | 	uint16_t idx;	/* byte offset of the 2-bit pair for 'msr' */
2503 | 
2504 | 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2505 | 
2506 | 	/*
2507 | 	 * MSR Read bitmap layout:
2508 | 	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2509 | 	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2510 | 	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2511 | 	 *
2512 | 	 * Read enable bit is low order bit of 2-bit pair
2513 | 	 * per MSR (eg, MSR 0x0 write bit is at bit 0 @ 0x0)
2514 | 	 *
2515 | 	 * Clearing the bit disables the RDMSR intercept, i.e. it GRANTS
2516 | 	 * the guest read access to 'msr'.  MSRs outside the three ranges
2517 | 	 * above have no bitmap slot and are rejected with a log message.
2518 | 	 */
2519 | 	if (msr <= 0x1fff) {	/* Pentium range */
2520 | 		idx = SVM_MSRIDX(msr)((msr) / 4);
2521 | 		msrs[idx] &= ~(SVM_MSRBIT_R(msr)(1 << (((msr) % 4) * 2)));
2522 | 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {	/* syscall range */
2523 | 		idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2524 | 		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2)));
2525 | 	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {	/* AMD Gen7/8 range */
2526 | 		idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2527 | 		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2)));
2528 | 	} else {
2529 | 		printf("%s: invalid msr 0x%x\n", __func__, msr);
2530 | 		return;
2531 | 	}
2532 | }
2529 | |
2530 | /* |
2531 | * svm_setmsrbw |
2532 | * |
2533 | * Allow write access to the specified msr on the supplied vcpu |
2534 | * |
2535 | * Parameters: |
2536 | * vcpu: the VCPU to allow access |
2537 | * msr: the MSR number to allow access to |
2538 | */ |
2539 | void
2540 | svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2541 | {
2542 | 	uint8_t *msrs;	/* base of this vcpu's SVM MSR permission map */
2543 | 	uint16_t idx;	/* byte offset of the 2-bit pair for 'msr' */
2544 | 
2545 | 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2546 | 
2547 | 	/*
2548 | 	 * MSR Write bitmap layout:
2549 | 	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2550 | 	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2551 | 	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2552 | 	 *
2553 | 	 * Write enable bit is high order bit of 2-bit pair
2554 | 	 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
2555 | 	 *
2556 | 	 * Clearing the bit disables the WRMSR intercept, i.e. it GRANTS
2557 | 	 * the guest write access to 'msr'.  Same range layout as
2558 | 	 * svm_setmsrbr(), only the bit within the pair differs.
2559 | 	 */
2560 | 	if (msr <= 0x1fff) {	/* Pentium range */
2561 | 		idx = SVM_MSRIDX(msr)((msr) / 4);
2562 | 		msrs[idx] &= ~(SVM_MSRBIT_W(msr)(1 << (((msr) % 4) * 2 + 1)));
2563 | 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {	/* syscall range */
2564 | 		idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2565 | 		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2 + 1)));
2566 | 	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {	/* AMD Gen7/8 range */
2567 | 		idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2568 | 		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2 + 1)));
2569 | 	} else {
2570 | 		printf("%s: invalid msr 0x%x\n", __func__, msr);
2571 | 		return;
2572 | 	}
2573 | }
2570 | |
2571 | /* |
2572 | * svm_setmsrbrw |
2573 | * |
2574 | * Allow read/write access to the specified msr on the supplied vcpu |
2575 | * |
2576 | * Parameters: |
2577 | * vcpu: the VCPU to allow access |
2578 | * msr: the MSR number to allow access to |
2579 | */ |
2580 | void
2581 | svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2582 | {
2583 | 	svm_setmsrbr(vcpu, msr);	/* allow guest RDMSR */
2584 | 	svm_setmsrbw(vcpu, msr);	/* allow guest WRMSR */
2585 | }
2586 | |
2587 | /* |
2588 | * vmx_setmsrbr |
2589 | * |
2590 | * Allow read access to the specified msr on the supplied vcpu. |
2591 | * |
2592 | * Parameters: |
2593 | * vcpu: the VCPU to allow access |
2594 | * msr: the MSR number to allow access to |
2595 | */ |
2596 | void
2597 | vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2598 | {
2599 | 	uint8_t *msrs;	/* base of this vcpu's VMX MSR bitmap page */
2600 | 	uint16_t idx;	/* byte offset of the read-intercept bit for 'msr' */
2601 | 
2602 | 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2603 | 
2604 | 	/*
2605 | 	 * MSR Read bitmap layout:
2606 | 	 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
2607 | 	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
2608 | 	 *
2609 | 	 * One bit per MSR here (unlike SVM's 2-bit pairs); clearing the
2610 | 	 * bit disables the RDMSR exit, i.e. it GRANTS guest read access.
2611 | 	 */
2612 | 	if (msr <= 0x1fff) {	/* low range */
2613 | 		idx = VMX_MSRIDX(msr)((msr) / 8);
2614 | 		msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2615 | 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {	/* high range */
2616 | 		idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0x400;
2617 | 		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2618 | 	} else	/* no bitmap slot for other MSRs */
2619 | 		printf("%s: invalid msr 0x%x\n", __func__, msr);
2620 | }
2618 | |
2619 | /* |
2620 | * vmx_setmsrbw |
2621 | * |
2622 | * Allow write access to the specified msr on the supplied vcpu |
2623 | * |
2624 | * Parameters: |
2625 | * vcpu: the VCPU to allow access |
2626 | * msr: the MSR number to allow access to |
2627 | */ |
2628 | void
2629 | vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2630 | {
2631 | 	uint8_t *msrs;	/* base of this vcpu's VMX MSR bitmap page */
2632 | 	uint16_t idx;	/* byte offset of the write-intercept bit for 'msr' */
2633 | 
2634 | 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2635 | 
2636 | 	/*
2637 | 	 * MSR Write bitmap layout:
2638 | 	 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
2639 | 	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
2640 | 	 *
2641 | 	 * Same scheme as vmx_setmsrbr() but in the write half of the
2642 | 	 * bitmap page; clearing the bit GRANTS guest write access.
2643 | 	 */
2644 | 	if (msr <= 0x1fff) {	/* low range */
2645 | 		idx = VMX_MSRIDX(msr)((msr) / 8) + 0x800;
2646 | 		msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2647 | 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {	/* high range */
2648 | 		idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0xc00;
2649 | 		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2650 | 	} else	/* no bitmap slot for other MSRs */
2651 | 		printf("%s: invalid msr 0x%x\n", __func__, msr);
2652 | }
2650 | |
2651 | /* |
2652 | * vmx_setmsrbrw |
2653 | * |
2654 | * Allow read/write access to the specified msr on the supplied vcpu |
2655 | * |
2656 | * Parameters: |
2657 | * vcpu: the VCPU to allow access |
2658 | * msr: the MSR number to allow access to |
2659 | */ |
2660 | void
2661 | vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2662 | {
2663 | 	vmx_setmsrbr(vcpu, msr);	/* allow guest RDMSR */
2664 | 	vmx_setmsrbw(vcpu, msr);	/* allow guest WRMSR */
2665 | }
2666 | |
2667 | /* |
2668 | * svm_set_clean |
2669 | * |
2670 | * Sets (mark as unmodified) the VMCB clean bit set in 'value'. |
2671 | * For example, to set the clean bit for the VMCB intercepts (bit position 0), |
2672 | * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument. |
2673 | * Multiple cleanbits can be provided in 'value' at the same time (eg, |
2674 | * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR"). |
2675 | * |
2676 | * Note that this function does not clear any bits; to clear bits in the |
2677 | * vmcb cleanbits bitfield, use 'svm_set_dirty'. |
2678 | * |
2679 | * Parameters: |
2680 | * vmcs: the VCPU whose VMCB clean value should be set |
2681 | * value: the value(s) to enable in the cleanbits mask |
2682 | */ |
2683 | void
2684 | svm_set_clean(struct vcpu *vcpu, uint32_t value)
2685 | {
2686 | 	struct vmcb *vmcb;
2687 | 
2688 | 	/* If no cleanbits support, do nothing */
2689 | 	if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2690 | 		return;
2691 | 
2692 | 	vmcb = (struct vmcb *)vcpu->vc_control_va;
2693 | 
2694 | 	/* OR only: marks 'value' bits clean, never clears others */
2695 | 	vmcb->v_vmcb_clean_bits |= value;
2696 | }
2696 | |
2697 | /* |
2698 | * svm_set_dirty |
2699 | * |
2700 | * Clears (mark as modified) the VMCB clean bit set in 'value'. |
2701 | * For example, to clear the bit for the VMCB intercepts (bit position 0) |
2702 | * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument. |
2703 | * Multiple dirty bits can be provided in 'value' at the same time (eg, |
2704 | * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR"). |
2705 | * |
2706 | * Parameters: |
2707 | * vmcs: the VCPU whose VMCB dirty value should be set |
2708 | * value: the value(s) to dirty in the cleanbits mask |
2709 | */ |
2710 | void
2711 | svm_set_dirty(struct vcpu *vcpu, uint32_t value)
2712 | {
2713 | 	struct vmcb *vmcb;
2714 | 
2715 | 	/* If no cleanbits support, do nothing */
2716 | 	if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2717 | 		return;
2718 | 
2719 | 	vmcb = (struct vmcb *)vcpu->vc_control_va;
2720 | 
2721 | 	/* AND-NOT only: marks 'value' bits dirty, leaves others clean */
2722 | 	vmcb->v_vmcb_clean_bits &= ~value;
2723 | }
2723 | |
2724 | /* |
2725 | * vcpu_reset_regs_vmx |
2726 | * |
2727 | * Initializes 'vcpu's registers to supplied state |
2728 | * |
2729 | * Parameters: |
2730 | * vcpu: the vcpu whose register state is to be initialized |
2731 | * vrs: the register state to set |
2732 | * |
2733 | * Return values: |
2734 | * 0: registers init'ed successfully |
2735 | * EINVAL: an error occurred setting register state |
2736 | */ |
2737 | int
2738 | vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2739 | {
2740 | 	int ret = 0, ug = 0;	/* ug: unrestricted-guest mode enabled */
2741 | 	uint32_t cr0, cr4;
2742 | 	uint32_t pinbased, procbased, procbased2, exit, entry;	/* computed VMCS controls */
2743 | 	uint32_t want1, want0;	/* control bits that must be 1 / must be 0 */
2744 | 	uint64_t msr, ctrlval, eptp, cr3;
2745 | 	uint16_t ctrl, vpid;
2746 | 	struct vmx_msr_store *msr_store;
2747 | 
2748 | 	rw_assert_wrlock(&vcpu->vc_lock);	/* caller must hold the vcpu lock exclusively */
2749 | 
2750 | 	cr0 = vrs->vrs_crs[VCPU_REGS_CR00];	/* requested CR0; adjusted below per VMX fixed bits */
2751 | 
2752 | 	if (vcpu_reload_vmcs_vmx(vcpu)) {
2753 | 		DPRINTF("%s: error reloading VMCS\n", __func__);
2754 | 		ret = EINVAL22;
2755 | 		goto exit;
2756 | 	}
2757 | 
2758 | #ifdef VMM_DEBUG
2759 | 	/* VMCS should be loaded... */
2760 | 	paddr_t pa = 0ULL;
2761 | 	if (vmptrst(&pa))
2762 | 		panic("%s: vmptrst", __func__);
2763 | 	KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 2763, "pa == vcpu->vc_control_pa" ));
2764 | #endif /* VMM_DEBUG */
2765 | 
2766 | 	/* Compute Basic Entry / Exit Controls */
2767 | 	vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC0x480);
2768 | 	vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS0x484);
2769 | 	vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS0x483);
2770 | 	vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS0x481);
2771 | 	vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS0x482);
2772 | 
2773 | 	/* Compute True Entry / Exit Controls (if applicable) */
2774 | 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2775 | 		vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS0x490);
2776 | 		vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS0x48F);
2777 | 		vcpu->vc_vmx_true_pinbased_ctls =
2778 | 		    rdmsr(IA32_VMX_TRUE_PINBASED_CTLS0x48D);
2779 | 		vcpu->vc_vmx_true_procbased_ctls =
2780 | 		    rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS0x48E);
2781 | 	}
2782 | 
2783 | 	/* Compute Secondary Procbased Controls (if applicable) */
2784 | 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2785 | 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1))
2786 | 		vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS0x48B);
2787 | 
2788 | 	/*
2789 | 	 * Pinbased ctrls
2790 | 	 *
2791 | 	 * We must be able to set the following:
2792 | 	 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
2793 | 	 * IA32_VMX_NMI_EXITING - exit on host NMI
2794 | 	 */
2795 | 	want1 = IA32_VMX_EXTERNAL_INT_EXITING(1ULL << 0) |
2796 | 	    IA32_VMX_NMI_EXITING(1ULL << 3);
2797 | 	want0 = 0;
2798 | 
2799 | 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2800 | 		ctrl = IA32_VMX_TRUE_PINBASED_CTLS0x48D;
2801 | 		ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2802 | 	} else {
2803 | 		ctrl = IA32_VMX_PINBASED_CTLS0x481;
2804 | 		ctrlval = vcpu->vc_vmx_pinbased_ctls;
2805 | 	}
2806 | 
2807 | 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2808 | 		DPRINTF("%s: error computing pinbased controls\n", __func__);
2809 | 		ret = EINVAL22;
2810 | 		goto exit;
2811 | 	}
2812 | 
2813 | 	if (vmwrite(VMCS_PINBASED_CTLS0x4000, pinbased)) {
2814 | 		DPRINTF("%s: error setting pinbased controls\n", __func__);
2815 | 		ret = EINVAL22;
2816 | 		goto exit;
2817 | 	}
2818 | 
2819 | 	/*
2820 | 	 * Procbased ctrls
2821 | 	 *
2822 | 	 * We must be able to set the following:
2823 | 	 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2824 | 	 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2825 | 	 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2826 | 	 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2827 | 	 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2828 | 	 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2829 | 	 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2830 | 	 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2831 | 	 *
2832 | 	 * If we have EPT, we must be able to clear the following
2833 | 	 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2834 | 	 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2835 | 	 */
2836 | 	want1 = IA32_VMX_HLT_EXITING(1ULL << 7) |
2837 | 	    IA32_VMX_MWAIT_EXITING(1ULL << 10) |
2838 | 	    IA32_VMX_UNCONDITIONAL_IO_EXITING(1ULL << 24) |
2839 | 	    IA32_VMX_USE_MSR_BITMAPS(1ULL << 28) |
2840 | 	    IA32_VMX_CR8_LOAD_EXITING(1ULL << 19) |
2841 | 	    IA32_VMX_CR8_STORE_EXITING(1ULL << 20) |
2842 | 	    IA32_VMX_MONITOR_EXITING(1ULL << 29) |
2843 | 	    IA32_VMX_USE_TPR_SHADOW(1ULL << 21);
2844 | 	want0 = 0;
2845 | 
2846 | 	if (vmm_softc->mode == VMM_MODE_EPT) {
2847 | 		want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31);
2848 | 		want0 |= IA32_VMX_CR3_LOAD_EXITING(1ULL << 15) |
2849 | 		    IA32_VMX_CR3_STORE_EXITING(1ULL << 16);
2850 | 	}
2851 | 
2852 | 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2853 | 		ctrl = IA32_VMX_TRUE_PROCBASED_CTLS0x48E;
2854 | 		ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2855 | 	} else {
2856 | 		ctrl = IA32_VMX_PROCBASED_CTLS0x482;
2857 | 		ctrlval = vcpu->vc_vmx_procbased_ctls;
2858 | 	}
2859 | 
2860 | 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2861 | 		DPRINTF("%s: error computing procbased controls\n", __func__);
2862 | 		ret = EINVAL22;
2863 | 		goto exit;
2864 | 	}
2865 | 
2866 | 	if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
2867 | 		DPRINTF("%s: error setting procbased controls\n", __func__);
2868 | 		ret = EINVAL22;
2869 | 		goto exit;
2870 | 	}
2871 | 
2872 | 	/*
2873 | 	 * Secondary Procbased ctrls
2874 | 	 *
2875 | 	 * We want to be able to set the following, if available:
2876 | 	 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2877 | 	 *
2878 | 	 * If we have EPT, we must be able to set the following:
2879 | 	 * IA32_VMX_ENABLE_EPT - enable EPT
2880 | 	 *
2881 | 	 * If we have unrestricted guest capability, we must be able to set
2882 | 	 * the following:
2883 | 	 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller
2884 | 	 * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter)
2885 | 	 */
2886 | 	want1 = 0;
2887 | 
2888 | 	/* XXX checking for 2ndary controls can be combined here */
2889 | 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2890 | 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2891 | 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2892 | 		    IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
2893 | 			want1 |= IA32_VMX_ENABLE_VPID(1ULL << 5);
2894 | 			vcpu->vc_vmx_vpid_enabled = 1;
2895 | 		}
2896 | 	}
2897 | 
2898 | 	if (vmm_softc->mode == VMM_MODE_EPT)
2899 | 		want1 |= IA32_VMX_ENABLE_EPT(1ULL << 1);
2900 | 
2901 | 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2902 | 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2903 | 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2904 | 		    IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7), 1)) {
2905 | 			if ((cr0 & (CR0_PE0x00000001 | CR0_PG0x80000000)) == 0) {
2906 | 				want1 |= IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7);
2907 | 				ug = 1;
2908 | 			}
2909 | 		}
2910 | 	}
2911 | 
2912 | 	want0 = ~want1;	/* clear every secondary control we did not ask for */
2913 | 	ctrlval = vcpu->vc_vmx_procbased2_ctls;
2914 | 	ctrl = IA32_VMX_PROCBASED2_CTLS0x48B;
2915 | 
2916 | 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2917 | 		DPRINTF("%s: error computing secondary procbased controls\n",
2918 | 		    __func__);
2919 | 		ret = EINVAL22;
2920 | 		goto exit;
2921 | 	}
2922 | 
2923 | 	if (vmwrite(VMCS_PROCBASED2_CTLS0x401E, procbased2)) {
2924 | 		DPRINTF("%s: error setting secondary procbased controls\n",
2925 | 		    __func__);
2926 | 		ret = EINVAL22;
2927 | 		goto exit;
2928 | 	}
2929 | 
2930 | 	/*
2931 | 	 * Exit ctrls
2932 | 	 *
2933 | 	 * We must be able to set the following:
2934 | 	 * IA32_VMX_SAVE_DEBUG_CONTROLS
2935 | 	 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2936 | 	 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2937 | 	 */
2938 | 	want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE(1ULL << 9) |
2939 | 	    IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT(1ULL << 15) |
2940 | 	    IA32_VMX_SAVE_DEBUG_CONTROLS(1ULL << 2);
2941 | 	want0 = 0;
2942 | 
2943 | 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2944 | 		ctrl = IA32_VMX_TRUE_EXIT_CTLS0x48F;
2945 | 		ctrlval = vcpu->vc_vmx_true_exit_ctls;
2946 | 	} else {
2947 | 		ctrl = IA32_VMX_EXIT_CTLS0x483;
2948 | 		ctrlval = vcpu->vc_vmx_exit_ctls;
2949 | 	}
2950 | 
2951 | 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2952 | 		DPRINTF("%s: error computing exit controls\n", __func__);
2953 | 		ret = EINVAL22;
2954 | 		goto exit;
2955 | 	}
2956 | 
2957 | 	if (vmwrite(VMCS_EXIT_CTLS0x400C, exit)) {
2958 | 		DPRINTF("%s: error setting exit controls\n", __func__);
2959 | 		ret = EINVAL22;
2960 | 		goto exit;
2961 | 	}
2962 | 
2963 | 	/*
2964 | 	 * Entry ctrls
2965 | 	 *
2966 | 	 * We must be able to set the following:
2967 | 	 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2968 | 	 * IA32_VMX_LOAD_DEBUG_CONTROLS
2969 | 	 * We must be able to clear the following:
2970 | 	 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2971 | 	 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2972 | 	 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2973 | 	 */
2974 | 	want1 = IA32_VMX_LOAD_DEBUG_CONTROLS(1ULL << 2);
2975 | 	if (vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)	/* guest starts in long mode */
2976 | 		want1 |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
2977 | 
2978 | 	want0 = IA32_VMX_ENTRY_TO_SMM(1ULL << 10) |
2979 | 	    IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT(1ULL << 11) |
2980 | 	    IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13);
2981 | 
2982 | 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2983 | 		ctrl = IA32_VMX_TRUE_ENTRY_CTLS0x490;
2984 | 		ctrlval = vcpu->vc_vmx_true_entry_ctls;
2985 | 	} else {
2986 | 		ctrl = IA32_VMX_ENTRY_CTLS0x484;
2987 | 		ctrlval = vcpu->vc_vmx_entry_ctls;
2988 | 	}
2989 | 
2990 | 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2991 | 		ret = EINVAL22;
2992 | 		goto exit;
2993 | 	}
2994 | 
2995 | 	if (vmwrite(VMCS_ENTRY_CTLS0x4012, entry)) {
2996 | 		ret = EINVAL22;
2997 | 		goto exit;
2998 | 	}
2999 | 
3000 | 	if (vmm_softc->mode == VMM_MODE_EPT) {
3001 | 		eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;	/* EPT PML4 physical address */
3002 | 		msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C);
3003 | 		if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4(1ULL << 6)) {
3004 | 			/* Page walk length 4 supported */
3005 | 			eptp |= ((IA32_EPT_PAGE_WALK_LENGTH0x4 - 1) << 3);
3006 | 		} else {
3007 | 			DPRINTF("EPT page walk length 4 not supported\n");
3008 | 			ret = EINVAL22;
3009 | 			goto exit;
3010 | 		}
3011 | 
3012 | 		if (msr & IA32_EPT_VPID_CAP_WB(1ULL << 14)) {
3013 | 			/* WB cache type supported */
3014 | 			eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB0x6;
3015 | 		} else
3016 | 			DPRINTF("%s: no WB cache type available, guest VM "
3017 | 			    "will run uncached\n", __func__);
3018 | 
3019 | 		DPRINTF("Guest EPTP = 0x%llx\n", eptp);
3020 | 		if (vmwrite(VMCS_GUEST_IA32_EPTP0x201A, eptp)) {
3021 | 			DPRINTF("%s: error setting guest EPTP\n", __func__);
3022 | 			ret = EINVAL22;
3023 | 			goto exit;
3024 | 		}
3025 | 
3026 | 		vcpu->vc_parent->vm_map->pmap->eptp = eptp;
3027 | 	}
3028 | 
3029 | 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
3030 | 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
3031 | 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
3032 | 		    IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
3033 | 
3034 | 			/* We may sleep during allocation, so reload VMCS. */
3035 | 			vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;});
3036 | 			ret = vmm_alloc_vpid(&vpid);
3037 | 			if (vcpu_reload_vmcs_vmx(vcpu)) {	/* reload first, then inspect alloc result */
3038 | 				printf("%s: failed to reload vmcs\n", __func__);
3039 | 				ret = EINVAL22;
3040 | 				goto exit;
3041 | 			}
3042 | 			if (ret) {
3043 | 				DPRINTF("%s: could not allocate VPID\n",
3044 | 				    __func__);
3045 | 				ret = EINVAL22;
3046 | 				goto exit;
3047 | 			}
3048 | 
3049 | 			if (vmwrite(VMCS_GUEST_VPID0x0000, vpid)) {
3050 | 				DPRINTF("%s: error setting guest VPID\n",
3051 | 				    __func__);
3052 | 				ret = EINVAL22;
3053 | 				goto exit;
3054 | 			}
3055 | 
3056 | 			vcpu->vc_vpid = vpid;	/* remember for later free */
3057 | 		}
3058 | 	}
3059 | 
3060 | 	/*
3061 | 	 * Determine which bits in CR0 have to be set to a fixed
3062 | 	 * value as per Intel SDM A.7.
3063 | 	 * CR0 bits in the vrs parameter must match these.
3064 | 	 */
3065 | 	want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
3066 | 	    (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
3067 | 	want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
3068 | 	    ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
3069 | 
3070 | 	/*
3071 | 	 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
3072 | 	 * fixed to 1 even if the CPU supports the unrestricted guest
3073 | 	 * feature. Update want1 and want0 accordingly to allow
3074 | 	 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
3075 | 	 * the CPU has the unrestricted guest capability.
3076 | 	 */
3077 | 	if (ug) {
3078 | 		want1 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
3079 | 		want0 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
3080 | 	}
3081 | 
3082 | 	/*
3083 | 	 * VMX may require some bits to be set that userland should not have
3084 | 	 * to care about. Set those here.
3085 | 	 */
3086 | 	if (want1 & CR0_NE0x00000020)
3087 | 		cr0 |= CR0_NE0x00000020;
3088 | 
3089 | 	if ((cr0 & want1) != want1) {	/* a must-be-1 bit is clear in requested CR0 */
3090 | 		ret = EINVAL22;
3091 | 		goto exit;
3092 | 	}
3093 | 
3094 | 	if ((~cr0 & want0) != want0) {	/* a must-be-0 bit is set in requested CR0 */
3095 | 		ret = EINVAL22;
3096 | 		goto exit;
3097 | 	}
3098 | 
3099 | 	vcpu->vc_vmx_cr0_fixed1 = want1;	/* NOTE(review): stores must-be-1 mask; field naming looks inverted vs Intel FIXED0/FIXED1 — confirm against readers */
3100 | 	vcpu->vc_vmx_cr0_fixed0 = want0;	/* NOTE(review): stores must-be-0 mask — confirm against readers */
3101 | 	/*
3102 | 	 * Determine which bits in CR4 have to be set to a fixed
3103 | 	 * value as per Intel SDM A.8.
3104 | 	 * CR4 bits in the vrs parameter must match these, except
3105 | 	 * CR4_VMXE - we add that here since it must always be set.
3106 | 	 */
3107 | 	want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
3108 | 	    (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
3109 | 	want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
3110 | 	    ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
3111 | 
3112 | 	cr4 = vrs->vrs_crs[VCPU_REGS_CR43] | CR4_VMXE0x00002000;
3113 | 
3114 | 	if ((cr4 & want1) != want1) {	/* a must-be-1 bit is clear in requested CR4 */
3115 | 		ret = EINVAL22;
3116 | 		goto exit;
3117 | 	}
3118 | 
3119 | 	if ((~cr4 & want0) != want0) {	/* a must-be-0 bit is set in requested CR4 */
3120 | 		ret = EINVAL22;
3121 | 		goto exit;
3122 | 	}
3123 | 
3124 | 	cr3 = vrs->vrs_crs[VCPU_REGS_CR32];
3125 | 
3126 | 	/* Restore PDPTEs if 32-bit PAE paging is being used */
3127 | 	if (cr3 && (cr4 & CR4_PAE0x00000020) &&
3128 | 	    !(vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)) {
3129 | 		if (vmwrite(VMCS_GUEST_PDPTE00x280A,
3130 | 		    vrs->vrs_crs[VCPU_REGS_PDPTE06])) {
3131 | 			ret = EINVAL22;
3132 | 			goto exit;
3133 | 		}
3134 | 
3135 | 		if (vmwrite(VMCS_GUEST_PDPTE10x280C,
3136 | 		    vrs->vrs_crs[VCPU_REGS_PDPTE17])) {
3137 | 			ret = EINVAL22;
3138 | 			goto exit;
3139 | 		}
3140 | 
3141 | 		if (vmwrite(VMCS_GUEST_PDPTE20x280E,
3142 | 		    vrs->vrs_crs[VCPU_REGS_PDPTE28])) {
3143 | 			ret = EINVAL22;
3144 | 			goto exit;
3145 | 		}
3146 | 
3147 | 		if (vmwrite(VMCS_GUEST_PDPTE30x2810,
3148 | 		    vrs->vrs_crs[VCPU_REGS_PDPTE39])) {
3149 | 			ret = EINVAL22;
3150 | 			goto exit;
3151 | 		}
3152 | 	}
3153 | 
3154 | 	/* Write the (possibly adjusted) CR0/CR4 back for vcpu_writeregs_vmx() below */
3155 | 	vrs->vrs_crs[VCPU_REGS_CR00] = cr0;
3156 | 	vrs->vrs_crs[VCPU_REGS_CR43] = cr4;
3157 | 
3158 | 	/*
3159 | 	 * Select host MSRs to be loaded on exit
3160 | 	 */
3161 | 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
3162 | 	msr_store[0].vms_index = MSR_EFER0xc0000080;
3163 | 	msr_store[0].vms_data = rdmsr(MSR_EFER0xc0000080);
3164 | 	msr_store[1].vms_index = MSR_STAR0xc0000081;
3165 | 	msr_store[1].vms_data = rdmsr(MSR_STAR0xc0000081);
3166 | 	msr_store[2].vms_index = MSR_LSTAR0xc0000082;
3167 | 	msr_store[2].vms_data = rdmsr(MSR_LSTAR0xc0000082);
3168 | 	msr_store[3].vms_index = MSR_CSTAR0xc0000083;
3169 | 	msr_store[3].vms_data = rdmsr(MSR_CSTAR0xc0000083);
3170 | 	msr_store[4].vms_index = MSR_SFMASK0xc0000084;
3171 | 	msr_store[4].vms_data = rdmsr(MSR_SFMASK0xc0000084);
3172 | 	msr_store[5].vms_index = MSR_KERNELGSBASE0xc0000102;
3173 | 	msr_store[5].vms_data = rdmsr(MSR_KERNELGSBASE0xc0000102);
3174 | 	msr_store[6].vms_index = MSR_MISC_ENABLE0x1a0;
3175 | 	msr_store[6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
3176 | 
3177 | 	/*
3178 | 	 * Select guest MSRs to be loaded on entry / saved on exit
3179 | 	 */
3180 | 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
3181 | 
3182 | 	msr_store[VCPU_REGS_EFER0].vms_index = MSR_EFER0xc0000080;
3183 | 	msr_store[VCPU_REGS_STAR1].vms_index = MSR_STAR0xc0000081;
3184 | 	msr_store[VCPU_REGS_LSTAR2].vms_index = MSR_LSTAR0xc0000082;
3185 | 	msr_store[VCPU_REGS_CSTAR3].vms_index = MSR_CSTAR0xc0000083;
3186 | 	msr_store[VCPU_REGS_SFMASK4].vms_index = MSR_SFMASK0xc0000084;
3187 | 	msr_store[VCPU_REGS_KGSBASE5].vms_index = MSR_KERNELGSBASE0xc0000102;
3188 | 	msr_store[VCPU_REGS_MISC_ENABLE6].vms_index = MSR_MISC_ENABLE0x1a0;
3189 | 
3190 | 	/*
3191 | 	 * Initialize MSR_MISC_ENABLE as it can't be read and populated from vmd
3192 | 	 * and some of the content is based on the host.
3193 | 	 */
3194 | 	msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
3195 | 	msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &=
3196 | 	    ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7) |
3197 | 	    MISC_ENABLE_EIST_ENABLED(1 << 16) | MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18) |
3198 | 	    MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));
3199 | 	msr_store[VCPU_REGS_MISC_ENABLE6].vms_data |=
3200 | 	    MISC_ENABLE_BTS_UNAVAILABLE(1 << 11) | MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12);
3201 | 
3202 | 	/*
3203 | 	 * Currently we have the same count of entry/exit MSRs loads/stores
3204 | 	 * but this is not an architectural requirement.
3205 | 	 */
3206 | 	if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT0x400E, VMX_NUM_MSR_STORE7)) {
3207 | 		DPRINTF("%s: error setting guest MSR exit store count\n",
3208 | 		    __func__);
3209 | 		ret = EINVAL22;
3210 | 		goto exit;
3211 | 	}
3212 | 
3213 | 	if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT0x4010, VMX_NUM_MSR_STORE7)) {
3214 | 		DPRINTF("%s: error setting guest MSR exit load count\n",
3215 | 		    __func__);
3216 | 		ret = EINVAL22;
3217 | 		goto exit;
3218 | 	}
3219 | 
3220 | 	if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, VMX_NUM_MSR_STORE7)) {
3221 | 		DPRINTF("%s: error setting guest MSR entry load count\n",
3222 | 		    __func__);
3223 | 		ret = EINVAL22;
3224 | 		goto exit;
3225 | 	}
3226 | 
3227 | 	if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS0x2006,
3228 | 	    vcpu->vc_vmx_msr_exit_save_pa)) {
3229 | 		DPRINTF("%s: error setting guest MSR exit store address\n",
3230 | 		    __func__);
3231 | 		ret = EINVAL22;
3232 | 		goto exit;
3233 | 	}
3234 | 
3235 | 	if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008,
3236 | 	    vcpu->vc_vmx_msr_exit_load_pa)) {
3237 | 		DPRINTF("%s: error setting guest MSR exit load address\n",
3238 | 		    __func__);
3239 | 		ret = EINVAL22;
3240 | 		goto exit;
3241 | 	}
3242 | 
3243 | 	if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A,
3244 | 	    vcpu->vc_vmx_msr_exit_save_pa)) {	/* intentionally the exit-save area: guest MSRs saved on exit are reloaded on entry */
3245 | 		DPRINTF("%s: error setting guest MSR entry load address\n",
3246 | 		    __func__);
3247 | 		ret = EINVAL22;
3248 | 		goto exit;
3249 | 	}
3250 | 
3251 | 	if (vmwrite(VMCS_MSR_BITMAP_ADDRESS0x2004,
3252 | 	    vcpu->vc_msr_bitmap_pa)) {
3253 | 		DPRINTF("%s: error setting guest MSR bitmap address\n",
3254 | 		    __func__);
3255 | 		ret = EINVAL22;
3256 | 		goto exit;
3257 | 	}
3258 | 
3259 | 	if (vmwrite(VMCS_CR4_MASK0x6002, CR4_VMXE0x00002000)) {
3260 | 		DPRINTF("%s: error setting guest CR4 mask\n", __func__);
3261 | 		ret = EINVAL22;
3262 | 		goto exit;
3263 | 	}
3264 | 
3265 | 	if (vmwrite(VMCS_CR0_MASK0x6000, CR0_NE0x00000020)) {
3266 | 		DPRINTF("%s: error setting guest CR0 mask\n", __func__);
3267 | 		ret = EINVAL22;
3268 | 		goto exit;
3269 | 	}
3270 | 
3271 | 	/*
3272 | 	 * Set up the VMCS for the register state we want during VCPU start.
3273 | 	 * This matches what the CPU state would be after a bootloader
3274 | 	 * transition to 'start'.
3275 | 	 */
3276 | 	ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 0, vrs);	/* result carried through to 'exit' below */
3277 | 
3278 | 	/*
3279 | 	 * Set up the MSR bitmap
3280 | 	 */
3281 | 	memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF), ((1 << 12)));	/* default: intercept everything */
3282 | 	vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
3283 | 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
3284 | 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
3285 | 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
3286 | 	vmx_setmsrbrw(vcpu, MSR_EFER0xc0000080);
3287 | 	vmx_setmsrbrw(vcpu, MSR_STAR0xc0000081);
3288 | 	vmx_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
3289 | 	vmx_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
3290 | 	vmx_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
3291 | 	vmx_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
3292 | 	vmx_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
3293 | 	vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);
3294 | 	vmx_setmsrbr(vcpu, MSR_MISC_ENABLE0x1a0);	/* read-only for the guest */
3295 | 
3296 | 	/* XXX CR0 shadow */
3297 | 	/* XXX CR4 shadow */
3298 | 
3299 | 	/* xcr0 power on default sets bit 0 (x87 state) */
3300 | 	vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask;
3301 | 
3302 | 	/* XXX PAT shadow */
3303 | 	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277);
3304 | 
3305 | 	/* Flush the VMCS */
3306 | 	if (vmclear(&vcpu->vc_control_pa)) {
3307 | 		DPRINTF("%s: vmclear failed\n", __func__);
3308 | 		ret = EINVAL22;
3309 | 	}
3310 | 	atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
3311 | 
3312 | exit:
3313 | 	return (ret);
3314 | }
3314 | |
3315 | /* |
3316 | * vcpu_init_vmx |
3317 | * |
3318 | * Intel VMX specific VCPU initialization routine. |
3319 | * |
3320 | * This function allocates various per-VCPU memory regions, sets up initial |
3321 | * VCPU VMCS controls, and sets initial register values. |
3322 | * |
3323 | * Parameters: |
3324 | * vcpu: the VCPU structure being initialized |
3325 | * |
3326 | * Return values: |
3327 | * 0: the VCPU was initialized successfully |
3328 | * ENOMEM: insufficient resources |
3329 | * EINVAL: an error occurred during VCPU initialization |
3330 | */ |
3331 | int |
3332 | vcpu_init_vmx(struct vcpu *vcpu) |
3333 | { |
3334 | struct vmcs *vmcs; |
3335 | uint32_t cr0, cr4; |
3336 | int ret = 0; |
3337 | |
3338 | /* Allocate VMCS VA */ |
3339 | vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3340 | &kd_waitok); |
3341 | vcpu->vc_vmx_vmcs_state = VMCS_CLEARED0; |
3342 | |
3343 | if (!vcpu->vc_control_va) |
3344 | return (ENOMEM12); |
3345 | |
3346 | /* Compute VMCS PA */ |
3347 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va, |
3348 | (paddr_t *)&vcpu->vc_control_pa)) { |
3349 | ret = ENOMEM12; |
3350 | goto exit; |
3351 | } |
3352 | |
3353 | /* Allocate MSR bitmap VA */ |
3354 | vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3355 | &kd_waitok); |
3356 | |
3357 | if (!vcpu->vc_msr_bitmap_va) { |
3358 | ret = ENOMEM12; |
3359 | goto exit; |
3360 | } |
3361 | |
3362 | /* Compute MSR bitmap PA */ |
3363 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va, |
3364 | (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { |
3365 | ret = ENOMEM12; |
3366 | goto exit; |
3367 | } |
3368 | |
3369 | /* Allocate MSR exit load area VA */ |
3370 | vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3371 | &kp_zero, &kd_waitok); |
3372 | |
3373 | if (!vcpu->vc_vmx_msr_exit_load_va) { |
3374 | ret = ENOMEM12; |
3375 | goto exit; |
3376 | } |
3377 | |
3378 | /* Compute MSR exit load area PA */ |
3379 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_load_va, |
3380 | &vcpu->vc_vmx_msr_exit_load_pa)) { |
3381 | ret = ENOMEM12; |
3382 | goto exit; |
3383 | } |
3384 | |
3385 | /* Allocate MSR exit save area VA */ |
3386 | vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3387 | &kp_zero, &kd_waitok); |
3388 | |
3389 | if (!vcpu->vc_vmx_msr_exit_save_va) { |
3390 | ret = ENOMEM12; |
3391 | goto exit; |
3392 | } |
3393 | |
3394 | /* Compute MSR exit save area PA */ |
3395 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_save_va, |
3396 | &vcpu->vc_vmx_msr_exit_save_pa)) { |
3397 | ret = ENOMEM12; |
3398 | goto exit; |
3399 | } |
3400 | |
3401 | /* Allocate MSR entry load area VA */ |
3402 | vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3403 | &kp_zero, &kd_waitok); |
3404 | |
3405 | if (!vcpu->vc_vmx_msr_entry_load_va) { |
3406 | ret = ENOMEM12; |
3407 | goto exit; |
3408 | } |
3409 | |
3410 | /* Compute MSR entry load area PA */ |
3411 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_entry_load_va, |
3412 | &vcpu->vc_vmx_msr_entry_load_pa)) { |
3413 | ret = ENOMEM12; |
3414 | goto exit; |
3415 | } |
3416 | |
3417 | vmcs = (struct vmcs *)vcpu->vc_control_va; |
3418 | vmcs->vmcs_revision = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; |
3419 | |
3420 | /* |
3421 | * Load the VMCS onto this PCPU so we can write registers |
3422 | */ |
3423 | if (vmptrld(&vcpu->vc_control_pa)) { |
3424 | ret = EINVAL22; |
3425 | goto exit; |
3426 | } |
3427 | |
3428 | /* Host CR0 */ |
3429 | cr0 = rcr0() & ~CR0_TS0x00000008; |
3430 | if (vmwrite(VMCS_HOST_IA32_CR00x6C00, cr0)) { |
3431 | DPRINTF("%s: error writing host CR0\n", __func__); |
3432 | ret = EINVAL22; |
3433 | goto exit; |
3434 | } |
3435 | |
3436 | /* Host CR4 */ |
3437 | cr4 = rcr4(); |
3438 | if (vmwrite(VMCS_HOST_IA32_CR40x6C04, cr4)) { |
3439 | DPRINTF("%s: error writing host CR4\n", __func__); |
3440 | ret = EINVAL22; |
3441 | goto exit; |
3442 | } |
3443 | |
3444 | /* Host Segment Selectors */ |
3445 | if (vmwrite(VMCS_HOST_IA32_CS_SEL0x0C02, GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0))) { |
3446 | DPRINTF("%s: error writing host CS selector\n", __func__); |
3447 | ret = EINVAL22; |
3448 | goto exit; |
3449 | } |
3450 | |
3451 | if (vmwrite(VMCS_HOST_IA32_DS_SEL0x0C06, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3452 | DPRINTF("%s: error writing host DS selector\n", __func__); |
3453 | ret = EINVAL22; |
3454 | goto exit; |
3455 | } |
3456 | |
3457 | if (vmwrite(VMCS_HOST_IA32_ES_SEL0x0C00, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3458 | DPRINTF("%s: error writing host ES selector\n", __func__); |
3459 | ret = EINVAL22; |
3460 | goto exit; |
3461 | } |
3462 | |
3463 | if (vmwrite(VMCS_HOST_IA32_FS_SEL0x0C08, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3464 | DPRINTF("%s: error writing host FS selector\n", __func__); |
3465 | ret = EINVAL22; |
3466 | goto exit; |
3467 | } |
3468 | |
3469 | if (vmwrite(VMCS_HOST_IA32_GS_SEL0x0C0A, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3470 | DPRINTF("%s: error writing host GS selector\n", __func__); |
3471 | ret = EINVAL22; |
3472 | goto exit; |
3473 | } |
3474 | |
3475 | if (vmwrite(VMCS_HOST_IA32_SS_SEL0x0C04, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3476 | DPRINTF("%s: error writing host SS selector\n", __func__); |
3477 | ret = EINVAL22; |
3478 | goto exit; |
3479 | } |
3480 | |
3481 | if (vmwrite(VMCS_HOST_IA32_TR_SEL0x0C0C, GSYSSEL(GPROC0_SEL, SEL_KPL)((((0) << 4) + (6 << 3)) | 0))) { |
3482 | DPRINTF("%s: error writing host TR selector\n", __func__); |
3483 | ret = EINVAL22; |
3484 | goto exit; |
3485 | } |
3486 | |
3487 | /* Host IDTR base */ |
3488 | if (vmwrite(VMCS_HOST_IA32_IDTR_BASE0x6C0E, idt_vaddr)) { |
3489 | DPRINTF("%s: error writing host IDTR base\n", __func__); |
3490 | ret = EINVAL22; |
3491 | goto exit; |
3492 | } |
3493 | |
3494 | /* VMCS link */ |
3495 | if (vmwrite(VMCS_LINK_POINTER0x2800, VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL)) { |
3496 | DPRINTF("%s: error writing VMCS link pointer\n", __func__); |
3497 | ret = EINVAL22; |
3498 | goto exit; |
3499 | } |
3500 | |
3501 | /* Flush the initial VMCS */ |
3502 | if (vmclear(&vcpu->vc_control_pa)) { |
3503 | DPRINTF("%s: vmclear failed\n", __func__); |
3504 | ret = EINVAL22; |
3505 | } |
3506 | |
3507 | exit: |
3508 | if (ret) { |
3509 | if (vcpu->vc_control_va) |
3510 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3511 | &kv_page, &kp_zero); |
3512 | if (vcpu->vc_msr_bitmap_va) |
3513 | km_free((void *)vcpu->vc_msr_bitmap_va, PAGE_SIZE(1 << 12), |
3514 | &kv_page, &kp_zero); |
3515 | if (vcpu->vc_vmx_msr_exit_save_va) |
3516 | km_free((void *)vcpu->vc_vmx_msr_exit_save_va, |
3517 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3518 | if (vcpu->vc_vmx_msr_exit_load_va) |
3519 | km_free((void *)vcpu->vc_vmx_msr_exit_load_va, |
3520 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3521 | if (vcpu->vc_vmx_msr_entry_load_va) |
3522 | km_free((void *)vcpu->vc_vmx_msr_entry_load_va, |
3523 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3524 | } |
3525 | |
3526 | return (ret); |
3527 | } |
3528 | |
3529 | /* |
3530 | * vcpu_reset_regs |
3531 | * |
3532 | * Resets a vcpu's registers to the provided state |
3533 | * |
3534 | * Parameters: |
3535 | * vcpu: the vcpu whose registers shall be reset |
3536 | * vrs: the desired register state |
3537 | * |
3538 | * Return values: |
3539 | * 0: the vcpu's registers were successfully reset |
3540 | * !0: the vcpu's registers could not be reset (see arch-specific reset |
3541 | * function for various values that can be returned here) |
3542 | */ |
3543 | int |
3544 | vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs) |
3545 | { |
3546 | int ret; |
3547 | |
3548 | if (vmm_softc->mode == VMM_MODE_VMX || |
3549 | vmm_softc->mode == VMM_MODE_EPT) |
3550 | ret = vcpu_reset_regs_vmx(vcpu, vrs); |
3551 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3552 | vmm_softc->mode == VMM_MODE_RVI) |
3553 | ret = vcpu_reset_regs_svm(vcpu, vrs); |
3554 | else |
3555 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3556 | |
3557 | return (ret); |
3558 | } |
3559 | |
3560 | /* |
3561 | * vcpu_init_svm |
3562 | * |
3563 | * AMD SVM specific VCPU initialization routine. |
3564 | * |
3565 | * This function allocates various per-VCPU memory regions, sets up initial |
3566 | * VCPU VMCB controls, and sets initial register values. |
3567 | * |
3568 | * Parameters: |
3569 | * vcpu: the VCPU structure being initialized |
3570 | * |
3571 | * Return values: |
3572 | * 0: the VCPU was initialized successfully |
3573 | * ENOMEM: insufficient resources |
3574 | * EINVAL: an error occurred during VCPU initialization |
3575 | */ |
3576 | int |
3577 | vcpu_init_svm(struct vcpu *vcpu) |
3578 | { |
3579 | int ret = 0; |
3580 | |
3581 | /* Allocate VMCB VA */ |
3582 | vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3583 | &kd_waitok); |
3584 | |
3585 | if (!vcpu->vc_control_va) |
3586 | return (ENOMEM12); |
3587 | |
3588 | /* Compute VMCB PA */ |
3589 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va, |
3590 | (paddr_t *)&vcpu->vc_control_pa)) { |
3591 | ret = ENOMEM12; |
3592 | goto exit; |
3593 | } |
3594 | |
3595 | DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3596 | (uint64_t)vcpu->vc_control_va, |
3597 | (uint64_t)vcpu->vc_control_pa); |
3598 | |
3599 | |
3600 | /* Allocate MSR bitmap VA (2 pages) */ |
3601 | vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE(1 << 12), &kv_any, |
3602 | &vmm_kp_contig, &kd_waitok); |
3603 | |
3604 | if (!vcpu->vc_msr_bitmap_va) { |
3605 | ret = ENOMEM12; |
3606 | goto exit; |
3607 | } |
3608 | |
3609 | /* Compute MSR bitmap PA */ |
3610 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va, |
3611 | (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { |
3612 | ret = ENOMEM12; |
3613 | goto exit; |
3614 | } |
3615 | |
3616 | DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3617 | (uint64_t)vcpu->vc_msr_bitmap_va, |
3618 | (uint64_t)vcpu->vc_msr_bitmap_pa); |
3619 | |
3620 | /* Allocate host state area VA */ |
3621 | vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3622 | &kp_zero, &kd_waitok); |
3623 | |
3624 | if (!vcpu->vc_svm_hsa_va) { |
3625 | ret = ENOMEM12; |
3626 | goto exit; |
3627 | } |
3628 | |
3629 | /* Compute host state area PA */ |
3630 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_hsa_va, |
3631 | &vcpu->vc_svm_hsa_pa)) { |
3632 | ret = ENOMEM12; |
3633 | goto exit; |
3634 | } |
3635 | |
3636 | DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3637 | (uint64_t)vcpu->vc_svm_hsa_va, |
3638 | (uint64_t)vcpu->vc_svm_hsa_pa); |
3639 | |
3640 | /* Allocate IOIO area VA (3 pages) */ |
3641 | vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE(1 << 12), &kv_any, |
3642 | &vmm_kp_contig, &kd_waitok); |
3643 | |
3644 | if (!vcpu->vc_svm_ioio_va) { |
3645 | ret = ENOMEM12; |
3646 | goto exit; |
3647 | } |
3648 | |
3649 | /* Compute IOIO area PA */ |
3650 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_ioio_va, |
3651 | &vcpu->vc_svm_ioio_pa)) { |
3652 | ret = ENOMEM12; |
3653 | goto exit; |
3654 | } |
3655 | |
3656 | DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3657 | (uint64_t)vcpu->vc_svm_ioio_va, |
3658 | (uint64_t)vcpu->vc_svm_ioio_pa); |
3659 | |
3660 | exit: |
3661 | if (ret) { |
3662 | if (vcpu->vc_control_va) |
3663 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3664 | &kv_page, &kp_zero); |
3665 | if (vcpu->vc_msr_bitmap_va) |
3666 | km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), |
3667 | &kv_any, &vmm_kp_contig); |
3668 | if (vcpu->vc_svm_hsa_va) |
3669 | km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), |
3670 | &kv_page, &kp_zero); |
3671 | if (vcpu->vc_svm_ioio_va) |
3672 | km_free((void *)vcpu->vc_svm_ioio_va, |
3673 | 3 * PAGE_SIZE(1 << 12), &kv_any, &vmm_kp_contig); |
3674 | } |
3675 | |
3676 | return (ret); |
3677 | } |
3678 | |
3679 | /* |
3680 | * vcpu_init |
3681 | * |
3682 | * Calls the architecture-specific VCPU init routine |
3683 | */ |
3684 | int |
3685 | vcpu_init(struct vcpu *vcpu) |
3686 | { |
3687 | int ret = 0; |
3688 | |
3689 | vcpu->vc_virt_mode = vmm_softc->mode; |
3690 | vcpu->vc_state = VCPU_STATE_STOPPED; |
3691 | vcpu->vc_vpid = 0; |
3692 | vcpu->vc_pvclock_system_gpa = 0; |
3693 | vcpu->vc_last_pcpu = NULL((void *)0); |
3694 | |
3695 | rw_init(&vcpu->vc_lock, "vcpu")_rw_init_flags(&vcpu->vc_lock, "vcpu", 0, ((void *)0)); |
3696 | |
3697 | /* Shadow PAT MSR, starting with host's value. */ |
3698 | vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277); |
3699 | |
3700 | if (vmm_softc->mode == VMM_MODE_VMX || |
3701 | vmm_softc->mode == VMM_MODE_EPT) |
3702 | ret = vcpu_init_vmx(vcpu); |
3703 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3704 | vmm_softc->mode == VMM_MODE_RVI) |
3705 | ret = vcpu_init_svm(vcpu); |
3706 | else |
3707 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3708 | |
3709 | return (ret); |
3710 | } |
3711 | |
3712 | /* |
3713 | * vcpu_deinit_vmx |
3714 | * |
3715 | * Deinitializes the vcpu described by 'vcpu' |
3716 | * |
3717 | * Parameters: |
3718 | * vcpu: the vcpu to be deinited |
3719 | */ |
3720 | void |
3721 | vcpu_deinit_vmx(struct vcpu *vcpu) |
3722 | { |
3723 | if (vcpu->vc_control_va) |
3724 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3725 | &kv_page, &kp_zero); |
3726 | if (vcpu->vc_vmx_msr_exit_save_va) |
3727 | km_free((void *)vcpu->vc_vmx_msr_exit_save_va, |
3728 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3729 | if (vcpu->vc_vmx_msr_exit_load_va) |
3730 | km_free((void *)vcpu->vc_vmx_msr_exit_load_va, |
3731 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3732 | if (vcpu->vc_vmx_msr_entry_load_va) |
3733 | km_free((void *)vcpu->vc_vmx_msr_entry_load_va, |
3734 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3735 | |
3736 | if (vcpu->vc_vmx_vpid_enabled) |
3737 | vmm_free_vpid(vcpu->vc_vpid); |
3738 | } |
3739 | |
3740 | /* |
3741 | * vcpu_deinit_svm |
3742 | * |
3743 | * Deinitializes the vcpu described by 'vcpu' |
3744 | * |
3745 | * Parameters: |
3746 | * vcpu: the vcpu to be deinited |
3747 | */ |
3748 | void |
3749 | vcpu_deinit_svm(struct vcpu *vcpu) |
3750 | { |
3751 | if (vcpu->vc_control_va) |
3752 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), &kv_page, |
3753 | &kp_zero); |
3754 | if (vcpu->vc_msr_bitmap_va) |
3755 | km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), &kv_any, |
3756 | &vmm_kp_contig); |
3757 | if (vcpu->vc_svm_hsa_va) |
3758 | km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), &kv_page, |
3759 | &kp_zero); |
3760 | if (vcpu->vc_svm_ioio_va) |
3761 | km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE(1 << 12), &kv_any, |
3762 | &vmm_kp_contig); |
3763 | |
3764 | vmm_free_vpid(vcpu->vc_vpid); |
3765 | } |
3766 | |
3767 | /* |
3768 | * vcpu_deinit |
3769 | * |
3770 | * Calls the architecture-specific VCPU deinit routine |
3771 | * |
3772 | * Parameters: |
3773 | * vcpu: the vcpu to be deinited |
3774 | */ |
3775 | void |
3776 | vcpu_deinit(struct vcpu *vcpu) |
3777 | { |
3778 | if (vmm_softc->mode == VMM_MODE_VMX || |
3779 | vmm_softc->mode == VMM_MODE_EPT) |
3780 | vcpu_deinit_vmx(vcpu); |
3781 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3782 | vmm_softc->mode == VMM_MODE_RVI) |
3783 | vcpu_deinit_svm(vcpu); |
3784 | else |
3785 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3786 | } |
3787 | |
3788 | /* |
3789 | * vm_teardown |
3790 | * |
3791 | * Tears down (destroys) the vm indicated by 'vm'. |
3792 | * |
3793 | * Parameters: |
3794 | * vm: vm to be torn down |
3795 | */ |
3796 | void |
3797 | vm_teardown(struct vm *vm) |
3798 | { |
3799 | struct vcpu *vcpu, *tmp; |
3800 | |
3801 | rw_assert_wrlock(&vmm_softc->vm_lock); |
3802 | KERNEL_LOCK()_kernel_lock(); |
3803 | |
3804 | /* Free VCPUs */ |
3805 | rw_enter_write(&vm->vm_vcpu_lock); |
3806 | SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp)for ((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) && ((tmp) = ((vcpu)->vc_vcpu_link.sle_next), 1); (vcpu) = (tmp)) { |
3807 | SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link)do { if ((&vm->vm_vcpu_list)->slh_first == (vcpu)) { do { ((&vm->vm_vcpu_list))->slh_first = ((&vm-> vm_vcpu_list))->slh_first->vc_vcpu_link.sle_next; } while (0); } else { struct vcpu *curelm = (&vm->vm_vcpu_list )->slh_first; while (curelm->vc_vcpu_link.sle_next != ( vcpu)) curelm = curelm->vc_vcpu_link.sle_next; curelm-> vc_vcpu_link.sle_next = curelm->vc_vcpu_link.sle_next-> vc_vcpu_link.sle_next; } ((vcpu)->vc_vcpu_link.sle_next) = ((void *)-1); } while (0); |
3808 | vcpu_deinit(vcpu); |
3809 | pool_put(&vcpu_pool, vcpu); |
3810 | vmm_softc->vcpu_ct--; |
3811 | } |
3812 | |
3813 | vm_impl_deinit(vm); |
3814 | |
3815 | /* teardown guest vmspace */ |
3816 | if (vm->vm_vmspace != NULL((void *)0)) { |
3817 | uvmspace_free(vm->vm_vmspace); |
3818 | vm->vm_vmspace = NULL((void *)0); |
3819 | } |
3820 | |
3821 | if (vm->vm_id > 0) { |
3822 | vmm_softc->vm_ct--; |
3823 | if (vmm_softc->vm_ct < 1) |
3824 | vmm_stop(); |
3825 | } |
3826 | pool_put(&vm_pool, vm); |
3827 | |
3828 | KERNEL_UNLOCK()_kernel_unlock(); |
3829 | rw_exit_write(&vm->vm_vcpu_lock); |
3830 | } |
3831 | |
3832 | /* |
3833 | * vcpu_vmx_check_cap |
3834 | * |
3835 | * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1 |
3836 | * or set = 0, respectively). |
3837 | * |
3838 | * When considering 'msr', we check to see if true controls are available, |
3839 | * and use those if so. |
3840 | * |
3841 | * Returns 1 of 'cap' can be set/cleared as requested, 0 otherwise. |
3842 | */ |
3843 | int |
3844 | vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set) |
3845 | { |
3846 | uint64_t ctl; |
3847 | |
3848 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
3849 | switch (msr) { |
3850 | case IA32_VMX_PINBASED_CTLS0x481: |
3851 | ctl = vcpu->vc_vmx_true_pinbased_ctls; |
3852 | break; |
3853 | case IA32_VMX_PROCBASED_CTLS0x482: |
3854 | ctl = vcpu->vc_vmx_true_procbased_ctls; |
3855 | break; |
3856 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
3857 | ctl = vcpu->vc_vmx_procbased2_ctls; |
3858 | break; |
3859 | case IA32_VMX_ENTRY_CTLS0x484: |
3860 | ctl = vcpu->vc_vmx_true_entry_ctls; |
3861 | break; |
3862 | case IA32_VMX_EXIT_CTLS0x483: |
3863 | ctl = vcpu->vc_vmx_true_exit_ctls; |
3864 | break; |
3865 | default: |
3866 | return (0); |
3867 | } |
3868 | } else { |
3869 | switch (msr) { |
3870 | case IA32_VMX_PINBASED_CTLS0x481: |
3871 | ctl = vcpu->vc_vmx_pinbased_ctls; |
3872 | break; |
3873 | case IA32_VMX_PROCBASED_CTLS0x482: |
3874 | ctl = vcpu->vc_vmx_procbased_ctls; |
3875 | break; |
3876 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
3877 | ctl = vcpu->vc_vmx_procbased2_ctls; |
3878 | break; |
3879 | case IA32_VMX_ENTRY_CTLS0x484: |
3880 | ctl = vcpu->vc_vmx_entry_ctls; |
3881 | break; |
3882 | case IA32_VMX_EXIT_CTLS0x483: |
3883 | ctl = vcpu->vc_vmx_exit_ctls; |
3884 | break; |
3885 | default: |
3886 | return (0); |
3887 | } |
3888 | } |
3889 | |
3890 | if (set) { |
3891 | /* Check bit 'cap << 32', must be !0 */ |
3892 | return (ctl & ((uint64_t)cap << 32)) != 0; |
3893 | } else { |
3894 | /* Check bit 'cap', must be 0 */ |
3895 | return (ctl & cap) == 0; |
3896 | } |
3897 | } |
3898 | |
3899 | /* |
3900 | * vcpu_vmx_compute_ctrl |
3901 | * |
3902 | * Computes the appropriate control value, given the supplied parameters |
3903 | * and CPU capabilities. |
3904 | * |
3905 | * Intel has made somewhat of a mess of this computation - it is described |
3906 | * using no fewer than three different approaches, spread across many |
3907 | * pages of the SDM. Further compounding the problem is the fact that now |
3908 | * we have "true controls" for each type of "control", and each needs to |
3909 | * be examined to get the calculation right, but only if "true" controls |
3910 | * are present on the CPU we're on. |
3911 | * |
3912 | * Parameters: |
3913 | * ctrlval: the control value, as read from the CPU MSR |
3914 | * ctrl: which control is being set (eg, pinbased, procbased, etc) |
3915 | * want0: the set of desired 0 bits |
3916 | * want1: the set of desired 1 bits |
3917 | * out: (out) the correct value to write into the VMCS for this VCPU, |
3918 | * for the 'ctrl' desired. |
3919 | * |
3920 | * Returns 0 if successful, or EINVAL if the supplied parameters define |
3921 | * an unworkable control setup. |
3922 | */ |
3923 | int |
3924 | vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1, |
3925 | uint32_t want0, uint32_t *out) |
3926 | { |
3927 | int i, set, clear; |
3928 | |
3929 | *out = 0; |
3930 | |
3931 | /* |
3932 | * The Intel SDM gives three formulae for determining which bits to |
3933 | * set/clear for a given control and desired functionality. Formula |
3934 | * 1 is the simplest but disallows use of newer features that are |
3935 | * enabled by functionality in later CPUs. |
3936 | * |
3937 | * Formulas 2 and 3 allow such extra functionality. We use formula |
3938 | * 2 - this requires us to know the identity of controls in the |
3939 | * "default1" class for each control register, but allows us to not |
3940 | * have to pass along and/or query both sets of capability MSRs for |
3941 | * each control lookup. This makes the code slightly longer, |
3942 | * however. |
3943 | */ |
3944 | for (i = 0; i < 32; i++) { |
3945 | /* Figure out if we can set and / or clear this bit */ |
3946 | set = (ctrlval & (1ULL << (i + 32))) != 0; |
3947 | clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0; |
3948 | |
3949 | /* If the bit can't be set nor cleared, something's wrong */ |
3950 | if (!set && !clear) |
3951 | return (EINVAL22); |
3952 | |
3953 | /* |
3954 | * Formula 2.c.i - "If the relevant VMX capability MSR |
3955 | * reports that a control has a single setting, use that |
3956 | * setting." |
3957 | */ |
3958 | if (set && !clear) { |
3959 | if (want0 & (1ULL << i)) |
3960 | return (EINVAL22); |
3961 | else |
3962 | *out |= (1ULL << i); |
3963 | } else if (clear && !set) { |
3964 | if (want1 & (1ULL << i)) |
3965 | return (EINVAL22); |
3966 | else |
3967 | *out &= ~(1ULL << i); |
3968 | } else { |
3969 | /* |
3970 | * 2.c.ii - "If the relevant VMX capability MSR |
3971 | * reports that a control can be set to 0 or 1 |
3972 | * and that control's meaning is known to the VMM, |
3973 | * set the control based on the functionality desired." |
3974 | */ |
3975 | if (want1 & (1ULL << i)) |
3976 | *out |= (1ULL << i); |
3977 | else if (want0 & (1 << i)) |
3978 | *out &= ~(1ULL << i); |
3979 | else { |
3980 | /* |
3981 | * ... assuming the control's meaning is not |
3982 | * known to the VMM ... |
3983 | * |
3984 | * 2.c.iii - "If the relevant VMX capability |
3985 | * MSR reports that a control can be set to 0 |
3986 | * or 1 and the control is not in the default1 |
3987 | * class, set the control to 0." |
3988 | * |
3989 | * 2.c.iv - "If the relevant VMX capability |
3990 | * MSR reports that a control can be set to 0 |
3991 | * or 1 and the control is in the default1 |
3992 | * class, set the control to 1." |
3993 | */ |
3994 | switch (ctrl) { |
3995 | case IA32_VMX_PINBASED_CTLS0x481: |
3996 | case IA32_VMX_TRUE_PINBASED_CTLS0x48D: |
3997 | /* |
3998 | * A.3.1 - default1 class of pinbased |
3999 | * controls comprises bits 1,2,4 |
4000 | */ |
4001 | switch (i) { |
4002 | case 1: |
4003 | case 2: |
4004 | case 4: |
4005 | *out |= (1ULL << i); |
4006 | break; |
4007 | default: |
4008 | *out &= ~(1ULL << i); |
4009 | break; |
4010 | } |
4011 | break; |
4012 | case IA32_VMX_PROCBASED_CTLS0x482: |
4013 | case IA32_VMX_TRUE_PROCBASED_CTLS0x48E: |
4014 | /* |
4015 | * A.3.2 - default1 class of procbased |
4016 | * controls comprises bits 1, 4-6, 8, |
4017 | * 13-16, 26 |
4018 | */ |
4019 | switch (i) { |
4020 | case 1: |
4021 | case 4 ... 6: |
4022 | case 8: |
4023 | case 13 ... 16: |
4024 | case 26: |
4025 | *out |= (1ULL << i); |
4026 | break; |
4027 | default: |
4028 | *out &= ~(1ULL << i); |
4029 | break; |
4030 | } |
4031 | break; |
4032 | /* |
4033 | * Unknown secondary procbased controls |
4034 | * can always be set to 0 |
4035 | */ |
4036 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
4037 | *out &= ~(1ULL << i); |
4038 | break; |
4039 | case IA32_VMX_EXIT_CTLS0x483: |
4040 | case IA32_VMX_TRUE_EXIT_CTLS0x48F: |
4041 | /* |
4042 | * A.4 - default1 class of exit |
4043 | * controls comprises bits 0-8, 10, |
4044 | * 11, 13, 14, 16, 17 |
4045 | */ |
4046 | switch (i) { |
4047 | case 0 ... 8: |
4048 | case 10 ... 11: |
4049 | case 13 ... 14: |
4050 | case 16 ... 17: |
4051 | *out |= (1ULL << i); |
4052 | break; |
4053 | default: |
4054 | *out &= ~(1ULL << i); |
4055 | break; |
4056 | } |
4057 | break; |
4058 | case IA32_VMX_ENTRY_CTLS0x484: |
4059 | case IA32_VMX_TRUE_ENTRY_CTLS0x490: |
4060 | /* |
4061 | * A.5 - default1 class of entry |
4062 | * controls comprises bits 0-8, 12 |
4063 | */ |
4064 | switch (i) { |
4065 | case 0 ... 8: |
4066 | case 12: |
4067 | *out |= (1ULL << i); |
4068 | break; |
4069 | default: |
4070 | *out &= ~(1ULL << i); |
4071 | break; |
4072 | } |
4073 | break; |
4074 | } |
4075 | } |
4076 | } |
4077 | } |
4078 | |
4079 | return (0); |
4080 | } |
4081 | |
4082 | /* |
4083 | * vm_get_info |
4084 | * |
4085 | * Returns information about the VM indicated by 'vip'. The 'vip_size' field |
4086 | * in the 'vip' parameter is used to indicate the size of the caller's buffer. |
4087 | * If insufficient space exists in that buffer, the required size needed is |
4088 | * returned in vip_size and the number of VM information structures returned |
4089 | * in vip_info_count is set to 0. The caller should then try the ioctl again |
4090 | * after allocating a sufficiently large buffer. |
4091 | * |
4092 | * Parameters: |
4093 | * vip: information structure identifying the VM to query |
4094 | * |
4095 | * Return values: |
4096 | * 0: the operation succeeded |
4097 | * ENOMEM: memory allocation error during processing |
4098 | * EFAULT: error copying data to user process |
4099 | */ |
4100 | int |
4101 | vm_get_info(struct vm_info_params *vip) |
4102 | { |
4103 | struct vm_info_result *out; |
4104 | struct vm *vm; |
4105 | struct vcpu *vcpu; |
4106 | int i, j; |
4107 | size_t need; |
4108 | |
4109 | rw_enter_read(&vmm_softc->vm_lock); |
4110 | need = vmm_softc->vm_ct * sizeof(struct vm_info_result); |
4111 | if (vip->vip_size < need) { |
4112 | vip->vip_info_ct = 0; |
4113 | vip->vip_size = need; |
4114 | rw_exit_read(&vmm_softc->vm_lock); |
4115 | return (0); |
4116 | } |
4117 | |
4118 | out = malloc(need, M_DEVBUF2, M_NOWAIT0x0002|M_ZERO0x0008); |
4119 | if (out == NULL((void *)0)) { |
4120 | vip->vip_info_ct = 0; |
4121 | rw_exit_read(&vmm_softc->vm_lock); |
4122 | return (ENOMEM12); |
4123 | } |
4124 | |
4125 | i = 0; |
4126 | vip->vip_info_ct = vmm_softc->vm_ct; |
4127 | SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link)for((vm) = ((&vmm_softc->vm_list)->slh_first); (vm) != ((void *)0); (vm) = ((vm)->vm_link.sle_next)) { |
4128 | out[i].vir_memory_size = vm->vm_memory_size; |
4129 | out[i].vir_used_size = |
4130 | pmap_resident_count(vm->vm_map->pmap)((vm->vm_map->pmap)->pm_stats.resident_count) * PAGE_SIZE(1 << 12); |
4131 | out[i].vir_ncpus = vm->vm_vcpu_ct; |
4132 | out[i].vir_id = vm->vm_id; |
4133 | out[i].vir_creator_pid = vm->vm_creator_pid; |
4134 | strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN64); |
4135 | rw_enter_read(&vm->vm_vcpu_lock); |
4136 | for (j = 0; j < vm->vm_vcpu_ct; j++) { |
4137 | out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN; |
4138 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) |
4139 | vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4140 | if (vcpu->vc_id == j) |
4141 | out[i].vir_vcpu_state[j] = |
4142 | vcpu->vc_state; |
4143 | } |
4144 | } |
4145 | rw_exit_read(&vm->vm_vcpu_lock); |
4146 | i++; |
4147 | } |
4148 | rw_exit_read(&vmm_softc->vm_lock); |
4149 | if (copyout(out, vip->vip_info, need) == EFAULT14) { |
4150 | free(out, M_DEVBUF2, need); |
4151 | return (EFAULT14); |
4152 | } |
4153 | |
4154 | free(out, M_DEVBUF2, need); |
4155 | return (0); |
4156 | } |
4157 | |
4158 | /* |
4159 | * vm_terminate |
4160 | * |
4161 | * Terminates the VM indicated by 'vtp'. |
4162 | * |
4163 | * Parameters: |
4164 | * vtp: structure defining the VM to terminate |
4165 | * |
4166 | * Return values: |
4167 | * 0: the VM was terminated |
4168 | * !0: the VM could not be located |
4169 | */ |
4170 | int |
4171 | vm_terminate(struct vm_terminate_params *vtp) |
4172 | { |
4173 | struct vm *vm; |
4174 | struct vcpu *vcpu; |
4175 | u_int old, next; |
4176 | int error; |
4177 | |
4178 | /* |
4179 | * Find desired VM |
4180 | */ |
4181 | rw_enter_write(&vmm_softc->vm_lock); |
4182 | error = vm_find(vtp->vtp_vm_id, &vm); |
4183 | |
4184 | if (error == 0) { |
4185 | rw_enter_read(&vm->vm_vcpu_lock); |
4186 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4187 | do { |
4188 | old = vcpu->vc_state; |
4189 | if (old == VCPU_STATE_RUNNING) |
4190 | next = VCPU_STATE_REQTERM; |
4191 | else if (old == VCPU_STATE_STOPPED) |
4192 | next = VCPU_STATE_TERMINATED; |
4193 | else /* must be REQTERM or TERMINATED */ |
4194 | break; |
4195 | } while (old != atomic_cas_uint(&vcpu->vc_state,_atomic_cas_uint((&vcpu->vc_state), (old), (next)) |
4196 | old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next))); |
4197 | } |
4198 | rw_exit_read(&vm->vm_vcpu_lock); |
4199 | } else { |
4200 | rw_exit_write(&vmm_softc->vm_lock); |
4201 | return (error); |
4202 | } |
4203 | |
4204 | SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link)do { if ((&vmm_softc->vm_list)->slh_first == (vm)) { do { ((&vmm_softc->vm_list))->slh_first = ((&vmm_softc ->vm_list))->slh_first->vm_link.sle_next; } while (0 ); } else { struct vm *curelm = (&vmm_softc->vm_list)-> slh_first; while (curelm->vm_link.sle_next != (vm)) curelm = curelm->vm_link.sle_next; curelm->vm_link.sle_next = curelm->vm_link.sle_next->vm_link.sle_next; } ((vm)-> vm_link.sle_next) = ((void *)-1); } while (0); |
4205 | if (vm->vm_vcpus_running == 0) |
4206 | vm_teardown(vm); |
4207 | |
4208 | rw_exit_write(&vmm_softc->vm_lock); |
4209 | |
4210 | return (0); |
4211 | } |
4212 | |
4213 | /* |
4214 | * vm_run |
4215 | * |
4216 | * Run the vm / vcpu specified by 'vrp' |
4217 | * |
4218 | * Parameters: |
4219 | * vrp: structure defining the VM to run |
4220 | * |
4221 | * Return value: |
4222 | * ENOENT: the VM defined in 'vrp' could not be located |
4223 | * EBUSY: the VM defined in 'vrp' is already running |
4224 | * EFAULT: error copying data from userspace (vmd) on return from previous |
4225 | * exit. |
4226 | * EAGAIN: help is needed from vmd(8) (device I/O or exit vmm(4) cannot |
4227 | * handle in-kernel.) |
4228 | * 0: the run loop exited and no help is needed from vmd(8) |
4229 | */ |
4230 | int |
4231 | vm_run(struct vm_run_params *vrp) |
4232 | { |
4233 | struct vm *vm; |
4234 | struct vcpu *vcpu; |
4235 | int ret = 0, error; |
4236 | u_int old, next; |
4237 | |
4238 | /* |
4239 | * Find desired VM |
4240 | */ |
4241 | rw_enter_read(&vmm_softc->vm_lock); |
4242 | error = vm_find(vrp->vrp_vm_id, &vm); |
4243 | |
4244 | /* |
4245 | * Attempt to locate the requested VCPU. If found, attempt to |
4246 | * to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING. |
4247 | * Failure to make the transition indicates the VCPU is busy. |
4248 | */ |
4249 | if (error == 0) { |
4250 | rw_enter_read(&vm->vm_vcpu_lock); |
4251 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4252 | if (vcpu->vc_id == vrp->vrp_vcpu_id) |
4253 | break; |
4254 | } |
4255 | |
4256 | if (vcpu != NULL((void *)0)) { |
4257 | old = VCPU_STATE_STOPPED; |
4258 | next = VCPU_STATE_RUNNING; |
4259 | |
4260 | if (atomic_cas_uint(&vcpu->vc_state, old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next)) != old) |
4261 | ret = EBUSY16; |
4262 | else { |
4263 | atomic_inc_int(&vm->vm_vcpus_running)_atomic_inc_int(&vm->vm_vcpus_running); |
4264 | rw_enter_write(&vcpu->vc_lock); |
4265 | } |
4266 | } else |
4267 | ret = ENOENT2; |
4268 | |
4269 | rw_exit_read(&vm->vm_vcpu_lock); |
4270 | } |
4271 | rw_exit_read(&vmm_softc->vm_lock); |
4272 | |
4273 | if (error != 0) |
4274 | ret = error; |
4275 | |
4276 | /* Bail if errors detected in the previous steps */ |
4277 | if (ret) |
4278 | return (ret); |
4279 | |
4280 | /* |
4281 | * We may be returning from userland helping us from the last exit. |
4282 | * If so (vrp_continue == 1), copy in the exit data from vmd. The |
4283 | * exit data will be consumed before the next entry (this typically |
4284 | * comprises VCPU register changes as the result of vmd(8)'s actions). |
4285 | */ |
4286 | if (vrp->vrp_continue) { |
4287 | if (copyin(vrp->vrp_exit, &vcpu->vc_exit, |
4288 | sizeof(struct vm_exit)) == EFAULT14) { |
4289 | rw_exit_write(&vcpu->vc_lock); |
4290 | return (EFAULT14); |
4291 | } |
4292 | } |
4293 | |
4294 | /* Run the VCPU specified in vrp */ |
4295 | if (vcpu->vc_virt_mode == VMM_MODE_VMX || |
4296 | vcpu->vc_virt_mode == VMM_MODE_EPT) { |
4297 | ret = vcpu_run_vmx(vcpu, vrp); |
4298 | } else if (vcpu->vc_virt_mode == VMM_MODE_SVM || |
4299 | vcpu->vc_virt_mode == VMM_MODE_RVI) { |
4300 | ret = vcpu_run_svm(vcpu, vrp); |
4301 | } |
4302 | |
4303 | /* |
4304 | * We can set the VCPU states here without CAS because once |
4305 | * a VCPU is in state RUNNING or REQTERM, only the VCPU itself |
4306 | * can switch the state. |
4307 | */ |
4308 | atomic_dec_int(&vm->vm_vcpus_running)_atomic_dec_int(&vm->vm_vcpus_running); |
4309 | if (vcpu->vc_state == VCPU_STATE_REQTERM) { |
4310 | vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE; |
4311 | vcpu->vc_state = VCPU_STATE_TERMINATED; |
4312 | if (vm->vm_vcpus_running == 0) { |
4313 | rw_enter_write(&vmm_softc->vm_lock); |
4314 | vm_teardown(vm); |
4315 | rw_exit_write(&vmm_softc->vm_lock); |
4316 | } |
4317 | ret = 0; |
4318 | } else if (ret == 0 || ret == EAGAIN35) { |
4319 | /* If we are exiting, populate exit data so vmd can help. */ |
4320 | vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE0xFFFF |
4321 | : vcpu->vc_gueststate.vg_exit_reason; |
4322 | vrp->vrp_irqready = vcpu->vc_irqready; |
4323 | vcpu->vc_state = VCPU_STATE_STOPPED; |
4324 | |
4325 | if (copyout(&vcpu->vc_exit, vrp->vrp_exit, |
4326 | sizeof(struct vm_exit)) == EFAULT14) { |
4327 | ret = EFAULT14; |
4328 | } else |
4329 | ret = 0; |
4330 | } else { |
4331 | vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE; |
4332 | vcpu->vc_state = VCPU_STATE_TERMINATED; |
4333 | } |
4334 | |
4335 | rw_exit_write(&vcpu->vc_lock); |
4336 | |
4337 | return (ret); |
4338 | } |
4339 | |
4340 | /* |
4341 | * vcpu_must_stop |
4342 | * |
4343 | * Check if we need to (temporarily) stop running the VCPU for some reason, |
4344 | * such as: |
4345 | * - the VM was requested to terminate |
4346 | * - the proc running this VCPU has pending signals |
4347 | * |
4348 | * Parameters: |
4349 | * vcpu: the VCPU to check |
4350 | * |
4351 | * Return values: |
4352 | * 1: the VM owning this VCPU should stop |
4353 | * 0: no stop is needed |
4354 | */ |
4355 | int |
4356 | vcpu_must_stop(struct vcpu *vcpu) |
4357 | { |
4358 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; |
4359 | |
4360 | if (vcpu->vc_state == VCPU_STATE_REQTERM) |
4361 | return (1); |
4362 | if (SIGPENDING(p)(((p)->p_siglist | (p)->p_p->ps_siglist) & ~(p)-> p_sigmask) != 0) |
4363 | return (1); |
4364 | return (0); |
4365 | } |
4366 | |
/*
 * vmm_fpurestore
 *
 * Restore the guest's FPU state, saving the existing userland thread's
 * FPU context if necessary.  Must be called with interrupts disabled.
 *
 * Parameters:
 *  vcpu: the VCPU whose guest FPU context should be loaded onto the CPU
 *
 * Return values:
 *  0: success
 *  EINVAL: the guest's saved xsave/xrstor area or %xcr0 value was rejected
 *      by the hardware
 */
int
vmm_fpurestore(struct vcpu *vcpu)
{
	struct cpu_info *ci = curcpu();

	/* save vmm's FPU state if we haven't already */
	if (ci->ci_flags & CPUF_USERXSTATE) {
		ci->ci_flags &= ~CPUF_USERXSTATE;
		fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
	}

	/* Only load a guest area that a previous vmm_fpusave() captured. */
	if (vcpu->vc_fpuinited) {
		if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) {
			DPRINTF("%s: guest attempted to set invalid %s\n",
			    __func__, "xsave/xrstor state");
			return EINVAL;
		}
	}

	if (xsave_mask) {
		/* Restore guest %xcr0 */
		if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
			DPRINTF("%s: guest attempted to set invalid bits in "
			    "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
			    __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
			return EINVAL;
		}
	}

	return 0;
}
4404 | |
/*
 * vmm_fpusave
 *
 * Save the guest's FPU state. Must be called with interrupts disabled.
 *
 * Parameters:
 *  vcpu: the VCPU whose guest FPU context is currently on the CPU
 */
void
vmm_fpusave(struct vcpu *vcpu)
{
	if (xsave_mask) {
		/* Save guest %xcr0 */
		vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);

		/* Restore host %xcr0 */
		xsetbv(0, xsave_mask);
	}

	/*
	 * Save full copy of FPU state - guest content is always
	 * a subset of host's save area (see xsetbv exit handler)
	 */
	fpusavereset(&vcpu->vc_g_fpu);
	/* Mark the area valid so vmm_fpurestore() will xrstor it. */
	vcpu->vc_fpuinited = 1;
}
4428 | |
/*
 * vmm_translate_gva
 *
 * Translates a guest virtual address to a guest physical address by walking
 * the currently active page table (if needed).
 *
 * Note - this function can possibly alter the supplied VCPU state.
 * Specifically, it may inject exceptions depending on the current VCPU
 * configuration, and may alter %cr2 on #PF. Consequently, this function
 * should only be used as part of instruction emulation.
 *
 * Parameters:
 *  vcpu: The VCPU this translation should be performed for (guest MMU settings
 *   are gathered from this VCPU)
 *  va: virtual address to translate
 *  pa: pointer to paddr_t variable that will receive the translated physical
 *   address. 'pa' is unchanged on error.
 *  mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
 *   the address should be translated
 *
 * Return values:
 *  0: the address was successfully translated - 'pa' contains the physical
 *     address currently mapped by 'va'.
 *  EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
 *     and %cr2 set in the vcpu structure.
 *  EINVAL: an error occurred reading paging table structures
 */
int
vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
{
	int level, shift, pdidx;
	uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
	uint64_t shift_width, pte_size, *hva;
	paddr_t hpa;
	struct vcpu_reg_state vrs;

	level = 0;

	/* Snapshot guest control registers / MSRs for the walk. */
	if (vmm_softc->mode == VMM_MODE_EPT ||
	    vmm_softc->mode == VMM_MODE_VMX) {
		if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, &vrs))
			return (EINVAL);
	} else if (vmm_softc->mode == VMM_MODE_RVI ||
	    vmm_softc->mode == VMM_MODE_SVM) {
		if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs))
			return (EINVAL);
	} else {
		printf("%s: unknown vmm mode", __func__);
		return (EINVAL);
	}

	DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
	    vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);

	/* Paging disabled: guest virtual == guest physical. */
	if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
		DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
		    va);
		*pa = va;
		return (0);
	}

	pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3];

	/*
	 * Select the paging mode: 4-level (long mode) or 3-level (PAE)
	 * with 8-byte PTEs, or legacy 2-level with 4-byte PTEs.
	 */
	if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
		if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
			pte_size = sizeof(uint64_t);
			shift_width = 9;

			if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
				level = 4;
				mask = L4_MASK;
				shift = L4_SHIFT;
			} else {
				level = 3;
				mask = L3_MASK;
				shift = L3_SHIFT;
			}
		} else {
			level = 2;
			shift_width = 10;
			mask = 0xFFC00000;
			shift = 22;
			pte_size = sizeof(uint32_t);
		}
	} else {
		/* Real mode with CR0.PG set is not a valid configuration. */
		return (EINVAL);
	}

	DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
	    "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
	    shift_width);

	/* XXX: Check for R bit in segment selector and set A bit */

	for (;level > 0; level--) {
		pdidx = (va & mask) >> shift;
		pte_paddr = (pt_paddr) + (pdidx * pte_size);

		DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
		    level, pte_paddr);
		/* GPA of the PTE -> HPA via the VM's nested page tables. */
		if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
		    &hpa)) {
			DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
			    __func__, pte_paddr);
			return (EINVAL);
		}

		hpa = hpa | (pte_paddr & 0xFFF);
		hva = (uint64_t *)PMAP_DIRECT_MAP(hpa);
		DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
		    __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
		if (pte_size == 8)
			pte = *hva;
		else
			pte = *(uint32_t *)hva;

		DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
		    pte);

		/* XXX: Set CR2 */
		if (!(pte & PG_V))
			return (EFAULT);

		/* XXX: Check for SMAP */
		if ((mode == PROT_WRITE) && !(pte & PG_RW))
			return (EPERM);

		/* Supervisor-only page while guest is running at CPL > 0. */
		if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u))
			return (EPERM);

		/* Mark the PTE accessed, and dirty on write translations. */
		pte = pte | PG_U;
		if (mode == PROT_WRITE)
			pte = pte | PG_M;
		*hva = pte;

		/* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
		if (pte & PG_PS)
			break;

		if (level > 1) {
			pt_paddr = pte & PG_FRAME;
			shift -= shift_width;
			mask = mask >> shift_width;
		}
	}

	/* Combine the frame from the final PTE with the page offset bits. */
	low_mask = ((uint64_t)1ULL << shift) - 1;
	high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
	*pa = (pte & high_mask) | (va & low_mask);

	DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
	    va, *pa);

	return (0);
}
4584 | |
4585 | |
4586 | /* |
4587 | * vcpu_run_vmx |
4588 | * |
4589 | * VMX main loop used to run a VCPU. |
4590 | * |
4591 | * Parameters: |
4592 | * vcpu: The VCPU to run |
4593 | * vrp: run parameters |
4594 | * |
4595 | * Return values: |
4596 | * 0: The run loop exited and no help is needed from vmd |
4597 | * EAGAIN: The run loop exited and help from vmd is needed |
4598 | * EINVAL: an error occurred |
4599 | */ |
4600 | int |
4601 | vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) |
4602 | { |
4603 | int ret = 0, exitinfo; |
4604 | struct region_descriptor gdt; |
4605 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
4606 | uint64_t exit_reason, cr3, insn_error; |
4607 | struct schedstate_percpu *spc; |
4608 | struct vmx_invvpid_descriptor vid; |
4609 | uint64_t eii, procbased, int_st; |
4610 | uint16_t irq, ldt_sel; |
4611 | u_long s; |
4612 | struct region_descriptor gdtr, idtr; |
4613 | |
4614 | rw_assert_wrlock(&vcpu->vc_lock); |
4615 | |
4616 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
4617 | printf("%s: failed (re)loading vmcs\n", __func__); |
4618 | return (EINVAL22); |
4619 | } |
4620 | |
4621 | /* |
4622 | * If we are returning from userspace (vmd) because we exited |
4623 | * last time, fix up any needed vcpu state first. Which state |
4624 | * needs to be fixed up depends on what vmd populated in the |
4625 | * exit data structure. |
4626 | */ |
4627 | irq = vrp->vrp_irq; |
4628 | |
4629 | if (vrp->vrp_continue) { |
4630 | switch (vcpu->vc_gueststate.vg_exit_reason) { |
4631 | case VMX_EXIT_IO30: |
4632 | if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) |
4633 | vcpu->vc_gueststate.vg_rax = |
4634 | vcpu->vc_exit.vei.vei_data; |
4635 | break; |
4636 | case VM_EXIT_NONE0xFFFF: |
4637 | case VMX_EXIT_HLT12: |
4638 | case VMX_EXIT_INT_WINDOW7: |
4639 | case VMX_EXIT_EXTINT1: |
4640 | case VMX_EXIT_EPT_VIOLATION48: |
4641 | case VMX_EXIT_CPUID10: |
4642 | case VMX_EXIT_XSETBV55: |
4643 | break; |
4644 | #ifdef VMM_DEBUG |
4645 | case VMX_EXIT_TRIPLE_FAULT2: |
4646 | DPRINTF("%s: vm %d vcpu %d triple fault\n", |
4647 | __func__, vcpu->vc_parent->vm_id, |
4648 | vcpu->vc_id); |
4649 | vmx_vcpu_dump_regs(vcpu); |
4650 | dump_vcpu(vcpu); |
4651 | vmx_dump_vmcs(vcpu); |
4652 | break; |
4653 | case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33: |
4654 | DPRINTF("%s: vm %d vcpu %d failed entry " |
4655 | "due to invalid guest state\n", |
4656 | __func__, vcpu->vc_parent->vm_id, |
4657 | vcpu->vc_id); |
4658 | vmx_vcpu_dump_regs(vcpu); |
4659 | dump_vcpu(vcpu); |
4660 | return (EINVAL22); |
4661 | default: |
4662 | DPRINTF("%s: unimplemented exit type %d (%s)\n", |
4663 | __func__, |
4664 | vcpu->vc_gueststate.vg_exit_reason, |
4665 | vmx_exit_reason_decode( |
4666 | vcpu->vc_gueststate.vg_exit_reason)); |
4667 | vmx_vcpu_dump_regs(vcpu); |
4668 | dump_vcpu(vcpu); |
4669 | break; |
4670 | #endif /* VMM_DEBUG */ |
4671 | } |
4672 | } |
4673 | |
4674 | setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1); |
4675 | if (gdt.rd_base == 0) { |
4676 | printf("%s: setregion\n", __func__); |
4677 | return (EINVAL22); |
4678 | } |
4679 | |
4680 | /* Host GDTR base */ |
4681 | if (vmwrite(VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base)) { |
4682 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4683 | VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base); |
4684 | return (EINVAL22); |
4685 | } |
4686 | |
4687 | /* Host TR base */ |
4688 | if (vmwrite(VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss)) { |
4689 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4690 | VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss); |
4691 | return (EINVAL22); |
4692 | } |
4693 | |
4694 | /* Host CR3 */ |
4695 | cr3 = rcr3(); |
4696 | if (vmwrite(VMCS_HOST_IA32_CR30x6C02, cr3)) { |
4697 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4698 | VMCS_HOST_IA32_CR30x6C02, cr3); |
4699 | return (EINVAL22); |
4700 | } |
4701 | |
4702 | /* Handle vmd(8) injected interrupts */ |
4703 | /* Is there an interrupt pending injection? */ |
4704 | if (irq != 0xFFFF) { |
4705 | if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, &int_st)) { |
4706 | printf("%s: can't get interruptibility state\n", |
4707 | __func__); |
4708 | return (EINVAL22); |
4709 | } |
4710 | |
4711 | /* Interruptibility state 0x3 covers NMIs and STI */ |
4712 | if (!(int_st & 0x3) && vcpu->vc_irqready) { |
4713 | eii = (irq & 0xFF); |
4714 | eii |= (1ULL << 31); /* Valid */ |
4715 | eii |= (0ULL << 8); /* Hardware Interrupt */ |
4716 | if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) { |
4717 | printf("vcpu_run_vmx: can't vector " |
4718 | "interrupt to guest\n"); |
4719 | return (EINVAL22); |
4720 | } |
4721 | |
4722 | irq = 0xFFFF; |
4723 | } |
4724 | } else if (!vcpu->vc_intr) { |
4725 | /* |
4726 | * Disable window exiting |
4727 | */ |
4728 | if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) { |
4729 | printf("%s: can't read procbased ctls on exit\n", |
4730 | __func__); |
4731 | return (EINVAL22); |
4732 | } else { |
4733 | procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2); |
4734 | if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) { |
4735 | printf("%s: can't write procbased ctls " |
4736 | "on exit\n", __func__); |
4737 | return (EINVAL22); |
4738 | } |
4739 | } |
4740 | } |
4741 | |
4742 | while (ret == 0) { |
4743 | #ifdef VMM_DEBUG |
4744 | paddr_t pa = 0ULL; |
4745 | vmptrst(&pa); |
4746 | KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 4746, "pa == vcpu->vc_control_pa" )); |
4747 | #endif /* VMM_DEBUG */ |
4748 | |
4749 | vmm_update_pvclock(vcpu); |
4750 | |
4751 | /* Inject event if present */ |
4752 | if (vcpu->vc_event != 0) { |
4753 | eii = (vcpu->vc_event & 0xFF); |
4754 | eii |= (1ULL << 31); /* Valid */ |
4755 | |
4756 | /* Set the "Send error code" flag for certain vectors */ |
4757 | switch (vcpu->vc_event & 0xFF) { |
4758 | case VMM_EX_DF8: |
4759 | case VMM_EX_TS10: |
4760 | case VMM_EX_NP11: |
4761 | case VMM_EX_SS12: |
4762 | case VMM_EX_GP13: |
4763 | case VMM_EX_PF14: |
4764 | case VMM_EX_AC17: |
4765 | eii |= (1ULL << 11); |
4766 | } |
4767 | |
4768 | eii |= (3ULL << 8); /* Hardware Exception */ |
4769 | if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) { |
4770 | printf("%s: can't vector event to guest\n", |
4771 | __func__); |
4772 | ret = EINVAL22; |
4773 | break; |
4774 | } |
4775 | |
4776 | if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018, 0)) { |
4777 | printf("%s: can't write error code to guest\n", |
4778 | __func__); |
4779 | ret = EINVAL22; |
4780 | break; |
4781 | } |
4782 | |
4783 | vcpu->vc_event = 0; |
4784 | } |
4785 | |
4786 | if (vcpu->vc_vmx_vpid_enabled) { |
4787 | /* Invalidate old TLB mappings */ |
4788 | vid.vid_vpid = vcpu->vc_parent->vm_id; |
4789 | vid.vid_addr = 0; |
4790 | invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid); |
4791 | } |
4792 | |
4793 | /* Start / resume the VCPU */ |
4794 | |
4795 | /* Disable interrupts and save the current host FPU state. */ |
4796 | s = intr_disable(); |
4797 | if ((ret = vmm_fpurestore(vcpu))) { |
4798 | intr_restore(s); |
4799 | break; |
4800 | } |
4801 | |
4802 | sgdt(&gdtr); |
4803 | sidt(&idtr); |
4804 | sldt(&ldt_sel); |
4805 | |
4806 | TRACEPOINT(vmm, guest_enter, vcpu, vrp)do { extern struct dt_probe (dt_static_vmm_guest_enter); struct dt_probe *dtp = &(dt_static_vmm_guest_enter); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp); } } while (0); |
4807 | |
4808 | ret = vmx_enter_guest(&vcpu->vc_control_pa, |
4809 | &vcpu->vc_gueststate, |
4810 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1), |
4811 | ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr); |
4812 | |
4813 | bare_lgdt(&gdtr); |
4814 | lidt(&idtr); |
4815 | lldt(ldt_sel); |
4816 | |
4817 | /* |
4818 | * On exit, interrupts are disabled, and we are running with |
4819 | * the guest FPU state still possibly on the CPU. Save the FPU |
4820 | * state before re-enabling interrupts. |
4821 | */ |
4822 | vmm_fpusave(vcpu); |
4823 | intr_restore(s); |
4824 | |
4825 | TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason)do { extern struct dt_probe (dt_static_vmm_guest_exit); struct dt_probe *dtp = &(dt_static_vmm_guest_exit); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp, exit_reason ); } } while (0); |
4826 | |
4827 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (1)); |
4828 | exit_reason = VM_EXIT_NONE0xFFFF; |
4829 | |
4830 | /* If we exited successfully ... */ |
4831 | if (ret == 0) { |
4832 | /* |
4833 | * ret == 0 implies we entered the guest, and later |
4834 | * exited for some valid reason |
4835 | */ |
4836 | exitinfo = vmx_get_exit_info( |
4837 | &vcpu->vc_gueststate.vg_rip, &exit_reason); |
4838 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, |
4839 | &vcpu->vc_gueststate.vg_rflags)) { |
4840 | printf("%s: can't read guest rflags during " |
4841 | "exit\n", __func__); |
4842 | ret = EINVAL22; |
4843 | break; |
4844 | } |
4845 | |
4846 | /* Update our state */ |
4847 | if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP0x1)) { |
4848 | printf("%s: cannot read guest rip\n", __func__); |
4849 | ret = EINVAL22; |
4850 | break; |
4851 | } |
4852 | |
4853 | if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON0x2)) { |
4854 | printf("%s: cant read exit reason\n", __func__); |
4855 | ret = EINVAL22; |
4856 | break; |
4857 | } |
4858 | |
4859 | /* |
4860 | * Handle the exit. This will alter "ret" to EAGAIN if |
4861 | * the exit handler determines help from vmd is needed. |
4862 | */ |
4863 | vcpu->vc_gueststate.vg_exit_reason = exit_reason; |
4864 | ret = vmx_handle_exit(vcpu); |
4865 | |
4866 | if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200) |
4867 | vcpu->vc_irqready = 1; |
4868 | else |
4869 | vcpu->vc_irqready = 0; |
4870 | |
4871 | /* |
4872 | * If not ready for interrupts, but interrupts pending, |
4873 | * enable interrupt window exiting. |
4874 | */ |
4875 | if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { |
4876 | if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) { |
4877 | printf("%s: can't read procbased ctls " |
4878 | "on intwin exit\n", __func__); |
4879 | ret = EINVAL22; |
4880 | break; |
4881 | } |
4882 | |
4883 | procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2); |
4884 | if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) { |
4885 | printf("%s: can't write procbased ctls " |
4886 | "on intwin exit\n", __func__); |
4887 | ret = EINVAL22; |
4888 | break; |
4889 | } |
4890 | } |
4891 | |
4892 | /* |
4893 | * Exit to vmd if we are terminating, failed to enter, |
4894 | * or need help (device I/O) |
4895 | */ |
4896 | if (ret || vcpu_must_stop(vcpu)) |
4897 | break; |
4898 | |
4899 | if (vcpu->vc_intr && vcpu->vc_irqready) { |
4900 | ret = EAGAIN35; |
4901 | break; |
4902 | } |
4903 | |
4904 | /* Check if we should yield - don't hog the {p,v}pu */ |
4905 | spc = &ci->ci_schedstate; |
4906 | if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002) |
4907 | break; |
4908 | |
4909 | } else { |
4910 | /* |
4911 | * We failed vmresume or vmlaunch for some reason, |
4912 | * typically due to invalid vmcs state or other |
4913 | * reasons documented in SDM Vol 3C 30.4. |
4914 | */ |
4915 | switch (ret) { |
4916 | case VMX_FAIL_LAUNCH_INVALID_VMCS2: |
4917 | printf("%s: failed %s with invalid vmcs\n", |
4918 | __func__, |
4919 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4920 | ? "vmresume" : "vmlaunch")); |
4921 | break; |
4922 | case VMX_FAIL_LAUNCH_VALID_VMCS3: |
4923 | printf("%s: failed %s with valid vmcs\n", |
4924 | __func__, |
4925 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4926 | ? "vmresume" : "vmlaunch")); |
4927 | break; |
4928 | default: |
4929 | printf("%s: failed %s for unknown reason\n", |
4930 | __func__, |
4931 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4932 | ? "vmresume" : "vmlaunch")); |
4933 | } |
4934 | |
4935 | ret = EINVAL22; |
4936 | |
4937 | /* Try to translate a vmfail error code, if possible. */ |
4938 | if (vmread(VMCS_INSTRUCTION_ERROR0x4400, &insn_error)) { |
4939 | printf("%s: can't read insn error field\n", |
4940 | __func__); |
4941 | } else |
4942 | printf("%s: error code = %lld, %s\n", __func__, |
4943 | insn_error, |
4944 | vmx_instruction_error_decode(insn_error)); |
4945 | #ifdef VMM_DEBUG |
4946 | vmx_vcpu_dump_regs(vcpu); |
4947 | dump_vcpu(vcpu); |
4948 | #endif /* VMM_DEBUG */ |
4949 | } |
4950 | } |
4951 | |
4952 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
4953 | |
4954 | /* Copy the VCPU register state to the exit structure */ |
4955 | if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs)) |
4956 | ret = EINVAL22; |
4957 | vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu); |
4958 | |
4959 | return (ret); |
4960 | } |
4961 | |
4962 | /* |
4963 | * vmx_handle_intr |
4964 | * |
4965 | * Handle host (external) interrupts. We read which interrupt fired by |
4966 | * extracting the vector from the VMCS and dispatch the interrupt directly |
4967 | * to the host using vmm_dispatch_intr. |
4968 | */ |
4969 | void |
4970 | vmx_handle_intr(struct vcpu *vcpu) |
4971 | { |
4972 | uint8_t vec; |
4973 | uint64_t eii; |
4974 | struct gate_descriptor *idte; |
4975 | vaddr_t handler; |
4976 | |
4977 | if (vmread(VMCS_EXIT_INTERRUPTION_INFO0x4404, &eii)) { |
4978 | printf("%s: can't obtain intr info\n", __func__); |
4979 | return; |
4980 | } |
4981 | |
4982 | vec = eii & 0xFF; |
4983 | |
4984 | /* XXX check "error valid" code in eii, abort if 0 */ |
4985 | idte=&idt[vec]; |
4986 | handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16); |
4987 | vmm_dispatch_intr(handler); |
4988 | } |
4989 | |
4990 | /* |
4991 | * svm_handle_hlt |
4992 | * |
4993 | * Handle HLT exits |
4994 | * |
4995 | * Parameters |
4996 | * vcpu: The VCPU that executed the HLT instruction |
4997 | * |
4998 | * Return Values: |
4999 | * EIO: The guest halted with interrupts disabled |
5000 | * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU |
5001 | * until a virtual interrupt is ready to inject |
5002 | */ |
5003 | int |
5004 | svm_handle_hlt(struct vcpu *vcpu) |
5005 | { |
5006 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5007 | uint64_t rflags = vmcb->v_rflags; |
5008 | |
5009 | /* All HLT insns are 1 byte */ |
5010 | vcpu->vc_gueststate.vg_rip += 1; |
5011 | |
5012 | if (!(rflags & PSL_I0x00000200)) { |
5013 | DPRINTF("%s: guest halted with interrupts disabled\n", |
5014 | __func__); |
5015 | return (EIO5); |
5016 | } |
5017 | |
5018 | return (EAGAIN35); |
5019 | } |
5020 | |
5021 | /* |
5022 | * vmx_handle_hlt |
5023 | * |
5024 | * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate |
5025 | * the guest (no NMIs handled) by returning EIO to vmd. |
5026 | * |
5027 | * Parameters: |
5028 | * vcpu: The VCPU that executed the HLT instruction |
5029 | * |
5030 | * Return Values: |
5031 | * EINVAL: An error occurred extracting information from the VMCS, or an |
5032 | * invalid HLT instruction was encountered |
5033 | * EIO: The guest halted with interrupts disabled |
5034 | * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU |
5035 | * until a virtual interrupt is ready to inject |
5036 | * |
5037 | */ |
5038 | int |
5039 | vmx_handle_hlt(struct vcpu *vcpu) |
5040 | { |
5041 | uint64_t insn_length, rflags; |
5042 | |
5043 | if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) { |
5044 | printf("%s: can't obtain instruction length\n", __func__); |
5045 | return (EINVAL22); |
5046 | } |
5047 | |
5048 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &rflags)) { |
5049 | printf("%s: can't obtain guest rflags\n", __func__); |
5050 | return (EINVAL22); |
5051 | } |
5052 | |
5053 | if (insn_length != 1) { |
5054 | DPRINTF("%s: HLT with instruction length %lld not supported\n", |
5055 | __func__, insn_length); |
5056 | return (EINVAL22); |
5057 | } |
5058 | |
5059 | if (!(rflags & PSL_I0x00000200)) { |
5060 | DPRINTF("%s: guest halted with interrupts disabled\n", |
5061 | __func__); |
5062 | return (EIO5); |
5063 | } |
5064 | |
5065 | vcpu->vc_gueststate.vg_rip += insn_length; |
5066 | return (EAGAIN35); |
5067 | } |
5068 | |
5069 | /* |
5070 | * vmx_get_exit_info |
5071 | * |
5072 | * Returns exit information containing the current guest RIP and exit reason |
5073 | * in rip and exit_reason. The return value is a bitmask indicating whether |
5074 | * reading the RIP and exit reason was successful. |
5075 | */ |
5076 | int |
5077 | vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason) |
5078 | { |
5079 | int rv = 0; |
5080 | |
5081 | if (vmread(VMCS_GUEST_IA32_RIP0x681E, rip) == 0) { |
5082 | rv |= VMX_EXIT_INFO_HAVE_RIP0x1; |
5083 | if (vmread(VMCS_EXIT_REASON0x4402, exit_reason) == 0) |
5084 | rv |= VMX_EXIT_INFO_HAVE_REASON0x2; |
5085 | } |
5086 | return (rv); |
5087 | } |
5088 | |
/*
 * svm_handle_exit
 *
 * Handle exits from the VM by decoding the exit reason and calling various
 * subhandlers as needed.
 *
 * Parameters:
 *  vcpu: The VCPU whose VMCB holds the exit state to decode
 *
 * Return values:
 *  0: the exit was handled in-kernel and the VCPU can re-enter
 *  EAGAIN: help from vmd(8) is needed (propagated from a subhandler or
 *      on guest shutdown)
 *  EINVAL: the exit reason is unhandled or a subhandler failed
 */
int
svm_handle_exit(struct vcpu *vcpu)
{
	uint64_t exit_reason, rflags;
	int update_rip, ret = 0;
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

	update_rip = 0;
	exit_reason = vcpu->vc_gueststate.vg_exit_reason;
	rflags = vcpu->vc_gueststate.vg_rflags;

	switch (exit_reason) {
	case SVM_VMEXIT_VINTR:
		if (!(rflags & PSL_I)) {
			DPRINTF("%s: impossible interrupt window exit "
			    "config\n", __func__);
			ret = EINVAL;
			break;
		}

		/*
		 * Guest is now ready for interrupts, so disable interrupt
		 * window exiting.
		 */
		vmcb->v_irq = 0;
		vmcb->v_intr_vector = 0;
		vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR;
		svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | SVM_CLEANBITS_I);

		update_rip = 0;
		break;
	case SVM_VMEXIT_INTR:
		/* Host interrupt; nothing to emulate, just re-enter. */
		update_rip = 0;
		break;
	case SVM_VMEXIT_SHUTDOWN:
		/* Guest triple-faulted; let vmd(8) deal with it. */
		update_rip = 0;
		ret = EAGAIN;
		break;
	case SVM_VMEXIT_NPF:
		ret = svm_handle_np_fault(vcpu);
		break;
	case SVM_VMEXIT_CPUID:
		ret = vmm_handle_cpuid(vcpu);
		update_rip = 1;
		break;
	case SVM_VMEXIT_MSR:
		ret = svm_handle_msr(vcpu);
		update_rip = 1;
		break;
	case SVM_VMEXIT_XSETBV:
		ret = svm_handle_xsetbv(vcpu);
		update_rip = 1;
		break;
	case SVM_VMEXIT_IOIO:
		ret = svm_handle_inout(vcpu);
		update_rip = 1;
		break;
	case SVM_VMEXIT_HLT:
		ret = svm_handle_hlt(vcpu);
		update_rip = 1;
		break;
	/* Instructions we do not allow the guest to use: inject #UD. */
	case SVM_VMEXIT_MWAIT:
	case SVM_VMEXIT_MWAIT_CONDITIONAL:
	case SVM_VMEXIT_MONITOR:
	case SVM_VMEXIT_VMRUN:
	case SVM_VMEXIT_VMMCALL:
	case SVM_VMEXIT_VMLOAD:
	case SVM_VMEXIT_VMSAVE:
	case SVM_VMEXIT_STGI:
	case SVM_VMEXIT_CLGI:
	case SVM_VMEXIT_SKINIT:
	case SVM_VMEXIT_RDTSCP:
	case SVM_VMEXIT_ICEBP:
	case SVM_VMEXIT_INVLPGA:
		ret = vmm_inject_ud(vcpu);
		update_rip = 0;
		break;
	default:
		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
		    exit_reason, (uint64_t)vcpu->vc_control_pa);
		return (EINVAL);
	}

	if (update_rip) {
		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;

		/* Guest is single-stepping: deliver the trap it expects. */
		if (rflags & PSL_T) {
			if (vmm_inject_db(vcpu)) {
				printf("%s: can't inject #DB exception to "
				    "guest", __func__);
				return (EINVAL);
			}
		}
	}

	/* Enable SVME in EFER (must always be set) */
	vmcb->v_efer |= EFER_SVME;
	svm_set_dirty(vcpu, SVM_CLEANBITS_CR);

	return (ret);
}
5196 | |
5197 | /* |
5198 | * vmx_handle_exit |
5199 | * |
5200 | * Handle exits from the VM by decoding the exit reason and calling various |
5201 | * subhandlers as needed. |
5202 | */ |
5203 | int |
5204 | vmx_handle_exit(struct vcpu *vcpu) |
5205 | { |
5206 | uint64_t exit_reason, rflags, istate; |
5207 | int update_rip, ret = 0; |
5208 | |
5209 | update_rip = 0; |
5210 | exit_reason = vcpu->vc_gueststate.vg_exit_reason; |
5211 | rflags = vcpu->vc_gueststate.vg_rflags; |
5212 | |
5213 | switch (exit_reason) { |
5214 | case VMX_EXIT_INT_WINDOW7: |
5215 | if (!(rflags & PSL_I0x00000200)) { |
5216 | DPRINTF("%s: impossible interrupt window exit " |
5217 | "config\n", __func__); |
5218 | ret = EINVAL22; |
5219 | break; |
5220 | } |
5221 | |
5222 | ret = EAGAIN35; |
5223 | update_rip = 0; |
5224 | break; |
5225 | case VMX_EXIT_EPT_VIOLATION48: |
5226 | ret = vmx_handle_np_fault(vcpu); |
5227 | break; |
5228 | case VMX_EXIT_CPUID10: |
5229 | ret = vmm_handle_cpuid(vcpu); |
5230 | update_rip = 1; |
5231 | break; |
5232 | case VMX_EXIT_IO30: |
5233 | ret = vmx_handle_inout(vcpu); |
5234 | update_rip = 1; |
5235 | break; |
5236 | case VMX_EXIT_EXTINT1: |
5237 | vmx_handle_intr(vcpu); |
5238 | update_rip = 0; |
5239 | break; |
5240 | case VMX_EXIT_CR_ACCESS28: |
5241 | ret = vmx_handle_cr(vcpu); |
5242 | update_rip = 1; |
5243 | break; |
5244 | case VMX_EXIT_HLT12: |
5245 | ret = vmx_handle_hlt(vcpu); |
5246 | update_rip = 1; |
5247 | break; |
5248 | case VMX_EXIT_RDMSR31: |
5249 | ret = vmx_handle_rdmsr(vcpu); |
5250 | update_rip = 1; |
5251 | break; |
5252 | case VMX_EXIT_WRMSR32: |
5253 | ret = vmx_handle_wrmsr(vcpu); |
5254 | update_rip = 1; |
5255 | break; |
5256 | case VMX_EXIT_XSETBV55: |
5257 | ret = vmx_handle_xsetbv(vcpu); |
5258 | update_rip = 1; |
5259 | break; |
5260 | case VMX_EXIT_MWAIT36: |
5261 | case VMX_EXIT_MONITOR39: |
5262 | case VMX_EXIT_VMXON27: |
5263 | case VMX_EXIT_VMWRITE25: |
5264 | case VMX_EXIT_VMREAD23: |
5265 | case VMX_EXIT_VMLAUNCH20: |
5266 | case VMX_EXIT_VMRESUME24: |
5267 | case VMX_EXIT_VMPTRLD21: |
5268 | case VMX_EXIT_VMPTRST22: |
5269 | case VMX_EXIT_VMCLEAR19: |
5270 | case VMX_EXIT_VMCALL18: |
5271 | case VMX_EXIT_VMFUNC59: |
5272 | case VMX_EXIT_VMXOFF26: |
5273 | case VMX_EXIT_INVVPID53: |
5274 | case VMX_EXIT_INVEPT50: |
5275 | ret = vmm_inject_ud(vcpu); |
5276 | update_rip = 0; |
5277 | break; |
5278 | case VMX_EXIT_TRIPLE_FAULT2: |
5279 | #ifdef VMM_DEBUG |
5280 | DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__, |
5281 | vcpu->vc_parent->vm_id, vcpu->vc_id); |
5282 | vmx_vcpu_dump_regs(vcpu); |
5283 | dump_vcpu(vcpu); |
5284 | vmx_dump_vmcs(vcpu); |
5285 | #endif /* VMM_DEBUG */ |
5286 | ret = EAGAIN35; |
5287 | update_rip = 0; |
5288 | break; |
5289 | default: |
5290 | #ifdef VMM_DEBUG |
5291 | DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__, |
5292 | exit_reason, vmx_exit_reason_decode(exit_reason)); |
5293 | #endif /* VMM_DEBUG */ |
5294 | return (EINVAL22); |
5295 | } |
5296 | |
5297 | if (update_rip) { |
5298 | if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, |
5299 | vcpu->vc_gueststate.vg_rip)) { |
5300 | printf("%s: can't advance rip\n", __func__); |
5301 | return (EINVAL22); |
5302 | } |
5303 | |
5304 | if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, |
5305 | &istate)) { |
5306 | printf("%s: can't read interruptibility state\n", |
5307 | __func__); |
5308 | return (EINVAL22); |
5309 | } |
5310 | |
5311 | /* Interruptibility state 0x3 covers NMIs and STI */ |
5312 | istate &= ~0x3; |
5313 | |
5314 | if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, |
5315 | istate)) { |
5316 | printf("%s: can't write interruptibility state\n", |
5317 | __func__); |
5318 | return (EINVAL22); |
5319 | } |
5320 | |
5321 | if (rflags & PSL_T0x00000100) { |
5322 | if (vmm_inject_db(vcpu)) { |
5323 | printf("%s: can't inject #DB exception to " |
5324 | "guest", __func__); |
5325 | return (EINVAL22); |
5326 | } |
5327 | } |
5328 | } |
5329 | |
5330 | return (ret); |
5331 | } |
5332 | |
5333 | /* |
5334 | * vmm_inject_gp |
5335 | * |
5336 | * Injects an #GP exception into the guest VCPU. |
5337 | * |
5338 | * Parameters: |
5339 | * vcpu: vcpu to inject into |
5340 | * |
5341 | * Return values: |
5342 | * Always 0 |
5343 | */ |
5344 | int |
5345 | vmm_inject_gp(struct vcpu *vcpu) |
5346 | { |
5347 | DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__, |
5348 | vcpu->vc_gueststate.vg_rip); |
5349 | vcpu->vc_event = VMM_EX_GP13; |
5350 | |
5351 | return (0); |
5352 | } |
5353 | |
5354 | /* |
5355 | * vmm_inject_ud |
5356 | * |
5357 | * Injects an #UD exception into the guest VCPU. |
5358 | * |
5359 | * Parameters: |
5360 | * vcpu: vcpu to inject into |
5361 | * |
5362 | * Return values: |
5363 | * Always 0 |
5364 | */ |
5365 | int |
5366 | vmm_inject_ud(struct vcpu *vcpu) |
5367 | { |
5368 | DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__, |
5369 | vcpu->vc_gueststate.vg_rip); |
5370 | vcpu->vc_event = VMM_EX_UD6; |
5371 | |
5372 | return (0); |
5373 | } |
5374 | |
5375 | /* |
5376 | * vmm_inject_db |
5377 | * |
5378 | * Injects a #DB exception into the guest VCPU. |
5379 | * |
5380 | * Parameters: |
5381 | * vcpu: vcpu to inject into |
5382 | * |
5383 | * Return values: |
5384 | * Always 0 |
5385 | */ |
5386 | int |
5387 | vmm_inject_db(struct vcpu *vcpu) |
5388 | { |
5389 | DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__, |
5390 | vcpu->vc_gueststate.vg_rip); |
5391 | vcpu->vc_event = VMM_EX_DB1; |
5392 | |
5393 | return (0); |
5394 | } |
5395 | |
5396 | /* |
5397 | * vmm_get_guest_memtype |
5398 | * |
5399 | * Returns the type of memory 'gpa' refers to in the context of vm 'vm' |
5400 | */ |
5401 | int |
5402 | vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) |
5403 | { |
5404 | int i; |
5405 | struct vm_mem_range *vmr; |
5406 | |
5407 | if (gpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && gpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) { |
5408 | DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); |
5409 | return (VMM_MEM_TYPE_REGULAR); |
5410 | } |
5411 | |
5412 | /* XXX Use binary search? */ |
5413 | for (i = 0; i < vm->vm_nmemranges; i++) { |
5414 | vmr = &vm->vm_memranges[i]; |
5415 | |
5416 | /* |
5417 | * vm_memranges are ascending. gpa can no longer be in one of |
5418 | * the memranges |
5419 | */ |
5420 | if (gpa < vmr->vmr_gpa) |
5421 | break; |
5422 | |
5423 | if (gpa < vmr->vmr_gpa + vmr->vmr_size) |
5424 | return (VMM_MEM_TYPE_REGULAR); |
5425 | } |
5426 | |
5427 | DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); |
5428 | return (VMM_MEM_TYPE_UNKNOWN); |
5429 | } |
5430 | |
5431 | /* |
5432 | * vmx_get_exit_qualification |
5433 | * |
5434 | * Return the current VMCS' exit qualification information |
5435 | */ |
5436 | int |
5437 | vmx_get_exit_qualification(uint64_t *exit_qualification) |
5438 | { |
5439 | if (vmread(VMCS_GUEST_EXIT_QUALIFICATION0x6400, exit_qualification)) { |
5440 | printf("%s: can't extract exit qual\n", __func__); |
5441 | return (EINVAL22); |
5442 | } |
5443 | |
5444 | return (0); |
5445 | } |
5446 | |
5447 | /* |
5448 | * vmx_get_guest_faulttype |
5449 | * |
5450 | * Determines the type (R/W/X) of the last fault on the VCPU last run on |
5451 | * this PCPU. |
5452 | */ |
5453 | int |
5454 | vmx_get_guest_faulttype(void) |
5455 | { |
5456 | uint64_t exit_qual; |
5457 | uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3) | |
5458 | IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4) | IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5); |
5459 | vm_prot_t prot, was_prot; |
5460 | |
5461 | if (vmx_get_exit_qualification(&exit_qual)) |
5462 | return (-1); |
5463 | |
5464 | if ((exit_qual & presentmask) == 0) |
5465 | return VM_FAULT_INVALID((vm_fault_t) 0x0); |
5466 | |
5467 | was_prot = 0; |
5468 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3)) |
5469 | was_prot |= PROT_READ0x01; |
5470 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4)) |
5471 | was_prot |= PROT_WRITE0x02; |
5472 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5)) |
5473 | was_prot |= PROT_EXEC0x04; |
5474 | |
5475 | prot = 0; |
5476 | if (exit_qual & IA32_VMX_EPT_FAULT_READ(1ULL << 0)) |
5477 | prot = PROT_READ0x01; |
5478 | else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE(1ULL << 1)) |
5479 | prot = PROT_WRITE0x02; |
5480 | else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC(1ULL << 2)) |
5481 | prot = PROT_EXEC0x04; |
5482 | |
5483 | if ((was_prot & prot) == 0) |
5484 | return VM_FAULT_PROTECT((vm_fault_t) 0x1); |
5485 | |
5486 | return (-1); |
5487 | } |
5488 | |
5489 | /* |
5490 | * svm_get_guest_faulttype |
5491 | * |
5492 | * Determines the type (R/W/X) of the last fault on the VCPU last run on |
5493 | * this PCPU. |
5494 | */ |
5495 | int |
5496 | svm_get_guest_faulttype(struct vmcb *vmcb) |
5497 | { |
5498 | if (!(vmcb->v_exitinfo1 & 0x1)) |
5499 | return VM_FAULT_INVALID((vm_fault_t) 0x0); |
5500 | return VM_FAULT_PROTECT((vm_fault_t) 0x1); |
5501 | } |
5502 | |
5503 | /* |
5504 | * svm_fault_page |
5505 | * |
5506 | * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' |
5507 | * at address 'gpa'. |
5508 | */ |
5509 | int |
5510 | svm_fault_page(struct vcpu *vcpu, paddr_t gpa) |
5511 | { |
5512 | int ret; |
5513 | |
5514 | ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2), |
5515 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04); |
5516 | if (ret) |
5517 | printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", |
5518 | __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); |
5519 | |
5520 | return (ret); |
5521 | } |
5522 | |
5523 | /* |
5524 | * svm_handle_np_fault |
5525 | * |
5526 | * High level nested paging handler for SVM. Verifies that a fault is for a |
5527 | * valid memory region, then faults a page, or aborts otherwise. |
5528 | */ |
5529 | int |
5530 | svm_handle_np_fault(struct vcpu *vcpu) |
5531 | { |
5532 | uint64_t gpa; |
5533 | int gpa_memtype, ret; |
5534 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5535 | |
5536 | ret = 0; |
5537 | |
5538 | gpa = vmcb->v_exitinfo2; |
5539 | |
5540 | gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); |
5541 | switch (gpa_memtype) { |
5542 | case VMM_MEM_TYPE_REGULAR: |
5543 | ret = svm_fault_page(vcpu, gpa); |
5544 | break; |
5545 | default: |
5546 | printf("unknown memory type %d for GPA 0x%llx\n", |
5547 | gpa_memtype, gpa); |
5548 | return (EINVAL22); |
5549 | } |
5550 | |
5551 | return (ret); |
5552 | } |
5553 | |
5554 | /* |
5555 | * vmx_fault_page |
5556 | * |
5557 | * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' |
5558 | * at address 'gpa'. |
5559 | * |
5560 | * Parameters: |
5561 | * vcpu: guest VCPU requiring the page to be faulted into the UVM map |
5562 | * gpa: guest physical address that triggered the fault |
5563 | * |
5564 | * Return Values: |
5565 | * 0: if successful |
5566 | * EINVAL: if fault type could not be determined or VMCS reload fails |
5567 | * EAGAIN: if a protection fault occurred, ie writing to a read-only page |
5568 | * errno: if uvm_fault(9) fails to wire in the page |
5569 | */ |
5570 | int |
5571 | vmx_fault_page(struct vcpu *vcpu, paddr_t gpa) |
5572 | { |
5573 | int fault_type, ret; |
5574 | |
5575 | fault_type = vmx_get_guest_faulttype(); |
5576 | if (fault_type == -1) { |
5577 | printf("%s: invalid fault type\n", __func__); |
5578 | return (EINVAL22); |
5579 | } |
5580 | |
5581 | if (fault_type == VM_FAULT_PROTECT((vm_fault_t) 0x1)) { |
5582 | vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT; |
5583 | return (EAGAIN35); |
5584 | } |
5585 | |
5586 | /* We may sleep during uvm_fault(9), so reload VMCS. */ |
5587 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
5588 | ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2), |
5589 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04); |
5590 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
5591 | printf("%s: failed to reload vmcs\n", __func__); |
5592 | return (EINVAL22); |
5593 | } |
5594 | |
5595 | if (ret) |
5596 | printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", |
5597 | __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); |
5598 | |
5599 | return (ret); |
5600 | } |
5601 | |
5602 | /* |
5603 | * vmx_handle_np_fault |
5604 | * |
5605 | * High level nested paging handler for VMX. Verifies that a fault is for a |
5606 | * valid memory region, then faults a page, or aborts otherwise. |
5607 | */ |
5608 | int |
5609 | vmx_handle_np_fault(struct vcpu *vcpu) |
5610 | { |
5611 | uint64_t gpa; |
5612 | int gpa_memtype, ret; |
5613 | |
5614 | ret = 0; |
Value stored to 'ret' is never read | |
5615 | if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS0x2400, &gpa)) { |
5616 | printf("%s: cannot extract faulting pa\n", __func__); |
5617 | return (EINVAL22); |
5618 | } |
5619 | |
5620 | gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); |
5621 | switch (gpa_memtype) { |
5622 | case VMM_MEM_TYPE_REGULAR: |
5623 | ret = vmx_fault_page(vcpu, gpa); |
5624 | break; |
5625 | default: |
5626 | printf("unknown memory type %d for GPA 0x%llx\n", |
5627 | gpa_memtype, gpa); |
5628 | return (EINVAL22); |
5629 | } |
5630 | |
5631 | return (ret); |
5632 | } |
5633 | |
5634 | /* |
5635 | * vmm_get_guest_cpu_cpl |
5636 | * |
5637 | * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the |
5638 | * VMCS field for the DPL of SS (this seems odd, but is documented that way |
5639 | * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl' |
5640 | * field, as per the APM. |
5641 | * |
5642 | * Parameters: |
5643 | * vcpu: guest VCPU for which CPL is to be checked |
5644 | * |
5645 | * Return Values: |
5646 | * -1: the CPL could not be determined |
5647 | * 0-3 indicating the current CPL. For real mode operation, 0 is returned. |
5648 | */ |
5649 | int |
5650 | vmm_get_guest_cpu_cpl(struct vcpu *vcpu) |
5651 | { |
5652 | int mode; |
5653 | struct vmcb *vmcb; |
5654 | uint64_t ss_ar; |
5655 | |
5656 | mode = vmm_get_guest_cpu_mode(vcpu); |
5657 | |
5658 | if (mode == VMM_CPU_MODE_UNKNOWN) |
5659 | return (-1); |
5660 | |
5661 | if (mode == VMM_CPU_MODE_REAL) |
5662 | return (0); |
5663 | |
5664 | if (vmm_softc->mode == VMM_MODE_SVM || |
5665 | vmm_softc->mode == VMM_MODE_RVI) { |
5666 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
5667 | return (vmcb->v_cpl); |
5668 | } else if (vmm_softc->mode == VMM_MODE_VMX || |
5669 | vmm_softc->mode == VMM_MODE_EPT) { |
5670 | if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &ss_ar)) |
5671 | return (-1); |
5672 | return ((ss_ar & 0x60) >> 5); |
5673 | } else |
5674 | return (-1); |
5675 | } |
5676 | |
5677 | /* |
5678 | * vmm_get_guest_cpu_mode |
5679 | * |
5680 | * Determines current CPU mode of 'vcpu'. |
5681 | * |
5682 | * Parameters: |
5683 | * vcpu: guest VCPU for which mode is to be checked |
5684 | * |
5685 | * Return Values: |
5686 | * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be |
5687 | * ascertained. |
5688 | */ |
5689 | int |
5690 | vmm_get_guest_cpu_mode(struct vcpu *vcpu) |
5691 | { |
5692 | uint64_t cr0, efer, cs_ar; |
5693 | uint8_t l, dib; |
5694 | struct vmcb *vmcb; |
5695 | struct vmx_msr_store *msr_store; |
5696 | |
5697 | if (vmm_softc->mode == VMM_MODE_SVM || |
5698 | vmm_softc->mode == VMM_MODE_RVI) { |
5699 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
5700 | cr0 = vmcb->v_cr0; |
5701 | efer = vmcb->v_efer; |
5702 | cs_ar = vmcb->v_cs.vs_attr; |
5703 | cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000); |
5704 | } else if (vmm_softc->mode == VMM_MODE_VMX || |
5705 | vmm_softc->mode == VMM_MODE_EPT) { |
5706 | if (vmread(VMCS_GUEST_IA32_CR00x6800, &cr0)) |
5707 | return (VMM_CPU_MODE_UNKNOWN); |
5708 | if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &cs_ar)) |
5709 | return (VMM_CPU_MODE_UNKNOWN); |
5710 | msr_store = |
5711 | (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
5712 | efer = msr_store[VCPU_REGS_EFER0].vms_data; |
5713 | } else |
5714 | return (VMM_CPU_MODE_UNKNOWN); |
5715 | |
5716 | l = (cs_ar & 0x2000) >> 13; |
5717 | dib = (cs_ar & 0x4000) >> 14; |
5718 | |
5719 | /* Check CR0.PE */ |
5720 | if (!(cr0 & CR0_PE0x00000001)) |
5721 | return (VMM_CPU_MODE_REAL); |
5722 | |
5723 | /* Check EFER */ |
5724 | if (efer & EFER_LMA0x00000400) { |
5725 | /* Could be compat or long mode, check CS.L */ |
5726 | if (l) |
5727 | return (VMM_CPU_MODE_LONG); |
5728 | else |
5729 | return (VMM_CPU_MODE_COMPAT); |
5730 | } |
5731 | |
5732 | /* Check prot vs prot32 */ |
5733 | if (dib) |
5734 | return (VMM_CPU_MODE_PROT32); |
5735 | else |
5736 | return (VMM_CPU_MODE_PROT); |
5737 | } |
5738 | |
5739 | /* |
5740 | * svm_handle_inout |
5741 | * |
5742 | * Exit handler for IN/OUT instructions. |
5743 | * |
5744 | * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these |
5745 | * will be passed to vmd for completion. |
5746 | * |
5747 | * Parameters: |
5748 | * vcpu: The VCPU where the IN/OUT instruction occurred |
5749 | * |
5750 | * Return values: |
5751 | * 0: if successful |
5752 | * EINVAL: an invalid IN/OUT instruction was encountered |
5753 | * EAGAIN: return to vmd - more processing needed in userland |
5754 | */ |
5755 | int |
5756 | svm_handle_inout(struct vcpu *vcpu) |
5757 | { |
5758 | uint64_t insn_length, exit_qual; |
5759 | int ret; |
5760 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5761 | |
5762 | insn_length = vmcb->v_exitinfo2 - vmcb->v_rip; |
5763 | if (insn_length != 1 && insn_length != 2) { |
5764 | DPRINTF("%s: IN/OUT instruction with length %lld not " |
5765 | "supported\n", __func__, insn_length); |
5766 | return (EINVAL22); |
5767 | } |
5768 | |
5769 | exit_qual = vmcb->v_exitinfo1; |
5770 | |
5771 | /* Bit 0 - direction */ |
5772 | vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x1); |
5773 | /* Bit 2 - string instruction? */ |
5774 | vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2; |
5775 | /* Bit 3 - REP prefix? */ |
5776 | vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3; |
5777 | |
5778 | /* Bits 4:6 - size of exit */ |
5779 | if (exit_qual & 0x10) |
5780 | vcpu->vc_exit.vei.vei_size = 1; |
5781 | else if (exit_qual & 0x20) |
5782 | vcpu->vc_exit.vei.vei_size = 2; |
5783 | else if (exit_qual & 0x40) |
5784 | vcpu->vc_exit.vei.vei_size = 4; |
5785 | |
5786 | /* Bit 16:31 - port */ |
5787 | vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; |
5788 | /* Data */ |
5789 | vcpu->vc_exit.vei.vei_data = vmcb->v_rax; |
5790 | |
5791 | vcpu->vc_gueststate.vg_rip += insn_length; |
5792 | |
5793 | /* |
5794 | * The following ports usually belong to devices owned by vmd. |
5795 | * Return EAGAIN to signal help needed from userspace (vmd). |
5796 | * Return 0 to indicate we don't care about this port. |
5797 | * |
5798 | * XXX something better than a hardcoded list here, maybe |
5799 | * configure via vmd via the device list in vm create params? |
5800 | */ |
5801 | switch (vcpu->vc_exit.vei.vei_port) { |
5802 | case IO_ICU10x020 ... IO_ICU10x020 + 1: |
5803 | case 0x40 ... 0x43: |
5804 | case PCKBC_AUX0x61: |
5805 | case IO_RTC0x070 ... IO_RTC0x070 + 1: |
5806 | case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1: |
5807 | case 0x3f8 ... 0x3ff: |
5808 | case ELCR00x4D0 ... ELCR10x4D1: |
5809 | case 0x500 ... 0x511: |
5810 | case 0x514: |
5811 | case 0x518: |
5812 | case 0xcf8: |
5813 | case 0xcfc ... 0xcff: |
5814 | case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF: |
5815 | ret = EAGAIN35; |
5816 | break; |
5817 | default: |
5818 | /* Read from unsupported ports returns FFs */ |
5819 | if (vcpu->vc_exit.vei.vei_dir == 1) { |
5820 | switch(vcpu->vc_exit.vei.vei_size) { |
5821 | case 1: |
5822 | vcpu->vc_gueststate.vg_rax |= 0xFF; |
5823 | vmcb->v_rax |= 0xFF; |
5824 | break; |
5825 | case 2: |
5826 | vcpu->vc_gueststate.vg_rax |= 0xFFFF; |
5827 | vmcb->v_rax |= 0xFFFF; |
5828 | break; |
5829 | case 4: |
5830 | vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF; |
5831 | vmcb->v_rax |= 0xFFFFFFFF; |
5832 | break; |
5833 | } |
5834 | } |
5835 | ret = 0; |
5836 | } |
5837 | |
5838 | return (ret); |
5839 | } |
5840 | |
5841 | /* |
5842 | * vmx_handle_inout |
5843 | * |
5844 | * Exit handler for IN/OUT instructions. |
5845 | * |
5846 | * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these |
5847 | * will be passed to vmd for completion. |
5848 | */ |
5849 | int |
5850 | vmx_handle_inout(struct vcpu *vcpu) |
5851 | { |
5852 | uint64_t insn_length, exit_qual; |
5853 | int ret; |
5854 | |
5855 | if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) { |
5856 | printf("%s: can't obtain instruction length\n", __func__); |
5857 | return (EINVAL22); |
5858 | } |
5859 | |
5860 | if (insn_length != 1 && insn_length != 2) { |
5861 | DPRINTF("%s: IN/OUT instruction with length %lld not " |
5862 | "supported\n", __func__, insn_length); |
5863 | return (EINVAL22); |
5864 | } |
5865 | |
5866 | if (vmx_get_exit_qualification(&exit_qual)) { |
5867 | printf("%s: can't get exit qual\n", __func__); |
5868 | return (EINVAL22); |
5869 | } |
5870 | |
5871 | /* Bits 0:2 - size of exit */ |
5872 | vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1; |
5873 | /* Bit 3 - direction */ |
5874 | vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x8) >> 3; |
5875 | /* Bit 4 - string instruction? */ |
5876 | vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4; |
5877 | /* Bit 5 - REP prefix? */ |
5878 | vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5; |
5879 | /* Bit 6 - Operand encoding */ |
5880 | vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6; |
5881 | /* Bit 16:31 - port */ |
5882 | vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; |
5883 | /* Data */ |
5884 | vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax; |
5885 | |
5886 | vcpu->vc_gueststate.vg_rip += insn_length; |
5887 | |
5888 | /* |
5889 | * The following ports usually belong to devices owned by vmd. |
5890 | * Return EAGAIN to signal help needed from userspace (vmd). |
5891 | * Return 0 to indicate we don't care about this port. |
5892 | * |
5893 | * XXX something better than a hardcoded list here, maybe |
5894 | * configure via vmd via the device list in vm create params? |
5895 | */ |
5896 | switch (vcpu->vc_exit.vei.vei_port) { |
5897 | case IO_ICU10x020 ... IO_ICU10x020 + 1: |
5898 | case 0x40 ... 0x43: |
5899 | case PCKBC_AUX0x61: |
5900 | case IO_RTC0x070 ... IO_RTC0x070 + 1: |
5901 | case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1: |
5902 | case 0x3f8 ... 0x3ff: |
5903 | case ELCR00x4D0 ... ELCR10x4D1: |
5904 | case 0x500 ... 0x511: |
5905 | case 0x514: |
5906 | case 0x518: |
5907 | case 0xcf8: |
5908 | case 0xcfc ... 0xcff: |
5909 | case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF: |
5910 | ret = EAGAIN35; |
5911 | break; |
5912 | default: |
5913 | /* Read from unsupported ports returns FFs */ |
5914 | if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) { |
5915 | if (vcpu->vc_exit.vei.vei_size == 4) |
5916 | vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF; |
5917 | else if (vcpu->vc_exit.vei.vei_size == 2) |
5918 | vcpu->vc_gueststate.vg_rax |= 0xFFFF; |
5919 | else if (vcpu->vc_exit.vei.vei_size == 1) |
5920 | vcpu->vc_gueststate.vg_rax |= 0xFF; |
5921 | } |
5922 | ret = 0; |
5923 | } |
5924 | |
5925 | return (ret); |
5926 | } |
5927 | |
5928 | /* |
5929 | * vmx_load_pdptes |
5930 | * |
5931 | * Update the PDPTEs in the VMCS with the values currently indicated by the |
5932 | * guest CR3. This is used for 32-bit PAE guests when enabling paging. |
5933 | * |
5934 | * Parameters |
5935 | * vcpu: The vcpu whose PDPTEs should be loaded |
5936 | * |
5937 | * Return values: |
5938 | * 0: if successful |
5939 | * EINVAL: if the PDPTEs could not be loaded |
5940 | * ENOMEM: memory allocation failure |
5941 | */ |
5942 | int |
5943 | vmx_load_pdptes(struct vcpu *vcpu) |
5944 | { |
5945 | uint64_t cr3, cr3_host_phys; |
5946 | vaddr_t cr3_host_virt; |
5947 | pd_entry_t *pdptes; |
5948 | int ret; |
5949 | |
5950 | if (vmread(VMCS_GUEST_IA32_CR30x6802, &cr3)) { |
5951 | printf("%s: can't read guest cr3\n", __func__); |
5952 | return (EINVAL22); |
5953 | } |
5954 | |
5955 | if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3, |
5956 | (paddr_t *)&cr3_host_phys)) { |
5957 | DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n", |
5958 | __func__); |
5959 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, 0)) { |
5960 | printf("%s: can't write guest PDPTE0\n", __func__); |
5961 | return (EINVAL22); |
5962 | } |
5963 | |
5964 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, 0)) { |
5965 | printf("%s: can't write guest PDPTE1\n", __func__); |
5966 | return (EINVAL22); |
5967 | } |
5968 | |
5969 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, 0)) { |
5970 | printf("%s: can't write guest PDPTE2\n", __func__); |
5971 | return (EINVAL22); |
5972 | } |
5973 | |
5974 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, 0)) { |
5975 | printf("%s: can't write guest PDPTE3\n", __func__); |
5976 | return (EINVAL22); |
5977 | } |
5978 | return (0); |
5979 | } |
5980 | |
5981 | ret = 0; |
5982 | |
5983 | /* We may sleep during km_alloc(9), so reload VMCS. */ |
5984 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
5985 | cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, |
5986 | &kd_waitok); |
5987 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
5988 | printf("%s: failed to reload vmcs\n", __func__); |
5989 | ret = EINVAL22; |
5990 | goto exit; |
5991 | } |
5992 | |
5993 | if (!cr3_host_virt) { |
5994 | printf("%s: can't allocate address for guest CR3 mapping\n", |
5995 | __func__); |
5996 | return (ENOMEM12); |
5997 | } |
5998 | |
5999 | pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ0x01); |
6000 | |
6001 | pdptes = (pd_entry_t *)cr3_host_virt; |
6002 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, pdptes[0])) { |
6003 | printf("%s: can't write guest PDPTE0\n", __func__); |
6004 | ret = EINVAL22; |
6005 | goto exit; |
6006 | } |
6007 | |
6008 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, pdptes[1])) { |
6009 | printf("%s: can't write guest PDPTE1\n", __func__); |
6010 | ret = EINVAL22; |
6011 | goto exit; |
6012 | } |
6013 | |
6014 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, pdptes[2])) { |
6015 | printf("%s: can't write guest PDPTE2\n", __func__); |
6016 | ret = EINVAL22; |
6017 | goto exit; |
6018 | } |
6019 | |
6020 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, pdptes[3])) { |
6021 | printf("%s: can't write guest PDPTE3\n", __func__); |
6022 | ret = EINVAL22; |
6023 | goto exit; |
6024 | } |
6025 | |
6026 | exit: |
6027 | pmap_kremove(cr3_host_virt, PAGE_SIZE(1 << 12)); |
6028 | |
6029 | /* km_free(9) might sleep, so we need to reload VMCS. */ |
6030 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
6031 | km_free((void *)cr3_host_virt, PAGE_SIZE(1 << 12), &kv_any, &kp_none); |
6032 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
6033 | printf("%s: failed to reload vmcs after km_free\n", __func__); |
6034 | ret = EINVAL22; |
6035 | } |
6036 | |
6037 | return (ret); |
6038 | } |
6039 | |
6040 | /* |
6041 | * vmx_handle_cr0_write |
6042 | * |
6043 | * Write handler for CR0. This function ensures valid values are written into |
6044 | * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc). |
6045 | * |
6046 | * Parameters |
6047 | * vcpu: The vcpu taking the cr0 write exit |
6048 | * r: The guest's desired (incoming) cr0 value |
6049 | * |
6050 | * Return values: |
6051 | * 0: if successful |
6052 | * EINVAL: if an error occurred |
6053 | */ |
6054 | int |
6055 | vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r) |
6056 | { |
6057 | struct vmx_msr_store *msr_store; |
6058 | struct vmx_invvpid_descriptor vid; |
6059 | uint64_t ectls, oldcr0, cr4, mask; |
6060 | int ret; |
6061 | |
6062 | /* Check must-be-0 bits */ |
6063 | mask = vcpu->vc_vmx_cr0_fixed1; |
6064 | if (~r & mask) { |
6065 | /* Inject #GP, let the guest handle it */ |
6066 | DPRINTF("%s: guest set invalid bits in %%cr0. Zeros " |
6067 | "mask=0x%llx, data=0x%llx\n", __func__, |
6068 | vcpu->vc_vmx_cr0_fixed1, r); |
6069 | vmm_inject_gp(vcpu); |
6070 | return (0); |
6071 | } |
6072 | |
6073 | /* Check must-be-1 bits */ |
6074 | mask = vcpu->vc_vmx_cr0_fixed0; |
6075 | if ((r & mask) != mask) { |
6076 | /* Inject #GP, let the guest handle it */ |
6077 | DPRINTF("%s: guest set invalid bits in %%cr0. Ones " |
6078 | "mask=0x%llx, data=0x%llx\n", __func__, |
6079 | vcpu->vc_vmx_cr0_fixed0, r); |
6080 | vmm_inject_gp(vcpu); |
6081 | return (0); |
6082 | } |
6083 | |
6084 | if (r & 0xFFFFFFFF00000000ULL) { |
6085 | DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid," |
6086 | " inject #GP, cr0=0x%llx\n", __func__, r); |
6087 | vmm_inject_gp(vcpu); |
6088 | return (0); |
6089 | } |
6090 | |
6091 | if ((r & CR0_PG0x80000000) && (r & CR0_PE0x00000001) == 0) { |
6092 | DPRINTF("%s: PG flag set when the PE flag is clear," |
6093 | " inject #GP, cr0=0x%llx\n", __func__, r); |
6094 | vmm_inject_gp(vcpu); |
6095 | return (0); |
6096 | } |
6097 | |
6098 | if ((r & CR0_NW0x20000000) && (r & CR0_CD0x40000000) == 0) { |
6099 | DPRINTF("%s: NW flag set when the CD flag is clear," |
6100 | " inject #GP, cr0=0x%llx\n", __func__, r); |
6101 | vmm_inject_gp(vcpu); |
6102 | return (0); |
6103 | } |
6104 | |
6105 | if (vmread(VMCS_GUEST_IA32_CR00x6800, &oldcr0)) { |
6106 | printf("%s: can't read guest cr0\n", __func__); |
6107 | return (EINVAL22); |
6108 | } |
6109 | |
6110 | /* CR0 must always have NE set */ |
6111 | r |= CR0_NE0x00000020; |
6112 | |
6113 | if (vmwrite(VMCS_GUEST_IA32_CR00x6800, r)) { |
6114 | printf("%s: can't write guest cr0\n", __func__); |
6115 | return (EINVAL22); |
6116 | } |
6117 | |
6118 | /* If the guest hasn't enabled paging ... */ |
6119 | if (!(r & CR0_PG0x80000000) && (oldcr0 & CR0_PG0x80000000)) { |
6120 | /* Paging was disabled (prev. enabled) - Flush TLB */ |
6121 | if ((vmm_softc->mode == VMM_MODE_VMX || |
6122 | vmm_softc->mode == VMM_MODE_EPT) && |
6123 | vcpu->vc_vmx_vpid_enabled) { |
6124 | vid.vid_vpid = vcpu->vc_parent->vm_id; |
6125 | vid.vid_addr = 0; |
6126 | invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid); |
6127 | } |
6128 | } else if (!(oldcr0 & CR0_PG0x80000000) && (r & CR0_PG0x80000000)) { |
6129 | /* |
6130 | * Since the guest has enabled paging, then the IA32_VMX_IA32E_MODE_GUEST |
6131 | * control must be set to the same as EFER_LME. |
6132 | */ |
6133 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
6134 | |
6135 | if (vmread(VMCS_ENTRY_CTLS0x4012, &ectls)) { |
6136 | printf("%s: can't read entry controls", __func__); |
6137 | return (EINVAL22); |
6138 | } |
6139 | |
6140 | if (msr_store[VCPU_REGS_EFER0].vms_data & EFER_LME0x00000100) |
6141 | ectls |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9); |
6142 | else |
6143 | ectls &= ~IA32_VMX_IA32E_MODE_GUEST(1ULL << 9); |
6144 | |
6145 | if (vmwrite(VMCS_ENTRY_CTLS0x4012, ectls)) { |
6146 | printf("%s: can't write entry controls", __func__); |
6147 | return (EINVAL22); |
6148 | } |
6149 | |
6150 | if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) { |
6151 | printf("%s: can't read guest cr4\n", __func__); |
6152 | return (EINVAL22); |
6153 | } |
6154 | |
6155 | /* Load PDPTEs if PAE guest enabling paging */ |
6156 | if (cr4 & CR4_PAE0x00000020) { |
6157 | ret = vmx_load_pdptes(vcpu); |
6158 | |
6159 | if (ret) { |
6160 | printf("%s: updating PDPTEs failed\n", __func__); |
6161 | return (ret); |
6162 | } |
6163 | } |
6164 | } |
6165 | |
6166 | return (0); |
6167 | } |
6168 | |
6169 | /* |
6170 | * vmx_handle_cr4_write |
6171 | * |
6172 | * Write handler for CR4. This function ensures valid values are written into |
6173 | * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc). |
6174 | * |
6175 | * Parameters |
6176 | * vcpu: The vcpu taking the cr4 write exit |
6177 | * r: The guest's desired (incoming) cr4 value |
6178 | * |
6179 | * Return values: |
6180 | * 0: if successful |
6181 | * EINVAL: if an error occurred |
6182 | */ |
int
vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
{
	uint64_t mask;

	/*
	 * Check must-be-0 bits: a bit that reads 0 in the
	 * IA32_VMX_CR4_FIXED1 capability MSR may not be set in %cr4
	 * while in VMX operation.
	 */
	mask = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
	if (r & mask) {
		/* Inject #GP, let the guest handle it */
		DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
		    "mask=0x%llx, data=0x%llx\n", __func__,
		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
		    r);
		vmm_inject_gp(vcpu);
		return (0);
	}

	/*
	 * Check must-be-1 bits: every bit set in IA32_VMX_CR4_FIXED0
	 * must also be set in the guest's requested %cr4 value.
	 */
	mask = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
	if ((r & mask) != mask) {
		/* Inject #GP, let the guest handle it */
		DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
		    "mask=0x%llx, data=0x%llx\n", __func__,
		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
		    r);
		vmm_inject_gp(vcpu);
		return (0);
	}

	/* CR4_VMXE must always be enabled (the guest never sees this bit) */
	r |= CR4_VMXE0x00002000;

	if (vmwrite(VMCS_GUEST_IA32_CR40x6804, r)) {
		printf("%s: can't write guest cr4\n", __func__);
		return (EINVAL22);
	}

	return (0);
}
6222 | |
6223 | /* |
6224 | * vmx_handle_cr |
6225 | * |
6226 | * Handle reads/writes to control registers (except CR3) |
6227 | */ |
6228 | int |
6229 | vmx_handle_cr(struct vcpu *vcpu) |
6230 | { |
6231 | uint64_t insn_length, exit_qual, r; |
6232 | uint8_t crnum, dir, reg; |
6233 | |
6234 | if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) { |
6235 | printf("%s: can't obtain instruction length\n", __func__); |
6236 | return (EINVAL22); |
6237 | } |
6238 | |
6239 | if (vmx_get_exit_qualification(&exit_qual)) { |
6240 | printf("%s: can't get exit qual\n", __func__); |
6241 | return (EINVAL22); |
6242 | } |
6243 | |
6244 | /* Low 4 bits of exit_qual represent the CR number */ |
6245 | crnum = exit_qual & 0xf; |
6246 | |
6247 | /* |
6248 | * Bits 5:4 indicate the direction of operation (or special CR-modifying |
6249 | * instruction) |
6250 | */ |
6251 | dir = (exit_qual & 0x30) >> 4; |
6252 | |
6253 | /* Bits 11:8 encode the source/target register */ |
6254 | reg = (exit_qual & 0xf00) >> 8; |
6255 | |
6256 | switch (dir) { |
6257 | case CR_WRITE0: |
6258 | if (crnum == 0 || crnum == 4) { |
6259 | switch (reg) { |
6260 | case 0: r = vcpu->vc_gueststate.vg_rax; break; |
6261 | case 1: r = vcpu->vc_gueststate.vg_rcx; break; |
6262 | case 2: r = vcpu->vc_gueststate.vg_rdx; break; |
6263 | case 3: r = vcpu->vc_gueststate.vg_rbx; break; |
6264 | case 4: if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r)) { |
6265 | printf("%s: unable to read guest " |
6266 | "RSP\n", __func__); |
6267 | return (EINVAL22); |
6268 | } |
6269 | break; |
6270 | case 5: r = vcpu->vc_gueststate.vg_rbp; break; |
6271 | case 6: r = vcpu->vc_gueststate.vg_rsi; break; |
6272 | case 7: r = vcpu->vc_gueststate.vg_rdi; break; |
6273 | case 8: r = vcpu->vc_gueststate.vg_r8; break; |
6274 | case 9: r = vcpu->vc_gueststate.vg_r9; break; |
6275 | case 10: r = vcpu->vc_gueststate.vg_r10; break; |
6276 | case 11: r = vcpu->vc_gueststate.vg_r11; break; |
6277 | case 12: r = vcpu->vc_gueststate.vg_r12; break; |
6278 | case 13: r = vcpu->vc_gueststate.vg_r13; break; |
6279 | case 14: r = vcpu->vc_gueststate.vg_r14; break; |
6280 | case 15: r = vcpu->vc_gueststate.vg_r15; break; |
6281 | } |
6282 | DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n", |
6283 | __func__, crnum, vcpu->vc_gueststate.vg_rip, r); |
6284 | } |
6285 | |
6286 | if (crnum == 0) |
6287 | vmx_handle_cr0_write(vcpu, r); |
6288 | |
6289 | if (crnum == 4) |
6290 | vmx_handle_cr4_write(vcpu, r); |
6291 | |
6292 | break; |
6293 | case CR_READ1: |
6294 | DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum, |
6295 | vcpu->vc_gueststate.vg_rip); |
6296 | break; |
6297 | case CR_CLTS2: |
6298 | DPRINTF("%s: clts instruction @ %llx\n", __func__, |
6299 | vcpu->vc_gueststate.vg_rip); |
6300 | break; |
6301 | case CR_LMSW3: |
6302 | DPRINTF("%s: lmsw instruction @ %llx\n", __func__, |
6303 | vcpu->vc_gueststate.vg_rip); |
6304 | break; |
6305 | default: |
6306 | DPRINTF("%s: unknown cr access @ %llx\n", __func__, |
6307 | vcpu->vc_gueststate.vg_rip); |
6308 | } |
6309 | |
6310 | vcpu->vc_gueststate.vg_rip += insn_length; |
6311 | |
6312 | return (0); |
6313 | } |
6314 | |
6315 | /* |
6316 | * vmx_handle_rdmsr |
6317 | * |
6318 | * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access |
6319 | * and won't end up here. This handler is primarily intended to catch otherwise |
6320 | * unknown MSR access for possible later inclusion in the bitmap list. For |
6321 | * each MSR access that ends up here, we log the access (when VMM_DEBUG is |
6322 | * enabled) |
6323 | * |
6324 | * Parameters: |
6325 | * vcpu: vcpu structure containing instruction info causing the exit |
6326 | * |
6327 | * Return value: |
6328 | * 0: The operation was successful |
6329 | * EINVAL: An error occurred |
6330 | */ |
int
vmx_handle_rdmsr(struct vcpu *vcpu)
{
	uint64_t insn_length;
	uint64_t *rax, *rdx;
	uint64_t *rcx;
	int ret;

	if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
		printf("%s: can't obtain instruction length\n", __func__);
		return (EINVAL22);
	}

	/* RDMSR is always a 2-byte instruction (0F 32). */
	if (insn_length != 2) {
		DPRINTF("%s: RDMSR with instruction length %lld not "
		    "supported\n", __func__, insn_length);
		return (EINVAL22);
	}

	/* %rcx selects the MSR; the result is returned in %rdx:%rax. */
	rax = &vcpu->vc_gueststate.vg_rax;
	rcx = &vcpu->vc_gueststate.vg_rcx;
	rdx = &vcpu->vc_gueststate.vg_rdx;

	switch (*rcx) {
	case MSR_BIOS_SIGN0x08b:
	case MSR_PLATFORM_ID0x017:
		/* Ignored */
		*rax = 0;
		*rdx = 0;
		break;
	case MSR_CR_PAT0x277:
		/* Return the PAT value shadowed per-vcpu on wrmsr. */
		*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
		*rdx = (vcpu->vc_shadow_pat >> 32);
		break;
	default:
		/* Unsupported MSRs causes #GP exception, don't advance %rip */
		DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
		    __func__, *rcx);
		ret = vmm_inject_gp(vcpu);
		return (ret);
	}

	vcpu->vc_gueststate.vg_rip += insn_length;

	return (0);
}
6377 | |
6378 | /* |
6379 | * vmx_handle_xsetbv |
6380 | * |
6381 | * VMX-specific part of the xsetbv instruction exit handler |
6382 | * |
6383 | * Parameters: |
6384 | * vcpu: vcpu structure containing instruction info causing the exit |
6385 | * |
6386 | * Return value: |
6387 | * 0: The operation was successful |
6388 | * EINVAL: An error occurred |
6389 | */ |
int
vmx_handle_xsetbv(struct vcpu *vcpu)
{
	uint64_t insn_length, *rax;
	int ret;

	if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
		printf("%s: can't obtain instruction length\n", __func__);
		return (EINVAL22);
	}

	/* All XSETBV instructions are 3 bytes */
	if (insn_length != 3) {
		DPRINTF("%s: XSETBV with instruction length %lld not "
		    "supported\n", __func__, insn_length);
		return (EINVAL22);
	}

	rax = &vcpu->vc_gueststate.vg_rax;

	/* Common VMX/SVM emulation lives in vmm_handle_xsetbv(). */
	ret = vmm_handle_xsetbv(vcpu, rax);

	/* NOTE(review): %rip is advanced even when the common handler
	 * injected #GP; presumably intentional — confirm. */
	vcpu->vc_gueststate.vg_rip += insn_length;

	return ret;
}
6416 | |
6417 | /* |
6418 | * svm_handle_xsetbv |
6419 | * |
6420 | * SVM-specific part of the xsetbv instruction exit handler |
6421 | * |
6422 | * Parameters: |
6423 | * vcpu: vcpu structure containing instruction info causing the exit |
6424 | * |
6425 | * Return value: |
6426 | * 0: The operation was successful |
6427 | * EINVAL: An error occurred |
6428 | */ |
int
svm_handle_xsetbv(struct vcpu *vcpu)
{
	uint64_t insn_length, *rax;
	int ret;
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

	/* All XSETBV instructions are 3 bytes */
	insn_length = 3;

	/* On SVM, guest %rax lives in the VMCB, not the gueststate. */
	rax = &vmcb->v_rax;

	/* Common VMX/SVM emulation lives in vmm_handle_xsetbv(). */
	ret = vmm_handle_xsetbv(vcpu, rax);

	vcpu->vc_gueststate.vg_rip += insn_length;

	return ret;
}
6447 | |
6448 | /* |
6449 | * vmm_handle_xsetbv |
6450 | * |
6451 | * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values |
6452 | * limited to the xsave_mask in use in the host. |
6453 | * |
6454 | * Parameters: |
6455 | * vcpu: vcpu structure containing instruction info causing the exit |
6456 | * rax: pointer to guest %rax |
6457 | * |
6458 | * Return value: |
6459 | * 0: The operation was successful |
6460 | * EINVAL: An error occurred |
6461 | */ |
6462 | int |
6463 | vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax) |
6464 | { |
6465 | uint64_t *rdx, *rcx, val; |
6466 | |
6467 | rcx = &vcpu->vc_gueststate.vg_rcx; |
6468 | rdx = &vcpu->vc_gueststate.vg_rdx; |
6469 | |
6470 | if (vmm_get_guest_cpu_cpl(vcpu) != 0) { |
6471 | DPRINTF("%s: guest cpl not zero\n", __func__); |
6472 | return (vmm_inject_gp(vcpu)); |
6473 | } |
6474 | |
6475 | if (*rcx != 0) { |
6476 | DPRINTF("%s: guest specified invalid xcr register number " |
6477 | "%lld\n", __func__, *rcx); |
6478 | return (vmm_inject_gp(vcpu)); |
6479 | } |
6480 | |
6481 | val = *rax + (*rdx << 32); |
6482 | if (val & ~xsave_mask) { |
6483 | DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n", |
6484 | __func__, val); |
6485 | return (vmm_inject_gp(vcpu)); |
6486 | } |
6487 | |
6488 | vcpu->vc_gueststate.vg_xcr0 = val; |
6489 | |
6490 | return (0); |
6491 | } |
6492 | |
6493 | /* |
6494 | * vmx_handle_misc_enable_msr |
6495 | * |
6496 | * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We |
6497 | * limit what the guest can write to this MSR (certain hardware-related |
6498 | * settings like speedstep, etc). |
6499 | * |
6500 | * Parameters: |
6501 | * vcpu: vcpu structure containing information about the wrmsr causing this |
6502 | * exit |
6503 | */ |
void
vmx_handle_misc_enable_msr(struct vcpu *vcpu)
{
	uint64_t *rax, *rdx;
	struct vmx_msr_store *msr_store;

	/* wrmsr operand arrives in %edx:%eax. */
	rax = &vcpu->vc_gueststate.vg_rax;
	rdx = &vcpu->vc_gueststate.vg_rdx;
	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;

	/* Filter out guest writes to TCC, EIST, and xTPR */
	*rax &= ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_EIST_ENABLED(1 << 16) |
	    MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));

	/* Persist the filtered value in the exit-save MSR area. */
	msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = *rax | (*rdx << 32);
}
6520 | |
6521 | /* |
6522 | * vmx_handle_wrmsr |
6523 | * |
6524 | * Handler for wrmsr instructions. This handler logs the access, and discards |
6525 | * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end |
6526 | * up here (it will be whitelisted in the MSR bitmap). |
6527 | * |
6528 | * Parameters: |
6529 | * vcpu: vcpu structure containing instruction info causing the exit |
6530 | * |
6531 | * Return value: |
6532 | * 0: The operation was successful |
6533 | * EINVAL: An error occurred |
6534 | */ |
int
vmx_handle_wrmsr(struct vcpu *vcpu)
{
	uint64_t insn_length, val;
	uint64_t *rax, *rdx, *rcx;
	int ret;

	if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
		printf("%s: can't obtain instruction length\n", __func__);
		return (EINVAL22);
	}

	/* WRMSR is always a 2-byte instruction (0F 30). */
	if (insn_length != 2) {
		DPRINTF("%s: WRMSR with instruction length %lld not "
		    "supported\n", __func__, insn_length);
		return (EINVAL22);
	}

	/* %rcx selects the MSR, the value is in %edx:%eax. */
	rax = &vcpu->vc_gueststate.vg_rax;
	rcx = &vcpu->vc_gueststate.vg_rcx;
	rdx = &vcpu->vc_gueststate.vg_rdx;
	val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);

	switch (*rcx) {
	case MSR_CR_PAT0x277:
		/* Shadow the PAT per-vcpu; reject invalid encodings. */
		if (!vmm_pat_is_valid(val)) {
			ret = vmm_inject_gp(vcpu);
			return (ret);
		}
		vcpu->vc_shadow_pat = val;
		break;
	case MSR_MISC_ENABLE0x1a0:
		vmx_handle_misc_enable_msr(vcpu);
		break;
	case MSR_SMM_MONITOR_CTL0x09b:
		/*
		 * 34.15.5 - Enabling dual monitor treatment
		 *
		 * Unsupported, so inject #GP and return without
		 * advancing %rip.
		 */
		ret = vmm_inject_gp(vcpu);
		return (ret);
	case KVM_MSR_SYSTEM_TIME0x4b564d01:
		/* Guest registered a pvclock shared page. */
		vmm_init_pvclock(vcpu,
		    (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
		break;
#ifdef VMM_DEBUG
	default:
		/*
		 * Log the access, to be able to identify unknown MSRs
		 */
		DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
		    "written from guest=0x%llx:0x%llx\n", __func__,
		    *rcx, *rdx, *rax);
#endif /* VMM_DEBUG */
	}

	vcpu->vc_gueststate.vg_rip += insn_length;

	return (0);
}
6597 | |
6598 | /* |
6599 | * svm_handle_msr |
6600 | * |
6601 | * Handler for MSR instructions. |
6602 | * |
6603 | * Parameters: |
6604 | * vcpu: vcpu structure containing instruction info causing the exit |
6605 | * |
6606 | * Return value: |
6607 | * Always 0 (successful) |
6608 | */ |
int
svm_handle_msr(struct vcpu *vcpu)
{
	uint64_t insn_length, val;
	uint64_t *rax, *rcx, *rdx;
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
	int ret;

	/* XXX: Validate RDMSR / WRMSR insn_length */
	insn_length = 2;

	/* On SVM, guest %rax lives in the VMCB. */
	rax = &vmcb->v_rax;
	rcx = &vcpu->vc_gueststate.vg_rcx;
	rdx = &vcpu->vc_gueststate.vg_rdx;

	/* EXITINFO1 distinguishes WRMSR (1) from RDMSR (0). */
	if (vmcb->v_exitinfo1 == 1) {
		/* WRMSR */
		val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);

		switch (*rcx) {
		case MSR_CR_PAT0x277:
			/* Shadow the PAT per-vcpu; reject invalid encodings. */
			if (!vmm_pat_is_valid(val)) {
				ret = vmm_inject_gp(vcpu);
				return (ret);
			}
			vcpu->vc_shadow_pat = val;
			break;
		case MSR_EFER0xc0000080:
			/* SVME must stay set while the guest runs under SVM. */
			vmcb->v_efer = *rax | EFER_SVME0x00001000;
			break;
		case KVM_MSR_SYSTEM_TIME0x4b564d01:
			/* Guest registered a pvclock shared page. */
			vmm_init_pvclock(vcpu,
			    (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
			break;
		default:
			/* Log the access, to be able to identify unknown MSRs */
			DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
			    "written from guest=0x%llx:0x%llx\n", __func__,
			    *rcx, *rdx, *rax);
		}
	} else {
		/* RDMSR */
		switch (*rcx) {
		case MSR_BIOS_SIGN0x08b:
		case MSR_INT_PEN_MSG0xc0010055:
		case MSR_PLATFORM_ID0x017:
			/* Ignored */
			*rax = 0;
			*rdx = 0;
			break;
		case MSR_CR_PAT0x277:
			/* Return the per-vcpu shadowed PAT. */
			*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
			*rdx = (vcpu->vc_shadow_pat >> 32);
			break;
		case MSR_DE_CFG0xc0011029:
			/* LFENCE serializing bit is set by host */
			*rax = DE_CFG_SERIALIZE_LFENCE(1 << 1);
			*rdx = 0;
			break;
		default:
			/*
			 * Unsupported MSRs causes #GP exception, don't advance
			 * %rip
			 */
			DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
			    "injecting #GP\n", __func__, *rcx);
			ret = vmm_inject_gp(vcpu);
			return (ret);
		}
	}

	vcpu->vc_gueststate.vg_rip += insn_length;

	return (0);
}
6684 | |
6685 | /* |
6686 | * vmm_handle_cpuid |
6687 | * |
6688 | * Exit handler for CPUID instruction |
6689 | * |
6690 | * Parameters: |
6691 | * vcpu: vcpu causing the CPUID exit |
6692 | * |
6693 | * Return value: |
6694 | * 0: the exit was processed successfully |
6695 | * EINVAL: error occurred validating the CPUID instruction arguments |
6696 | */ |
int
vmm_handle_cpuid(struct vcpu *vcpu)
{
	uint64_t insn_length, cr4;
	uint64_t *rax, *rbx, *rcx, *rdx;
	struct vmcb *vmcb;
	uint32_t eax, ebx, ecx, edx;
	struct vmx_msr_store *msr_store;
	int vmm_cpuid_level;

	/* what's the cpuid level we support/advertise? */
	vmm_cpuid_level = cpuid_level;
	/* Leaf 0x15 (TSC/core crystal) is synthesized below when the
	 * host TSC is invariant, so advertise at least that level. */
	if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
		vmm_cpuid_level = 0x15;

	/* Mode-dependent setup: VMX reads the VMCS, SVM reads the VMCB. */
	if (vmm_softc->mode == VMM_MODE_VMX ||
	    vmm_softc->mode == VMM_MODE_EPT) {
		if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
			DPRINTF("%s: can't obtain instruction length\n",
			    __func__);
			return (EINVAL22);
		}

		if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
			DPRINTF("%s: can't obtain cr4\n", __func__);
			return (EINVAL22);
		}

		rax = &vcpu->vc_gueststate.vg_rax;

		/*
		 * "CPUID leaves above 02H and below 80000000H are only
		 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
		 * default value 0"
		 */
		msr_store =
		    (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
		if (msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &
		    MISC_ENABLE_LIMIT_CPUID_MAXVAL(1 << 22))
			vmm_cpuid_level = 0x02;
	} else {
		/* XXX: validate insn_length 2 */
		insn_length = 2;
		vmcb = (struct vmcb *)vcpu->vc_control_va;
		rax = &vmcb->v_rax;
		cr4 = vmcb->v_cr4;
	}

	rbx = &vcpu->vc_gueststate.vg_rbx;
	rcx = &vcpu->vc_gueststate.vg_rcx;
	rdx = &vcpu->vc_gueststate.vg_rdx;
	vcpu->vc_gueststate.vg_rip += insn_length;

	/*
	 * "If a value entered for CPUID.EAX is higher than the maximum input
	 * value for basic or extended function for that processor then the
	 * data for the highest basic information leaf is returned."
	 *
	 * "When CPUID returns the highest basic leaf information as a result
	 * of an invalid input EAX value, any dependence on input ECX value
	 * in the basic leaf is honored."
	 *
	 * This means if rax is between vmm_cpuid_level and 0x40000000 (the start
	 * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
	 * altering subleaf. Also, if rax is greater than the extended function
	 * info, clamp also to vmm_cpuid_level.
	 */
	if ((*rax > vmm_cpuid_level && *rax < 0x40000000) ||
	    (*rax > curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_pnfeatset)) {
		DPRINTF("%s: invalid cpuid input leaf 0x%llx, guest rip="
		    "0x%llx - resetting to 0x%x\n", __func__, *rax,
		    vcpu->vc_gueststate.vg_rip - insn_length,
		    vmm_cpuid_level);
		*rax = vmm_cpuid_level;
	}

	/* Execute the real CPUID once; leaves that pass host data through
	 * use eax/ebx/ecx/edx, others overwrite with filtered values. */
	CPUID_LEAF(*rax, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (*rax), "c" (0));

	switch (*rax) {
	case 0x00:	/* Max level and vendor ID */
		*rax = vmm_cpuid_level;
		*rbx = *((uint32_t *)&cpu_vendor);
		*rdx = *((uint32_t *)&cpu_vendor + 1);
		*rcx = *((uint32_t *)&cpu_vendor + 2);
		break;
	case 0x01:	/* Version, brand, feature info */
		*rax = cpu_id;
		/* mask off host's APIC ID, reset to vcpu id */
		*rbx = cpu_ebxfeature & 0x0000FFFF;
		*rbx |= (vcpu->vc_id & 0xFF) << 24;
		/* Advertise the hypervisor bit; hide unsupported features. */
		*rcx = (cpu_ecxfeature | CPUIDECX_HV0x80000000) & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020 | 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800 | 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000 );

		/* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
		if (cr4 & CR4_OSXSAVE0x00040000)
			*rcx |= CPUIDECX_OSXSAVE0x08000000;
		else
			*rcx &= ~CPUIDECX_OSXSAVE0x08000000;

		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_feature_flags & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200 | 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080 | 0x00004000);
		break;
	case 0x02:	/* Cache and TLB information */
		/* Pass host values through unmodified. */
		*rax = eax;
		*rbx = ebx;
		*rcx = ecx;
		*rdx = edx;
		break;
	case 0x03:	/* Processor serial number (not supported) */
		DPRINTF("%s: function 0x03 (processor serial number) not "
		    "supported\n", __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x04:	/* Deterministic cache info */
		if (*rcx == 0) {
			/* Subleaf 0 was already fetched above; strip the
			 * host core/thread topology bits from eax. */
			*rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK0x3FF;
			*rbx = ebx;
			*rcx = ecx;
			*rdx = edx;
		} else {
			/* Re-execute CPUID with the requested subleaf. */
			CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (*rax), "c" (*rcx));
			*rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK0x3FF;
			*rbx = ebx;
			*rcx = ecx;
			*rdx = edx;
		}
		break;
	case 0x05:	/* MONITOR/MWAIT (not supported) */
		DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
		    __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x06:	/* Thermal / Power management (not supported) */
		DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
		    __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x07:	/* SEFF */
		if (*rcx == 0) {
			*rax = 0;	/* Highest subleaf supported */
			/* Only expose structured extended features vmm
			 * can virtualize safely. */
			*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800 | 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000 | 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000 | 0x40000000 | 0x80000000);
			*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK~(0x00000002);
			*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK(0x00000400);
		} else {
			/* Unsupported subleaf */
			DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
			    "0x%llx not supported\n", __func__, *rcx);
			*rax = 0;
			*rbx = 0;
			*rcx = 0;
			*rdx = 0;
		}
		break;
	case 0x09:	/* Direct Cache Access (not supported) */
		DPRINTF("%s: function 0x09 (direct cache access) not "
		    "supported\n", __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x0a:	/* Architectural perf monitoring (not supported) */
		DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
		    __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x0b:	/* Extended topology enumeration (not supported) */
		DPRINTF("%s: function 0x0b (topology enumeration) not "
		    "supported\n", __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x0d:	/* Processor ext. state information */
		if (*rcx == 0) {
			/* Advertise only the host-enabled xsave features. */
			*rax = xsave_mask;
			*rbx = ebx;
			*rcx = ecx;
			*rdx = edx;
		} else if (*rcx == 1) {
			/* XSAVEOPT/XSAVEC etc. not exposed. */
			*rax = 0;
			*rbx = 0;
			*rcx = 0;
			*rdx = 0;
		} else {
			/* Per-feature subleaves pass through. */
			CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (*rax), "c" (*rcx));
			*rax = eax;
			*rbx = ebx;
			*rcx = ecx;
			*rdx = edx;
		}
		break;
	case 0x0f:	/* QoS info (not supported) */
		DPRINTF("%s: function 0x0f (QoS info) not supported\n",
		    __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x14:	/* Processor Trace info (not supported) */
		DPRINTF("%s: function 0x14 (processor trace info) not "
		    "supported\n", __func__);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x15:
		if (cpuid_level >= 0x15) {
			/* Host supports leaf 0x15 natively - pass through. */
			*rax = eax;
			*rbx = ebx;
			*rcx = ecx;
			*rdx = edx;
		} else {
			/* Synthesize the leaf from the measured invariant
			 * TSC frequency (guaranteed by the check at top). */
			KASSERT(tsc_is_invariant)((tsc_is_invariant) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c" , 6923, "tsc_is_invariant"));
			*rax = 1;
			*rbx = 100;
			*rcx = tsc_frequency / 100;
			*rdx = 0;
		}
		break;
	case 0x16:	/* Processor frequency info */
		*rax = eax;
		*rbx = ebx;
		*rcx = ecx;
		*rdx = edx;
		break;
	case 0x40000000:	/* Hypervisor information */
		*rax = 0;
		*rbx = *((uint32_t *)&vmm_hv_signature[0]);
		*rcx = *((uint32_t *)&vmm_hv_signature[4]);
		*rdx = *((uint32_t *)&vmm_hv_signature[8]);
		break;
	case 0x40000001:	/* KVM hypervisor features */
		/* Only the pvclock feature bits are advertised. */
		*rax = (1 << KVM_FEATURE_CLOCKSOURCE23) |
		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT24);
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x80000000:	/* Extended function level */
		*rax = 0x80000008;  /* curcpu()->ci_pnfeatset */
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
		break;
	case 0x80000001: 	/* Extended function info */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_efeature_eax;
		*rbx = 0;	/* Reserved */
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_efeature_ecx & VMM_ECPUIDECX_MASK~(0x00000004 | 0x20000000);
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK~(0x08000000);
		break;
	case 0x80000002:	/* Brand string */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[0];
		*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[1];
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[2];
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[3];
		break;
	case 0x80000003:	/* Brand string */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[4];
		*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[5];
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[6];
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[7];
		break;
	case 0x80000004:	/* Brand string */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[8];
		*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[9];
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[10];
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_brand[11];
		break;
	case 0x80000005:	/* Reserved (Intel), cacheinfo (AMD) */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_amdcacheinfo[0];
		*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_amdcacheinfo[1];
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_amdcacheinfo[2];
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_amdcacheinfo[3];
		break;
	case 0x80000006:	/* ext. cache info */
		*rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_extcacheinfo[0];
		*rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_extcacheinfo[1];
		*rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_extcacheinfo[2];
		*rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_extcacheinfo[3];
		break;
	case 0x80000007:	/* apmi */
		CPUID(0x80000007, *rax, *rbx, *rcx, *rdx)__asm volatile("cpuid" : "=a" (*rax), "=b" (*rbx), "=c" (*rcx ), "=d" (*rdx) : "a" (0x80000007));
		break;
	case 0x80000008:	/* Phys bits info and topology (AMD) */
		CPUID(0x80000008, *rax, *rbx, *rcx, *rdx)__asm volatile("cpuid" : "=a" (*rax), "=b" (*rbx), "=c" (*rcx ), "=d" (*rdx) : "a" (0x80000008));
		/* Hide speculation-control bits vmm does not virtualize. */
		*rbx &= VMM_AMDSPEC_EBX_MASK~((1ULL << 12) | (1ULL << 14) | (1ULL << 15 ) | (1ULL << 16) | (1ULL << 17) | (1ULL << 18 ) | (1ULL << 24) | (1ULL << 25) | (1ULL << 26 ));
		/* Reset %rcx (topology) */
		*rcx = 0;
		break;
	default:
		DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
		*rax = 0;
		*rbx = 0;
		*rcx = 0;
		*rdx = 0;
	}


	if (vmm_softc->mode == VMM_MODE_SVM ||
	    vmm_softc->mode == VMM_MODE_RVI) {
		/*
		 * update %rax. the rest of the registers get updated in
		 * svm_enter_guest
		 */
		vmcb->v_rax = *rax;
	}

	return (0);
}
7020 | |
7021 | /* |
7022 | * vcpu_run_svm |
7023 | * |
7024 | * SVM main loop used to run a VCPU. |
7025 | * |
7026 | * Parameters: |
7027 | * vcpu: The VCPU to run |
7028 | * vrp: run parameters |
7029 | * |
7030 | * Return values: |
7031 | * 0: The run loop exited and no help is needed from vmd |
7032 | * EAGAIN: The run loop exited and help from vmd is needed |
7033 | * EINVAL: an error occurred |
7034 | */ |
7035 | int |
7036 | vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp) |
7037 | { |
7038 | int ret = 0; |
7039 | struct region_descriptor gdt; |
7040 | struct cpu_info *ci = NULL((void *)0); |
7041 | uint64_t exit_reason; |
7042 | struct schedstate_percpu *spc; |
7043 | uint16_t irq; |
7044 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
7045 | |
7046 | irq = vrp->vrp_irq; |
7047 | |
7048 | /* |
7049 | * If we are returning from userspace (vmd) because we exited |
7050 | * last time, fix up any needed vcpu state first. Which state |
7051 | * needs to be fixed up depends on what vmd populated in the |
7052 | * exit data structure. |
7053 | */ |
7054 | if (vrp->vrp_continue) { |
7055 | switch (vcpu->vc_gueststate.vg_exit_reason) { |
7056 | case SVM_VMEXIT_IOIO0x7B: |
7057 | if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) { |
7058 | vcpu->vc_gueststate.vg_rax = |
7059 | vcpu->vc_exit.vei.vei_data; |
7060 | vmcb->v_rax = vcpu->vc_gueststate.vg_rax; |
7061 | } |
7062 | } |
7063 | } |
7064 | |
7065 | while (ret == 0) { |
7066 | vmm_update_pvclock(vcpu); |
7067 | if (ci != curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})) { |
7068 | /* |
7069 | * We are launching for the first time, or we are |
7070 | * resuming from a different pcpu, so we need to |
7071 | * reset certain pcpu-specific values. |
7072 | */ |
7073 | ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
7074 | setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1); |
7075 | |
7076 | if (ci != vcpu->vc_last_pcpu) { |
7077 | /* |
7078 | * Flush TLB by guest ASID if feature |
7079 | * available, flush entire TLB if not. |
7080 | */ |
7081 | if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid) |
7082 | vmcb->v_tlb_control = |
7083 | SVM_TLB_CONTROL_FLUSH_ASID3; |
7084 | else |
7085 | vmcb->v_tlb_control = |
7086 | SVM_TLB_CONTROL_FLUSH_ALL1; |
7087 | |
7088 | svm_set_dirty(vcpu, SVM_CLEANBITS_ALL((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) )); |
7089 | } |
7090 | |
7091 | vcpu->vc_last_pcpu = ci; |
7092 | |
7093 | if (gdt.rd_base == 0) { |
7094 | ret = EINVAL22; |
7095 | break; |
7096 | } |
7097 | } |
7098 | |
7099 | /* Handle vmd(8) injected interrupts */ |
7100 | /* Is there an interrupt pending injection? */ |
7101 | if (irq != 0xFFFF && vcpu->vc_irqready) { |
7102 | vmcb->v_eventinj = (irq & 0xFF) | (1 << 31); |
7103 | irq = 0xFFFF; |
7104 | } |
7105 | |
7106 | /* Inject event if present */ |
7107 | if (vcpu->vc_event != 0) { |
7108 | DPRINTF("%s: inject event %d\n", __func__, |
7109 | vcpu->vc_event); |
7110 | vmcb->v_eventinj = 0; |
7111 | /* Set the "Event Valid" flag for certain vectors */ |
7112 | switch (vcpu->vc_event & 0xFF) { |
7113 | case VMM_EX_DF8: |
7114 | case VMM_EX_TS10: |
7115 | case VMM_EX_NP11: |
7116 | case VMM_EX_SS12: |
7117 | case VMM_EX_GP13: |
7118 | case VMM_EX_PF14: |
7119 | case VMM_EX_AC17: |
7120 | vmcb->v_eventinj |= (1ULL << 11); |
7121 | } |
7122 | vmcb->v_eventinj |= (vcpu->vc_event) | (1 << 31); |
7123 | vmcb->v_eventinj |= (3ULL << 8); /* Exception */ |
7124 | vcpu->vc_event = 0; |
7125 | } |
7126 | |
7127 | TRACEPOINT(vmm, guest_enter, vcpu, vrp)do { extern struct dt_probe (dt_static_vmm_guest_enter); struct dt_probe *dtp = &(dt_static_vmm_guest_enter); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp); } } while (0); |
7128 | |
7129 | /* Start / resume the VCPU */ |
7130 | /* Disable interrupts and save the current host FPU state. */ |
7131 | clgi(); |
7132 | if ((ret = vmm_fpurestore(vcpu))) { |
7133 | stgi(); |
7134 | break; |
7135 | } |
7136 | |
7137 | KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR)((vmcb->v_intercept1 & (1UL << 0)) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c", 7137, "vmcb->v_intercept1 & SVM_INTERCEPT_INTR" )); |
7138 | wrmsr(MSR_AMD_VM_HSAVE_PA0xc0010117, vcpu->vc_svm_hsa_pa); |
7139 | |
7140 | ret = svm_enter_guest(vcpu->vc_control_pa, |
7141 | &vcpu->vc_gueststate, &gdt); |
7142 | |
7143 | /* |
7144 | * On exit, interrupts are disabled, and we are running with |
7145 | * the guest FPU state still possibly on the CPU. Save the FPU |
7146 | * state before re-enabling interrupts. |
7147 | */ |
7148 | vmm_fpusave(vcpu); |
7149 | |
7150 | /* |
7151 | * Enable interrupts now. Note that if the exit was due to INTR |
7152 | * (external interrupt), the interrupt will be processed now. |
7153 | */ |
7154 | stgi(); |
7155 | |
7156 | vcpu->vc_gueststate.vg_rip = vmcb->v_rip; |
7157 | vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE0; |
7158 | svm_set_clean(vcpu, SVM_CLEANBITS_ALL((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) )); |
7159 | |
7160 | /* Record the exit reason on successful exit */ |
7161 | if (ret == 0) { |
7162 | exit_reason = vmcb->v_exitcode; |
7163 | vcpu->vc_gueststate.vg_exit_reason = exit_reason; |
7164 | } |
7165 | |
7166 | TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason)do { extern struct dt_probe (dt_static_vmm_guest_exit); struct dt_probe *dtp = &(dt_static_vmm_guest_exit); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp, exit_reason ); } } while (0); |
7167 | |
7168 | /* If we exited successfully ... */ |
7169 | if (ret == 0) { |
7170 | vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags; |
7171 | |
7172 | /* |
7173 | * Handle the exit. This will alter "ret" to EAGAIN if |
7174 | * the exit handler determines help from vmd is needed. |
7175 | */ |
7176 | ret = svm_handle_exit(vcpu); |
7177 | |
7178 | if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200) |
7179 | vcpu->vc_irqready = 1; |
7180 | else |
7181 | vcpu->vc_irqready = 0; |
7182 | |
7183 | /* |
7184 | * If not ready for interrupts, but interrupts pending, |
7185 | * enable interrupt window exiting. |
7186 | */ |
7187 | if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { |
7188 | vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR(1UL << 4); |
7189 | vmcb->v_irq = 1; |
7190 | vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR0x10; |
7191 | vmcb->v_intr_vector = 0; |
7192 | svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) | |
7193 | SVM_CLEANBITS_I(1 << 0)); |
7194 | } |
7195 | |
7196 | /* |
7197 | * Exit to vmd if we are terminating, failed to enter, |
7198 | * or need help (device I/O) |
7199 | */ |
7200 | if (ret || vcpu_must_stop(vcpu)) |
7201 | break; |
7202 | |
7203 | if (vcpu->vc_intr && vcpu->vc_irqready) { |
7204 | ret = EAGAIN35; |
7205 | break; |
7206 | } |
7207 | |
7208 | /* Check if we should yield - don't hog the cpu */ |
7209 | spc = &ci->ci_schedstate; |
7210 | if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002) |
7211 | break; |
7212 | } |
7213 | } |
7214 | |
7215 | /* |
7216 | * We are heading back to userspace (vmd), either because we need help |
7217 | * handling an exit, a guest interrupt is pending, or we failed in some |
7218 | * way to enter the guest. Copy the guest registers to the exit struct |
7219 | * and return to vmd. |
7220 | */ |
7221 | if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs)) |
7222 | ret = EINVAL22; |
7223 | |
7224 | return (ret); |
7225 | } |
7226 | |
7227 | /* |
7228 | * vmm_alloc_vpid |
7229 | * |
7230 | * Sets the memory location pointed to by "vpid" to the next available VPID |
7231 | * or ASID. |
7232 | * |
7233 | * Parameters: |
7234 | * vpid: Pointer to location to receive the next VPID/ASID |
7235 | * |
7236 | * Return Values: |
7237 | * 0: The operation completed successfully |
7238 | * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged. |
7239 | */ |
7240 | int |
7241 | vmm_alloc_vpid(uint16_t *vpid) |
7242 | { |
7243 | uint16_t i; |
7244 | uint8_t idx, bit; |
7245 | struct vmm_softc *sc = vmm_softc; |
7246 | |
7247 | rw_enter_write(&vmm_softc->vpid_lock); |
7248 | for (i = 1; i <= sc->max_vpid; i++) { |
7249 | idx = i / 8; |
7250 | bit = i - (idx * 8); |
7251 | |
7252 | if (!(sc->vpids[idx] & (1 << bit))) { |
7253 | sc->vpids[idx] |= (1 << bit); |
7254 | *vpid = i; |
7255 | DPRINTF("%s: allocated VPID/ASID %d\n", __func__, |
7256 | i); |
7257 | rw_exit_write(&vmm_softc->vpid_lock); |
7258 | return 0; |
7259 | } |
7260 | } |
7261 | |
7262 | printf("%s: no available %ss\n", __func__, |
7263 | (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) ? "VPID" : |
7264 | "ASID"); |
7265 | |
7266 | rw_exit_write(&vmm_softc->vpid_lock); |
7267 | return ENOMEM12; |
7268 | } |
7269 | |
7270 | /* |
7271 | * vmm_free_vpid |
7272 | * |
7273 | * Frees the VPID/ASID id supplied in "vpid". |
7274 | * |
7275 | * Parameters: |
7276 | * vpid: VPID/ASID to free. |
7277 | */ |
7278 | void |
7279 | vmm_free_vpid(uint16_t vpid) |
7280 | { |
7281 | uint8_t idx, bit; |
7282 | struct vmm_softc *sc = vmm_softc; |
7283 | |
7284 | rw_enter_write(&vmm_softc->vpid_lock); |
7285 | idx = vpid / 8; |
7286 | bit = vpid - (idx * 8); |
7287 | sc->vpids[idx] &= ~(1 << bit); |
7288 | |
7289 | DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid); |
7290 | rw_exit_write(&vmm_softc->vpid_lock); |
7291 | } |
7292 | |
7293 | |
7294 | /* vmm_gpa_is_valid |
7295 | * |
7296 | * Check if the given gpa is within guest memory space. |
7297 | * |
7298 | * Parameters: |
7299 | * vcpu: The virtual cpu we are running on. |
7300 | * gpa: The address to check. |
7301 | * obj_size: The size of the object assigned to gpa |
7302 | * |
7303 | * Return values: |
7304 | * 1: gpa is within the memory ranges allocated for the vcpu |
7305 | * 0: otherwise |
7306 | */ |
7307 | int |
7308 | vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size) |
7309 | { |
7310 | struct vm *vm = vcpu->vc_parent; |
7311 | struct vm_mem_range *vmr; |
7312 | size_t i; |
7313 | |
7314 | for (i = 0; i < vm->vm_nmemranges; ++i) { |
7315 | vmr = &vm->vm_memranges[i]; |
7316 | if (vmr->vmr_size >= obj_size && |
7317 | vmr->vmr_gpa <= gpa && |
7318 | gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) { |
7319 | return 1; |
7320 | } |
7321 | } |
7322 | return 0; |
7323 | } |
7324 | |
7325 | void |
7326 | vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa) |
7327 | { |
7328 | paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0; |
7329 | if (!vmm_gpa_is_valid(vcpu, pvclock_gpa, |
7330 | sizeof(struct pvclock_time_info))) { |
7331 | /* XXX: Kill guest? */ |
7332 | vmm_inject_gp(vcpu); |
7333 | return; |
7334 | } |
7335 | |
7336 | /* XXX: handle case when this struct goes over page boundaries */ |
7337 | if ((pvclock_gpa & PAGE_MASK((1 << 12) - 1)) + sizeof(struct pvclock_time_info) > |
7338 | PAGE_SIZE(1 << 12)) { |
7339 | vmm_inject_gp(vcpu); |
7340 | return; |
7341 | } |
7342 | |
7343 | vcpu->vc_pvclock_system_gpa = gpa; |
7344 | if (tsc_frequency > 0) |
7345 | vcpu->vc_pvclock_system_tsc_mul = |
7346 | (int) ((1000000000L << 20) / tsc_frequency); |
7347 | else |
7348 | vcpu->vc_pvclock_system_tsc_mul = 0; |
7349 | vmm_update_pvclock(vcpu); |
7350 | } |
7351 | |
7352 | int |
7353 | vmm_update_pvclock(struct vcpu *vcpu) |
7354 | { |
7355 | struct pvclock_time_info *pvclock_ti; |
7356 | struct timespec tv; |
7357 | struct vm *vm = vcpu->vc_parent; |
7358 | paddr_t pvclock_hpa, pvclock_gpa; |
7359 | |
7360 | if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE0x01) { |
7361 | pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0; |
7362 | if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa)) |
7363 | return (EINVAL22); |
7364 | pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (pvclock_hpa)); |
7365 | |
7366 | /* START next cycle (must be odd) */ |
7367 | pvclock_ti->ti_version = |
7368 | (++vcpu->vc_pvclock_version << 1) | 0x1; |
7369 | |
7370 | pvclock_ti->ti_tsc_timestamp = rdtsc(); |
7371 | nanotime(&tv); |
7372 | pvclock_ti->ti_system_time = |
7373 | tv.tv_sec * 1000000000L + tv.tv_nsec; |
7374 | pvclock_ti->ti_tsc_shift = 12; |
7375 | pvclock_ti->ti_tsc_to_system_mul = |
7376 | vcpu->vc_pvclock_system_tsc_mul; |
7377 | pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE0x01; |
7378 | |
7379 | /* END (must be even) */ |
7380 | pvclock_ti->ti_version &= ~0x1; |
7381 | } |
7382 | return (0); |
7383 | } |
7384 | |
/*
 * vmm_pat_is_valid
 *
 * Check that every 8-bit memory-type field of a guest-supplied PAT MSR
 * value holds an architecturally valid encoding.
 *
 * Return values:
 *  1: all eight PAT entries are valid
 *  0: at least one entry is invalid
 */
int
vmm_pat_is_valid(uint64_t pat)
{
	int i;
	uint8_t attr;

	/* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
	for (i = 0; i < 8; i++) {
		attr = (pat >> (i * 8)) & 0xFF;
		if (attr == 0x02 || attr == 0x03 || attr > 0x07) {
			DPRINTF("%s: invalid pat %llx\n", __func__, pat);
			return 0;
		}
	}

	return 1;
}
7401 | |
7402 | /* |
7403 | * vmx_exit_reason_decode |
7404 | * |
7405 | * Returns a human readable string describing exit type 'code' |
7406 | */ |
7407 | const char * |
7408 | vmx_exit_reason_decode(uint32_t code) |
7409 | { |
7410 | switch (code) { |
7411 | case VMX_EXIT_NMI0: return "NMI"; |
7412 | case VMX_EXIT_EXTINT1: return "External interrupt"; |
7413 | case VMX_EXIT_TRIPLE_FAULT2: return "Triple fault"; |
7414 | case VMX_EXIT_INIT3: return "INIT signal"; |
7415 | case VMX_EXIT_SIPI4: return "SIPI signal"; |
7416 | case VMX_EXIT_IO_SMI5: return "I/O SMI"; |
7417 | case VMX_EXIT_OTHER_SMI6: return "other SMI"; |
7418 | case VMX_EXIT_INT_WINDOW7: return "Interrupt window"; |
7419 | case VMX_EXIT_NMI_WINDOW8: return "NMI window"; |
7420 | case VMX_EXIT_TASK_SWITCH9: return "Task switch"; |
7421 | case VMX_EXIT_CPUID10: return "CPUID instruction"; |
7422 | case VMX_EXIT_GETSEC11: return "GETSEC instruction"; |
7423 | case VMX_EXIT_HLT12: return "HLT instruction"; |
7424 | case VMX_EXIT_INVD13: return "INVD instruction"; |
7425 | case VMX_EXIT_INVLPG14: return "INVLPG instruction"; |
7426 | case VMX_EXIT_RDPMC15: return "RDPMC instruction"; |
7427 | case VMX_EXIT_RDTSC16: return "RDTSC instruction"; |
7428 | case VMX_EXIT_RSM17: return "RSM instruction"; |
7429 | case VMX_EXIT_VMCALL18: return "VMCALL instruction"; |
7430 | case VMX_EXIT_VMCLEAR19: return "VMCLEAR instruction"; |
7431 | case VMX_EXIT_VMLAUNCH20: return "VMLAUNCH instruction"; |
7432 | case VMX_EXIT_VMPTRLD21: return "VMPTRLD instruction"; |
7433 | case VMX_EXIT_VMPTRST22: return "VMPTRST instruction"; |
7434 | case VMX_EXIT_VMREAD23: return "VMREAD instruction"; |
7435 | case VMX_EXIT_VMRESUME24: return "VMRESUME instruction"; |
7436 | case VMX_EXIT_VMWRITE25: return "VMWRITE instruction"; |
7437 | case VMX_EXIT_VMXOFF26: return "VMXOFF instruction"; |
7438 | case VMX_EXIT_VMXON27: return "VMXON instruction"; |
7439 | case VMX_EXIT_CR_ACCESS28: return "CR access"; |
7440 | case VMX_EXIT_MOV_DR29: return "MOV DR instruction"; |
7441 | case VMX_EXIT_IO30: return "I/O instruction"; |
7442 | case VMX_EXIT_RDMSR31: return "RDMSR instruction"; |
7443 | case VMX_EXIT_WRMSR32: return "WRMSR instruction"; |
7444 | case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33: return "guest state invalid"; |
7445 | case VMX_EXIT_ENTRY_FAILED_MSR_LOAD34: return "MSR load failed"; |
7446 | case VMX_EXIT_MWAIT36: return "MWAIT instruction"; |
7447 | case VMX_EXIT_MTF37: return "monitor trap flag"; |
7448 | case VMX_EXIT_MONITOR39: return "MONITOR instruction"; |
7449 | case VMX_EXIT_PAUSE40: return "PAUSE instruction"; |
7450 | case VMX_EXIT_ENTRY_FAILED_MCE41: return "MCE during entry"; |
7451 | case VMX_EXIT_TPR_BELOW_THRESHOLD43: return "TPR below threshold"; |
7452 | case VMX_EXIT_APIC_ACCESS44: return "APIC access"; |
7453 | case VMX_EXIT_VIRTUALIZED_EOI45: return "virtualized EOI"; |
7454 | case VMX_EXIT_GDTR_IDTR46: return "GDTR/IDTR access"; |
7455 | case VMX_EXIT_LDTR_TR47: return "LDTR/TR access"; |
7456 | case VMX_EXIT_EPT_VIOLATION48: return "EPT violation"; |
7457 | case VMX_EXIT_EPT_MISCONFIGURATION49: return "EPT misconfiguration"; |
7458 | case VMX_EXIT_INVEPT50: return "INVEPT instruction"; |
7459 | case VMX_EXIT_RDTSCP51: return "RDTSCP instruction"; |
7460 | case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED52: |
7461 | return "preemption timer expired"; |
7462 | case VMX_EXIT_INVVPID53: return "INVVPID instruction"; |
7463 | case VMX_EXIT_WBINVD54: return "WBINVD instruction"; |
7464 | case VMX_EXIT_XSETBV55: return "XSETBV instruction"; |
7465 | case VMX_EXIT_APIC_WRITE56: return "APIC write"; |
7466 | case VMX_EXIT_RDRAND57: return "RDRAND instruction"; |
7467 | case VMX_EXIT_INVPCID58: return "INVPCID instruction"; |
7468 | case VMX_EXIT_VMFUNC59: return "VMFUNC instruction"; |
7469 | case VMX_EXIT_RDSEED61: return "RDSEED instruction"; |
7470 | case VMX_EXIT_XSAVES63: return "XSAVES instruction"; |
7471 | case VMX_EXIT_XRSTORS64: return "XRSTORS instruction"; |
7472 | default: return "unknown"; |
7473 | } |
7474 | } |
7475 | |
7476 | /* |
7477 | * svm_exit_reason_decode |
7478 | * |
7479 | * Returns a human readable string describing exit type 'code' |
7480 | */ |
7481 | const char * |
7482 | svm_exit_reason_decode(uint32_t code) |
7483 | { |
7484 | switch (code) { |
7485 | case SVM_VMEXIT_CR0_READ0x00: return "CR0 read"; /* 0x00 */ |
7486 | case SVM_VMEXIT_CR1_READ0x01: return "CR1 read"; /* 0x01 */ |
7487 | case SVM_VMEXIT_CR2_READ0x02: return "CR2 read"; /* 0x02 */ |
7488 | case SVM_VMEXIT_CR3_READ0x03: return "CR3 read"; /* 0x03 */ |
7489 | case SVM_VMEXIT_CR4_READ0x04: return "CR4 read"; /* 0x04 */ |
7490 | case SVM_VMEXIT_CR5_READ0x05: return "CR5 read"; /* 0x05 */ |
7491 | case SVM_VMEXIT_CR6_READ0x06: return "CR6 read"; /* 0x06 */ |
7492 | case SVM_VMEXIT_CR7_READ0x07: return "CR7 read"; /* 0x07 */ |
7493 | case SVM_VMEXIT_CR8_READ0x08: return "CR8 read"; /* 0x08 */ |
7494 | case SVM_VMEXIT_CR9_READ0x09: return "CR9 read"; /* 0x09 */ |
7495 | case SVM_VMEXIT_CR10_READ0x0A: return "CR10 read"; /* 0x0A */ |
7496 | case SVM_VMEXIT_CR11_READ0x0B: return "CR11 read"; /* 0x0B */ |
7497 | case SVM_VMEXIT_CR12_READ0x0C: return "CR12 read"; /* 0x0C */ |
7498 | case SVM_VMEXIT_CR13_READ0x0D: return "CR13 read"; /* 0x0D */ |
7499 | case SVM_VMEXIT_CR14_READ0x0E: return "CR14 read"; /* 0x0E */ |
7500 | case SVM_VMEXIT_CR15_READ0x0F: return "CR15 read"; /* 0x0F */ |
7501 | case SVM_VMEXIT_CR0_WRITE0x10: return "CR0 write"; /* 0x10 */ |
7502 | case SVM_VMEXIT_CR1_WRITE0x11: return "CR1 write"; /* 0x11 */ |
7503 | case SVM_VMEXIT_CR2_WRITE0x12: return "CR2 write"; /* 0x12 */ |
7504 | case SVM_VMEXIT_CR3_WRITE0x13: return "CR3 write"; /* 0x13 */ |
7505 | case SVM_VMEXIT_CR4_WRITE0x14: return "CR4 write"; /* 0x14 */ |
7506 | case SVM_VMEXIT_CR5_WRITE0x15: return "CR5 write"; /* 0x15 */ |
7507 | case SVM_VMEXIT_CR6_WRITE0x16: return "CR6 write"; /* 0x16 */ |
7508 | case SVM_VMEXIT_CR7_WRITE0x17: return "CR7 write"; /* 0x17 */ |
7509 | case SVM_VMEXIT_CR8_WRITE0x18: return "CR8 write"; /* 0x18 */ |
7510 | case SVM_VMEXIT_CR9_WRITE0x19: return "CR9 write"; /* 0x19 */ |
7511 | case SVM_VMEXIT_CR10_WRITE0x1A: return "CR10 write"; /* 0x1A */ |
7512 | case SVM_VMEXIT_CR11_WRITE0x1B: return "CR11 write"; /* 0x1B */ |
7513 | case SVM_VMEXIT_CR12_WRITE0x1C: return "CR12 write"; /* 0x1C */ |
7514 | case SVM_VMEXIT_CR13_WRITE0x1D: return "CR13 write"; /* 0x1D */ |
7515 | case SVM_VMEXIT_CR14_WRITE0x1E: return "CR14 write"; /* 0x1E */ |
7516 | case SVM_VMEXIT_CR15_WRITE0x1F: return "CR15 write"; /* 0x1F */ |
7517 | case SVM_VMEXIT_DR0_READ0x20: return "DR0 read"; /* 0x20 */ |
7518 | case SVM_VMEXIT_DR1_READ0x21: return "DR1 read"; /* 0x21 */ |
7519 | case SVM_VMEXIT_DR2_READ0x22: return "DR2 read"; /* 0x22 */ |
7520 | case SVM_VMEXIT_DR3_READ0x23: return "DR3 read"; /* 0x23 */ |
7521 | case SVM_VMEXIT_DR4_READ0x24: return "DR4 read"; /* 0x24 */ |
7522 | case SVM_VMEXIT_DR5_READ0x25: return "DR5 read"; /* 0x25 */ |
7523 | case SVM_VMEXIT_DR6_READ0x26: return "DR6 read"; /* 0x26 */ |
7524 | case SVM_VMEXIT_DR7_READ0x27: return "DR7 read"; /* 0x27 */ |
7525 | case SVM_VMEXIT_DR8_READ0x28: return "DR8 read"; /* 0x28 */ |
7526 | case SVM_VMEXIT_DR9_READ0x29: return "DR9 read"; /* 0x29 */ |
7527 | case SVM_VMEXIT_DR10_READ0x2A: return "DR10 read"; /* 0x2A */ |
7528 | case SVM_VMEXIT_DR11_READ0x2B: return "DR11 read"; /* 0x2B */ |
7529 | case SVM_VMEXIT_DR12_READ0x2C: return "DR12 read"; /* 0x2C */ |
7530 | case SVM_VMEXIT_DR13_READ0x2D: return "DR13 read"; /* 0x2D */ |
7531 | case SVM_VMEXIT_DR14_READ0x2E: return "DR14 read"; /* 0x2E */ |
7532 | case SVM_VMEXIT_DR15_READ0x2F: return "DR15 read"; /* 0x2F */ |
7533 | case SVM_VMEXIT_DR0_WRITE0x30: return "DR0 write"; /* 0x30 */ |
7534 | case SVM_VMEXIT_DR1_WRITE0x31: return "DR1 write"; /* 0x31 */ |
7535 | case SVM_VMEXIT_DR2_WRITE0x32: return "DR2 write"; /* 0x32 */ |
7536 | case SVM_VMEXIT_DR3_WRITE0x33: return "DR3 write"; /* 0x33 */ |
7537 | case SVM_VMEXIT_DR4_WRITE0x34: return "DR4 write"; /* 0x34 */ |
7538 | case SVM_VMEXIT_DR5_WRITE0x35: return "DR5 write"; /* 0x35 */ |
7539 | case SVM_VMEXIT_DR6_WRITE0x36: return "DR6 write"; /* 0x36 */ |
7540 | case SVM_VMEXIT_DR7_WRITE0x37: return "DR7 write"; /* 0x37 */ |
7541 | case SVM_VMEXIT_DR8_WRITE0x38: return "DR8 write"; /* 0x38 */ |
7542 | case SVM_VMEXIT_DR9_WRITE0x39: return "DR9 write"; /* 0x39 */ |
7543 | case SVM_VMEXIT_DR10_WRITE0x3A: return "DR10 write"; /* 0x3A */ |
7544 | case SVM_VMEXIT_DR11_WRITE0x3B: return "DR11 write"; /* 0x3B */ |
7545 | case SVM_VMEXIT_DR12_WRITE0x3C: return "DR12 write"; /* 0x3C */ |
7546 | case SVM_VMEXIT_DR13_WRITE0x3D: return "DR13 write"; /* 0x3D */ |
7547 | case SVM_VMEXIT_DR14_WRITE0x3E: return "DR14 write"; /* 0x3E */ |
7548 | case SVM_VMEXIT_DR15_WRITE0x3F: return "DR15 write"; /* 0x3F */ |
7549 | case SVM_VMEXIT_EXCP00x40: return "Exception 0x00"; /* 0x40 */ |
7550 | case SVM_VMEXIT_EXCP10x41: return "Exception 0x01"; /* 0x41 */ |
7551 | case SVM_VMEXIT_EXCP20x42: return "Exception 0x02"; /* 0x42 */ |
7552 | case SVM_VMEXIT_EXCP30x43: return "Exception 0x03"; /* 0x43 */ |
7553 | case SVM_VMEXIT_EXCP40x44: return "Exception 0x04"; /* 0x44 */ |
7554 | case SVM_VMEXIT_EXCP50x45: return "Exception 0x05"; /* 0x45 */ |
7555 | case SVM_VMEXIT_EXCP60x46: return "Exception 0x06"; /* 0x46 */ |
7556 | case SVM_VMEXIT_EXCP70x47: return "Exception 0x07"; /* 0x47 */ |
7557 | case SVM_VMEXIT_EXCP80x48: return "Exception 0x08"; /* 0x48 */ |
7558 | case SVM_VMEXIT_EXCP90x49: return "Exception 0x09"; /* 0x49 */ |
7559 | case SVM_VMEXIT_EXCP100x4A: return "Exception 0x0A"; /* 0x4A */ |
7560 | case SVM_VMEXIT_EXCP110x4B: return "Exception 0x0B"; /* 0x4B */ |
7561 | case SVM_VMEXIT_EXCP120x4C: return "Exception 0x0C"; /* 0x4C */ |
7562 | case SVM_VMEXIT_EXCP130x4D: return "Exception 0x0D"; /* 0x4D */ |
7563 | case SVM_VMEXIT_EXCP140x4E: return "Exception 0x0E"; /* 0x4E */ |
7564 | case SVM_VMEXIT_EXCP150x4F: return "Exception 0x0F"; /* 0x4F */ |
7565 | case SVM_VMEXIT_EXCP160x50: return "Exception 0x10"; /* 0x50 */ |
7566 | case SVM_VMEXIT_EXCP170x51: return "Exception 0x11"; /* 0x51 */ |
7567 | case SVM_VMEXIT_EXCP180x52: return "Exception 0x12"; /* 0x52 */ |
7568 | case SVM_VMEXIT_EXCP190x53: return "Exception 0x13"; /* 0x53 */ |
7569 | case SVM_VMEXIT_EXCP200x54: return "Exception 0x14"; /* 0x54 */ |
7570 | case SVM_VMEXIT_EXCP210x55: return "Exception 0x15"; /* 0x55 */ |
7571 | case SVM_VMEXIT_EXCP220x56: return "Exception 0x16"; /* 0x56 */ |
7572 | case SVM_VMEXIT_EXCP230x57: return "Exception 0x17"; /* 0x57 */ |
7573 | case SVM_VMEXIT_EXCP240x58: return "Exception 0x18"; /* 0x58 */ |
7574 | case SVM_VMEXIT_EXCP250x59: return "Exception 0x19"; /* 0x59 */ |
7575 | case SVM_VMEXIT_EXCP260x5A: return "Exception 0x1A"; /* 0x5A */ |
7576 | case SVM_VMEXIT_EXCP270x5B: return "Exception 0x1B"; /* 0x5B */ |
7577 | case SVM_VMEXIT_EXCP280x5C: return "Exception 0x1C"; /* 0x5C */ |
7578 | case SVM_VMEXIT_EXCP290x5D: return "Exception 0x1D"; /* 0x5D */ |
7579 | case SVM_VMEXIT_EXCP300x5E: return "Exception 0x1E"; /* 0x5E */ |
7580 | case SVM_VMEXIT_EXCP310x5F: return "Exception 0x1F"; /* 0x5F */ |
7581 | case SVM_VMEXIT_INTR0x60: return "External interrupt"; /* 0x60 */ |
7582 | case SVM_VMEXIT_NMI0x61: return "NMI"; /* 0x61 */ |
7583 | case SVM_VMEXIT_SMI0x62: return "SMI"; /* 0x62 */ |
7584 | case SVM_VMEXIT_INIT0x63: return "INIT"; /* 0x63 */ |
7585 | case SVM_VMEXIT_VINTR0x64: return "Interrupt window"; /* 0x64 */ |
7586 | case SVM_VMEXIT_CR0_SEL_WRITE0x65: return "Sel CR0 write"; /* 0x65 */ |
7587 | case SVM_VMEXIT_IDTR_READ0x66: return "IDTR read"; /* 0x66 */ |
7588 | case SVM_VMEXIT_GDTR_READ0x67: return "GDTR read"; /* 0x67 */ |
7589 | case SVM_VMEXIT_LDTR_READ0x68: return "LDTR read"; /* 0x68 */ |
7590 | case SVM_VMEXIT_TR_READ0x69: return "TR read"; /* 0x69 */ |
7591 | case SVM_VMEXIT_IDTR_WRITE0x6A: return "IDTR write"; /* 0x6A */ |
7592 | case SVM_VMEXIT_GDTR_WRITE0x6B: return "GDTR write"; /* 0x6B */ |
7593 | case SVM_VMEXIT_LDTR_WRITE0x6C: return "LDTR write"; /* 0x6C */ |
7594 | case SVM_VMEXIT_TR_WRITE0x6D: return "TR write"; /* 0x6D */ |
7595 | case SVM_VMEXIT_RDTSC0x6E: return "RDTSC instruction"; /* 0x6E */ |
7596 | case SVM_VMEXIT_RDPMC0x6F: return "RDPMC instruction"; /* 0x6F */ |
7597 | case SVM_VMEXIT_PUSHF0x70: return "PUSHF instruction"; /* 0x70 */ |
7598 | case SVM_VMEXIT_POPF0x71: return "POPF instruction"; /* 0x71 */ |
7599 | case SVM_VMEXIT_CPUID0x72: return "CPUID instruction"; /* 0x72 */ |
7600 | case SVM_VMEXIT_RSM0x73: return "RSM instruction"; /* 0x73 */ |
7601 | case SVM_VMEXIT_IRET0x74: return "IRET instruction"; /* 0x74 */ |
7602 | case SVM_VMEXIT_SWINT0x75: return "SWINT instruction"; /* 0x75 */ |
7603 | case SVM_VMEXIT_INVD0x76: return "INVD instruction"; /* 0x76 */ |
7604 | case SVM_VMEXIT_PAUSE0x77: return "PAUSE instruction"; /* 0x77 */ |
7605 | case SVM_VMEXIT_HLT0x78: return "HLT instruction"; /* 0x78 */ |
7606 | case SVM_VMEXIT_INVLPG0x79: return "INVLPG instruction"; /* 0x79 */ |
7607 | case SVM_VMEXIT_INVLPGA0x7A: return "INVLPGA instruction"; /* 0x7A */ |
7608 | case SVM_VMEXIT_IOIO0x7B: return "I/O instruction"; /* 0x7B */ |
7609 | case SVM_VMEXIT_MSR0x7C: return "RDMSR/WRMSR instruction"; /* 0x7C */ |
7610 | case SVM_VMEXIT_TASK_SWITCH0x7D: return "Task switch"; /* 0x7D */ |
7611 | case SVM_VMEXIT_FERR_FREEZE0x7E: return "FERR_FREEZE"; /* 0x7E */ |
7612 | case SVM_VMEXIT_SHUTDOWN0x7F: return "Triple fault"; /* 0x7F */ |
7613 | case SVM_VMEXIT_VMRUN0x80: return "VMRUN instruction"; /* 0x80 */ |
7614 | case SVM_VMEXIT_VMMCALL0x81: return "VMMCALL instruction"; /* 0x81 */ |
7615 | case SVM_VMEXIT_VMLOAD0x82: return "VMLOAD instruction"; /* 0x82 */ |
7616 | case SVM_VMEXIT_VMSAVE0x83: return "VMSAVE instruction"; /* 0x83 */ |
7617 | case SVM_VMEXIT_STGI0x84: return "STGI instruction"; /* 0x84 */ |
7618 | case SVM_VMEXIT_CLGI0x85: return "CLGI instruction"; /* 0x85 */ |
7619 | case SVM_VMEXIT_SKINIT0x86: return "SKINIT instruction"; /* 0x86 */ |
7620 | case SVM_VMEXIT_RDTSCP0x87: return "RDTSCP instruction"; /* 0x87 */ |
7621 | case SVM_VMEXIT_ICEBP0x88: return "ICEBP instruction"; /* 0x88 */ |
7622 | case SVM_VMEXIT_WBINVD0x89: return "WBINVD instruction"; /* 0x89 */ |
7623 | case SVM_VMEXIT_MONITOR0x8A: return "MONITOR instruction"; /* 0x8A */ |
7624 | case SVM_VMEXIT_MWAIT0x8B: return "MWAIT instruction"; /* 0x8B */ |
7625 | case SVM_VMEXIT_MWAIT_CONDITIONAL0x8C: return "Cond MWAIT"; /* 0x8C */ |
7626 | case SVM_VMEXIT_NPF0x400: return "NPT violation"; /* 0x400 */ |
7627 | default: return "unknown"; |
7628 | } |
7629 | } |
7630 | |
/*
 * vmx_instruction_error_decode
 *
 * Returns a human readable string describing the instruction error in 'code'
 */
const char *
vmx_instruction_error_decode(uint32_t code)
{
	switch (code) {
	case 1: return "VMCALL: unsupported in VMX root";
	case 2: return "VMCLEAR: invalid paddr";
	case 3: return "VMCLEAR: VMXON pointer";
	case 4: return "VMLAUNCH: non-clear VMCS";
	case 5: return "VMRESUME: non-launched VMCS";
	case 6: return "VMRESUME: executed after VMXOFF";
	case 7: return "VM entry: invalid control field(s)";
	case 8: return "VM entry: invalid host state field(s)";
	case 9: return "VMPTRLD: invalid paddr";
	case 10: return "VMPTRLD: VMXON pointer";
	case 11: return "VMPTRLD: incorrect VMCS revid";
	case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
	case 13: return "VMWRITE: RO VMCS field";
	case 15: return "VMXON: unsupported in VMX root";
	case 20: return "VMCALL: invalid VM exit control fields";
	case 26: return "VM entry: blocked by MOV SS";
	case 28: return "Invalid operand to INVEPT/INVVPID";
	case 0x80000021: return "VM entry: invalid guest state";
	case 0x80000022: return "VM entry: failure due to MSR loading";
	case 0x80000029: return "VM entry: machine-check event";
	default: return "unknown";
	}
}
7663 | |
7664 | /* |
7665 | * vcpu_state_decode |
7666 | * |
7667 | * Returns a human readable string describing the vcpu state in 'state'. |
7668 | */ |
7669 | const char * |
7670 | vcpu_state_decode(u_int state) |
7671 | { |
7672 | switch (state) { |
7673 | case VCPU_STATE_STOPPED: return "stopped"; |
7674 | case VCPU_STATE_RUNNING: return "running"; |
7675 | case VCPU_STATE_REQTERM: return "requesting termination"; |
7676 | case VCPU_STATE_TERMINATED: return "terminated"; |
7677 | case VCPU_STATE_UNKNOWN: return "unknown"; |
7678 | default: return "invalid"; |
7679 | } |
7680 | } |
7681 | |
7682 | #ifdef VMM_DEBUG |
7683 | /* |
7684 | * dump_vcpu |
7685 | * |
7686 | * Dumps the VMX capabilities of vcpu 'vcpu' |
7687 | */ |
7688 | void |
7689 | dump_vcpu(struct vcpu *vcpu) |
7690 | { |
7691 | printf("vcpu @ %p\n", vcpu); |
7692 | printf(" parent vm @ %p\n", vcpu->vc_parent); |
7693 | printf(" mode: "); |
7694 | if (vcpu->vc_virt_mode == VMM_MODE_VMX || |
7695 | vcpu->vc_virt_mode == VMM_MODE_EPT) { |
7696 | printf("VMX\n"); |
7697 | printf(" pinbased ctls: 0x%llx\n", |
7698 | vcpu->vc_vmx_pinbased_ctls); |
7699 | printf(" true pinbased ctls: 0x%llx\n", |
7700 | vcpu->vc_vmx_true_pinbased_ctls); |
7701 | CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "EXTERNAL_INT_EXITING" , vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 0), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 0), 0 ) ? "Yes" : "No");; |
7702 | CTRL_DUMP(vcpu, PINBASED, NMI_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "NMI_EXITING" , vcpu_vmx_check_cap (vcpu, 0x481, (1ULL << 3), 1) ? "Yes" : "No", vcpu_vmx_check_cap (vcpu, 0x481, (1ULL << 3), 0) ? "Yes" : "No");; |
7703 | CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_NMIS" , vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 5), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 5), 0 ) ? "Yes" : "No");; |
7704 | CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER)printf(" %s: Can set:%s Can clear:%s\n", "ACTIVATE_VMX_PREEMPTION_TIMER" , vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 6), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 6), 0 ) ? "Yes" : "No");; |
7705 | CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS)printf(" %s: Can set:%s Can clear:%s\n", "PROCESS_POSTED_INTERRUPTS" , vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 7), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 7), 0 ) ? "Yes" : "No");; |
7706 | printf(" procbased ctls: 0x%llx\n", |
7707 | vcpu->vc_vmx_procbased_ctls); |
7708 | printf(" true procbased ctls: 0x%llx\n", |
7709 | vcpu->vc_vmx_true_procbased_ctls); |
7710 | CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "INTERRUPT_WINDOW_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 2), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 2), 0 ) ? "Yes" : "No");; |
7711 | CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING)printf(" %s: Can set:%s Can clear:%s\n", "USE_TSC_OFFSETTING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 3), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 3), 0 ) ? "Yes" : "No");; |
7712 | CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "HLT_EXITING" , vcpu_vmx_check_cap (vcpu, 0x482, (1ULL << 7), 1) ? "Yes" : "No", vcpu_vmx_check_cap (vcpu, 0x482, (1ULL << 7), 0) ? "Yes" : "No");; |
7713 | CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "INVLPG_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 9), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 9), 0 ) ? "Yes" : "No");; |
7714 | CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MWAIT_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 10), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 10), 0 ) ? "Yes" : "No");; |
7715 | CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDPMC_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 11), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 11), 0 ) ? "Yes" : "No");; |
7716 | CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDTSC_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 12), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 12), 0 ) ? "Yes" : "No");; |
7717 | CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR3_LOAD_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 15), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 15), 0 ) ? "Yes" : "No");; |
7718 | CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR3_STORE_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 16), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 16), 0 ) ? "Yes" : "No");; |
7719 | CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR8_LOAD_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 19), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 19), 0 ) ? "Yes" : "No");; |
7720 | CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR8_STORE_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 20), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 20), 0 ) ? "Yes" : "No");; |
7721 | CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW)printf(" %s: Can set:%s Can clear:%s\n", "USE_TPR_SHADOW" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 21), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 21), 0 ) ? "Yes" : "No");; |
7722 | CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "NMI_WINDOW_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 22), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 22), 0 ) ? "Yes" : "No");; |
7723 | CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MOV_DR_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 23), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 23), 0 ) ? "Yes" : "No");; |
7724 | CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "UNCONDITIONAL_IO_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 24), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 24), 0 ) ? "Yes" : "No");; |
7725 | CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS)printf(" %s: Can set:%s Can clear:%s\n", "USE_IO_BITMAPS" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 25), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 25), 0 ) ? "Yes" : "No");; |
7726 | CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG)printf(" %s: Can set:%s Can clear:%s\n", "MONITOR_TRAP_FLAG" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 27), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 27), 0 ) ? "Yes" : "No");; |
7727 | CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS)printf(" %s: Can set:%s Can clear:%s\n", "USE_MSR_BITMAPS" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 28), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 28), 0 ) ? "Yes" : "No");; |
7728 | CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MONITOR_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 29), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 29), 0 ) ? "Yes" : "No");; |
7729 | CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "PAUSE_EXITING" , vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 30), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 30), 0 ) ? "Yes" : "No");; |
7730 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
7731 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) { |
7732 | printf(" procbased2 ctls: 0x%llx\n", |
7733 | vcpu->vc_vmx_procbased2_ctls); |
7734 | CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUALIZE_APIC" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 0), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 0), 0 ) ? "Yes" : "No");; |
7735 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_EPT" , vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 1), 1) ? "Yes" : "No", vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 1), 0) ? "Yes" : "No");; |
7736 | CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "DESCRIPTOR_TABLE_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 2), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 2), 0 ) ? "Yes" : "No");; |
7737 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_RDTSCP" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 3), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 3), 0 ) ? "Yes" : "No");; |
7738 | CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUALIZE_X2APIC_MODE" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 4), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 4), 0 ) ? "Yes" : "No");; |
7739 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_VPID" , vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 5), 1) ? "Yes" : "No", vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 5), 0) ? "Yes" : "No");; |
7740 | CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "WBINVD_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 6), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 6), 0 ) ? "Yes" : "No");; |
7741 | CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST)printf(" %s: Can set:%s Can clear:%s\n", "UNRESTRICTED_GUEST" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 7), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 7), 0 ) ? "Yes" : "No");; |
7742 | CTRL_DUMP(vcpu, PROCBASED2,printf(" %s: Can set:%s Can clear:%s\n", "APIC_REGISTER_VIRTUALIZATION" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 0 ) ? "Yes" : "No"); |
7743 | APIC_REGISTER_VIRTUALIZATION)printf(" %s: Can set:%s Can clear:%s\n", "APIC_REGISTER_VIRTUALIZATION" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 0 ) ? "Yes" : "No");; |
7744 | CTRL_DUMP(vcpu, PROCBASED2,printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_INTERRUPT_DELIVERY" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 0 ) ? "Yes" : "No"); |
7745 | VIRTUAL_INTERRUPT_DELIVERY)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_INTERRUPT_DELIVERY" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 0 ) ? "Yes" : "No");; |
7746 | CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "PAUSE_LOOP_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 10), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 10), 0 ) ? "Yes" : "No");; |
7747 | CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDRAND_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 11), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 11), 0 ) ? "Yes" : "No");; |
7748 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_INVPCID" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 12), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 12), 0 ) ? "Yes" : "No");; |
7749 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_VM_FUNCTIONS" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 13), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 13), 0 ) ? "Yes" : "No");; |
7750 | CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING)printf(" %s: Can set:%s Can clear:%s\n", "VMCS_SHADOWING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 14), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 14), 0 ) ? "Yes" : "No");; |
7751 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_ENCLS_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 15), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 15), 0 ) ? "Yes" : "No");; |
7752 | CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDSEED_EXITING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 16), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 16), 0 ) ? "Yes" : "No");; |
7753 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_PML" , vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 17), 1) ? "Yes" : "No", vcpu_vmx_check_cap (vcpu, 0x48B, (1ULL << 17), 0) ? "Yes" : "No");; |
7754 | CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE)printf(" %s: Can set:%s Can clear:%s\n", "EPT_VIOLATION_VE" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 18), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 18), 0 ) ? "Yes" : "No");; |
7755 | CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VMX_FROM_PT" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 19), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 19), 0 ) ? "Yes" : "No");; |
7756 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_XSAVES_XRSTORS" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 20), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 20), 0 ) ? "Yes" : "No");; |
7757 | CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_TSC_SCALING" , vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 25), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 25), 0 ) ? "Yes" : "No");; |
7758 | } |
7759 | printf(" entry ctls: 0x%llx\n", |
7760 | vcpu->vc_vmx_entry_ctls); |
7761 | printf(" true entry ctls: 0x%llx\n", |
7762 | vcpu->vc_vmx_true_entry_ctls); |
7763 | CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_DEBUG_CONTROLS" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 2), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 2), 0 ) ? "Yes" : "No");; |
7764 | CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST)printf(" %s: Can set:%s Can clear:%s\n", "IA32E_MODE_GUEST" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 9), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 9), 0 ) ? "Yes" : "No");; |
7765 | CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM)printf(" %s: Can set:%s Can clear:%s\n", "ENTRY_TO_SMM" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 10), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 10), 0 ) ? "Yes" : "No");; |
7766 | CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT)printf(" %s: Can set:%s Can clear:%s\n", "DEACTIVATE_DUAL_MONITOR_TREATMENT" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 11), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 11), 0 ) ? "Yes" : "No");; |
7767 | CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 13), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 13), 0 ) ? "Yes" : "No");; |
7768 | CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PAT_ON_ENTRY" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 14), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 14), 0 ) ? "Yes" : "No");; |
7769 | CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_EFER_ON_ENTRY" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 15), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 15), 0 ) ? "Yes" : "No");; |
7770 | CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_BNDCFGS_ON_ENTRY" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 16), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 16), 0 ) ? "Yes" : "No");; |
7771 | CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VM_ENTRIES_FROM_PT" , vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 17), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 17), 0 ) ? "Yes" : "No");; |
7772 | printf(" exit ctls: 0x%llx\n", |
7773 | vcpu->vc_vmx_exit_ctls); |
7774 | printf(" true exit ctls: 0x%llx\n", |
7775 | vcpu->vc_vmx_true_exit_ctls); |
7776 | CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_DEBUG_CONTROLS" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 2), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 2), 0 ) ? "Yes" : "No");; |
7777 | CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE)printf(" %s: Can set:%s Can clear:%s\n", "HOST_SPACE_ADDRESS_SIZE" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 9), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 9), 0 ) ? "Yes" : "No");; |
7778 | CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 12), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 12), 0 ) ? "Yes" : "No");; |
7779 | CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "ACKNOWLEDGE_INTERRUPT_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 15), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 15), 0 ) ? "Yes" : "No");; |
7780 | CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_IA32_PAT_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 18), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 18), 0 ) ? "Yes" : "No");; |
7781 | CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PAT_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 19), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 19), 0 ) ? "Yes" : "No");; |
7782 | CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_IA32_EFER_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 20), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 20), 0 ) ? "Yes" : "No");; |
7783 | CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_EFER_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 21), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 21), 0 ) ? "Yes" : "No");; |
7784 | CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_VMX_PREEMPTION_TIMER" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 22), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 22), 0 ) ? "Yes" : "No");; |
7785 | CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "CLEAR_IA32_BNDCFGS_ON_EXIT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 23), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 23), 0 ) ? "Yes" : "No");; |
7786 | CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VM_EXITS_FROM_PT" , vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 24), 1) ? "Yes" : "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 24), 0 ) ? "Yes" : "No");; |
7787 | } |
7788 | } |
7789 | |
7790 | /* |
7791 | * vmx_dump_vmcs_field |
7792 | * |
7793 | * Debug function to dump the contents of a single VMCS field |
7794 | * |
7795 | * Parameters: |
7796 | * fieldid: VMCS Field ID |
7797 | * msg: string to display |
7798 | */ |
7799 | void |
7800 | vmx_dump_vmcs_field(uint16_t fieldid, const char *msg) |
7801 | { |
7802 | uint8_t width; |
7803 | uint64_t val; |
7804 | |
7805 | |
7806 | DPRINTF("%s (0x%04x): ", msg, fieldid); |
7807 | if (vmread(fieldid, &val)) |
7808 | DPRINTF("???? "); |
7809 | else { |
7810 | /* |
7811 | * Field width encoding : bits 13:14 |
7812 | * |
7813 | * 0: 16-bit |
7814 | * 1: 64-bit |
7815 | * 2: 32-bit |
7816 | * 3: natural width |
7817 | */ |
7818 | width = (fieldid >> 13) & 0x3; |
7819 | switch (width) { |
7820 | case 0: DPRINTF("0x%04llx ", val); break; |
7821 | case 1: |
7822 | case 3: DPRINTF("0x%016llx ", val); break; |
7823 | case 2: DPRINTF("0x%08llx ", val); |
7824 | } |
7825 | } |
7826 | } |
7827 | |
7828 | /* |
7829 | * vmx_dump_vmcs |
7830 | * |
7831 | * Debug function to dump the contents of the current VMCS. |
7832 | */ |
7833 | void |
7834 | vmx_dump_vmcs(struct vcpu *vcpu) |
7835 | { |
7836 | int has_sec, i; |
7837 | uint32_t cr3_tgt_ct; |
7838 | |
7839 | /* XXX save and load new vmcs, restore at end */ |
7840 | |
7841 | DPRINTF("--CURRENT VMCS STATE--\n"); |
7842 | printf("VMCS launched: %s\n", |
7843 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1) ? "Yes" : "No"); |
7844 | DPRINTF("VMXON revision : 0x%x\n", |
7845 | curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision); |
7846 | DPRINTF("CR0 fixed0: 0x%llx\n", |
7847 | curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0); |
7848 | DPRINTF("CR0 fixed1: 0x%llx\n", |
7849 | curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); |
7850 | DPRINTF("CR4 fixed0: 0x%llx\n", |
7851 | curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0); |
7852 | DPRINTF("CR4 fixed1: 0x%llx\n", |
7853 | curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); |
7854 | DPRINTF("MSR table size: 0x%x\n", |
7855 | 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1)); |
7856 | |
7857 | has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
7858 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1); |
7859 | |
7860 | if (has_sec) { |
7861 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7862 | IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) { |
7863 | vmx_dump_vmcs_field(VMCS_GUEST_VPID0x0000, "VPID"); |
7864 | } |
7865 | } |
7866 | |
7867 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481, |
7868 | IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) { |
7869 | vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR0x0002, |
7870 | "Posted Int Notif Vec"); |
7871 | } |
7872 | |
7873 | if (has_sec) { |
7874 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7875 | IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) { |
7876 | vmx_dump_vmcs_field(VMCS_EPTP_INDEX0x0004, "EPTP idx"); |
7877 | } |
7878 | } |
7879 | |
7880 | DPRINTF("\n"); |
7881 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL0x0800, "G.ES"); |
7882 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL0x0802, "G.CS"); |
7883 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL0x0804, "G.SS"); |
7884 | DPRINTF("\n"); |
7885 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL0x0806, "G.DS"); |
7886 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL0x0808, "G.FS"); |
7887 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL0x080A, "G.GS"); |
7888 | DPRINTF("\n"); |
7889 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL0x080C, "LDTR"); |
7890 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL0x080E, "G.TR"); |
7891 | |
7892 | if (has_sec) { |
7893 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7894 | IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) { |
7895 | vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS0x0810, |
7896 | "Int sts"); |
7897 | } |
7898 | |
7899 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7900 | IA32_VMX_ENABLE_PML(1ULL << 17), 1)) { |
7901 | vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX0x0812, "PML Idx"); |
7902 | } |
7903 | } |
7904 | |
7905 | DPRINTF("\n"); |
7906 | vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL0x0C00, "H.ES"); |
7907 | vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL0x0C02, "H.CS"); |
7908 | vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL0x0C04, "H.SS"); |
7909 | DPRINTF("\n"); |
7910 | vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL0x0C06, "H.DS"); |
7911 | vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL0x0C08, "H.FS"); |
7912 | vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL0x0C0A, "H.GS"); |
7913 | DPRINTF("\n"); |
7914 | |
7915 | vmx_dump_vmcs_field(VMCS_IO_BITMAP_A0x2000, "I/O Bitmap A"); |
7916 | DPRINTF("\n"); |
7917 | vmx_dump_vmcs_field(VMCS_IO_BITMAP_B0x2002, "I/O Bitmap B"); |
7918 | DPRINTF("\n"); |
7919 | |
7920 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
7921 | IA32_VMX_USE_MSR_BITMAPS(1ULL << 28), 1)) { |
7922 | vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS0x2004, "MSR Bitmap"); |
7923 | DPRINTF("\n"); |
7924 | } |
7925 | |
7926 | vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS0x2006, "Exit Store MSRs"); |
7927 | DPRINTF("\n"); |
7928 | vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008, "Exit Load MSRs"); |
7929 | DPRINTF("\n"); |
7930 | vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A, "Entry Load MSRs"); |
7931 | DPRINTF("\n"); |
7932 | vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER0x200C, "Exec VMCS Ptr"); |
7933 | DPRINTF("\n"); |
7934 | |
7935 | if (has_sec) { |
7936 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7937 | IA32_VMX_ENABLE_PML(1ULL << 17), 1)) { |
7938 | vmx_dump_vmcs_field(VMCS_PML_ADDRESS0x200E, "PML Addr"); |
7939 | DPRINTF("\n"); |
7940 | } |
7941 | } |
7942 | |
7943 | vmx_dump_vmcs_field(VMCS_TSC_OFFSET0x2010, "TSC Offset"); |
7944 | DPRINTF("\n"); |
7945 | |
7946 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
7947 | IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) { |
7948 | vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS0x2012, |
7949 | "Virtual APIC Addr"); |
7950 | DPRINTF("\n"); |
7951 | } |
7952 | |
7953 | if (has_sec) { |
7954 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7955 | IA32_VMX_VIRTUALIZE_APIC(1ULL << 0), 1)) { |
7956 | vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS0x2014, |
7957 | "APIC Access Addr"); |
7958 | DPRINTF("\n"); |
7959 | } |
7960 | } |
7961 | |
7962 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481, |
7963 | IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) { |
7964 | vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC0x2016, |
7965 | "Posted Int Desc Addr"); |
7966 | DPRINTF("\n"); |
7967 | } |
7968 | |
7969 | if (has_sec) { |
7970 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7971 | IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) { |
7972 | vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS0x2018, |
7973 | "VM Function Controls"); |
7974 | DPRINTF("\n"); |
7975 | } |
7976 | |
7977 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7978 | IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) { |
7979 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP0x201A, |
7980 | "EPT Pointer"); |
7981 | DPRINTF("\n"); |
7982 | } |
7983 | |
7984 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
7985 | IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) { |
7986 | vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_00x201C, |
7987 | "EOI Exit Bitmap 0"); |
7988 | DPRINTF("\n"); |
7989 | vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_10x201E, |
7990 | "EOI Exit Bitmap 1"); |
7991 | DPRINTF("\n"); |
7992 | vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_20x2020, |
7993 | "EOI Exit Bitmap 2"); |
7994 | DPRINTF("\n"); |
7995 | vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_30x2022, |
7996 | "EOI Exit Bitmap 3"); |
7997 | DPRINTF("\n"); |
7998 | } |
7999 | |
8000 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8001 | IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) { |
8002 | /* We assume all CPUs have the same VMFUNC caps */ |
8003 | if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_vm_func & 0x1) { |
8004 | vmx_dump_vmcs_field(VMCS_EPTP_LIST_ADDRESS0x2024, |
8005 | "EPTP List Addr"); |
8006 | DPRINTF("\n"); |
8007 | } |
8008 | } |
8009 | |
8010 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8011 | IA32_VMX_VMCS_SHADOWING(1ULL << 14), 1)) { |
8012 | vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS0x2026, |
8013 | "VMREAD Bitmap Addr"); |
8014 | DPRINTF("\n"); |
8015 | vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS0x2028, |
8016 | "VMWRITE Bitmap Addr"); |
8017 | DPRINTF("\n"); |
8018 | } |
8019 | |
8020 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8021 | IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) { |
8022 | vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS0x202A, |
8023 | "#VE Addr"); |
8024 | DPRINTF("\n"); |
8025 | } |
8026 | |
8027 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8028 | IA32_VMX_ENABLE_XSAVES_XRSTORS(1ULL << 20), 1)) { |
8029 | vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP0x202C, |
8030 | "XSS exiting bitmap addr"); |
8031 | DPRINTF("\n"); |
8032 | } |
8033 | |
8034 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8035 | IA32_VMX_ENABLE_ENCLS_EXITING(1ULL << 15), 1)) { |
8036 | vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP0x202E, |
8037 | "Encls exiting bitmap addr"); |
8038 | DPRINTF("\n"); |
8039 | } |
8040 | |
8041 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8042 | IA32_VMX_ENABLE_TSC_SCALING(1ULL << 25), 1)) { |
8043 | vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER0x2032, |
8044 | "TSC scaling factor"); |
8045 | DPRINTF("\n"); |
8046 | } |
8047 | |
8048 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8049 | IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) { |
8050 | vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS0x2400, |
8051 | "Guest PA"); |
8052 | DPRINTF("\n"); |
8053 | } |
8054 | } |
8055 | |
8056 | vmx_dump_vmcs_field(VMCS_LINK_POINTER0x2800, "VMCS Link Pointer"); |
8057 | DPRINTF("\n"); |
8058 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL0x2802, "Guest DEBUGCTL"); |
8059 | DPRINTF("\n"); |
8060 | |
8061 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484, |
8062 | IA32_VMX_LOAD_IA32_PAT_ON_ENTRY(1ULL << 14), 1) || |
8063 | vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8064 | IA32_VMX_SAVE_IA32_PAT_ON_EXIT(1ULL << 18), 1)) { |
8065 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT0x2804, |
8066 | "Guest PAT"); |
8067 | DPRINTF("\n"); |
8068 | } |
8069 | |
8070 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484, |
8071 | IA32_VMX_LOAD_IA32_EFER_ON_ENTRY(1ULL << 15), 1) || |
8072 | vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8073 | IA32_VMX_SAVE_IA32_EFER_ON_EXIT(1ULL << 20), 1)) { |
8074 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER0x2806, |
8075 | "Guest EFER"); |
8076 | DPRINTF("\n"); |
8077 | } |
8078 | |
8079 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484, |
8080 | IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13), 1)) { |
8081 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL0x2808, |
8082 | "Guest Perf Global Ctrl"); |
8083 | DPRINTF("\n"); |
8084 | } |
8085 | |
8086 | if (has_sec) { |
8087 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8088 | IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) { |
8089 | vmx_dump_vmcs_field(VMCS_GUEST_PDPTE00x280A, "Guest PDPTE0"); |
8090 | DPRINTF("\n"); |
8091 | vmx_dump_vmcs_field(VMCS_GUEST_PDPTE10x280C, "Guest PDPTE1"); |
8092 | DPRINTF("\n"); |
8093 | vmx_dump_vmcs_field(VMCS_GUEST_PDPTE20x280E, "Guest PDPTE2"); |
8094 | DPRINTF("\n"); |
8095 | vmx_dump_vmcs_field(VMCS_GUEST_PDPTE30x2810, "Guest PDPTE3"); |
8096 | DPRINTF("\n"); |
8097 | } |
8098 | } |
8099 | |
8100 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484, |
8101 | IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY(1ULL << 16), 1) || |
8102 | vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8103 | IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT(1ULL << 23), 1)) { |
8104 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS0x2812, |
8105 | "Guest BNDCFGS"); |
8106 | DPRINTF("\n"); |
8107 | } |
8108 | |
8109 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8110 | IA32_VMX_LOAD_IA32_PAT_ON_EXIT(1ULL << 19), 1)) { |
8111 | vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT0x2C00, |
8112 | "Host PAT"); |
8113 | DPRINTF("\n"); |
8114 | } |
8115 | |
8116 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8117 | IA32_VMX_LOAD_IA32_EFER_ON_EXIT(1ULL << 21), 1)) { |
8118 | vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER0x2C02, |
8119 | "Host EFER"); |
8120 | DPRINTF("\n"); |
8121 | } |
8122 | |
8123 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483, |
8124 | IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT(1ULL << 12), 1)) { |
8125 | vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL0x2C04, |
8126 | "Host Perf Global Ctrl"); |
8127 | DPRINTF("\n"); |
8128 | } |
8129 | |
8130 | vmx_dump_vmcs_field(VMCS_PINBASED_CTLS0x4000, "Pinbased Ctrls"); |
8131 | vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS0x4002, "Procbased Ctrls"); |
8132 | DPRINTF("\n"); |
8133 | vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP0x4004, "Exception Bitmap"); |
8134 | vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK0x4006, "#PF Err Code Mask"); |
8135 | DPRINTF("\n"); |
8136 | vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH0x4008, "#PF Err Code Match"); |
8137 | vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT0x400A, "CR3 Tgt Count"); |
8138 | DPRINTF("\n"); |
8139 | vmx_dump_vmcs_field(VMCS_EXIT_CTLS0x400C, "Exit Ctrls"); |
8140 | vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT0x400E, "Exit MSR Store Ct"); |
8141 | DPRINTF("\n"); |
8142 | vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT0x4010, "Exit MSR Load Ct"); |
8143 | vmx_dump_vmcs_field(VMCS_ENTRY_CTLS0x4012, "Entry Ctrls"); |
8144 | DPRINTF("\n"); |
8145 | vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, "Entry MSR Load Ct"); |
8146 | vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO0x4016, "Entry Int. Info"); |
8147 | DPRINTF("\n"); |
8148 | vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018, |
8149 | "Entry Ex. Err Code"); |
8150 | vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH0x401A, "Entry Insn Len"); |
8151 | DPRINTF("\n"); |
8152 | |
8153 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
8154 | IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) { |
8155 | vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD0x401C, "TPR Threshold"); |
8156 | DPRINTF("\n"); |
8157 | } |
8158 | |
8159 | if (has_sec) { |
8160 | vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS0x401E, "2ndary Ctrls"); |
8161 | DPRINTF("\n"); |
8162 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
8163 | IA32_VMX_PAUSE_LOOP_EXITING(1ULL << 10), 1)) { |
8164 | vmx_dump_vmcs_field(VMCS_PLE_GAP0x4020, "PLE Gap"); |
8165 | vmx_dump_vmcs_field(VMCS_PLE_WINDOW0x4022, "PLE Window"); |
8166 | } |
8167 | DPRINTF("\n"); |
8168 | } |
8169 | |
8170 | vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR0x4400, "Insn Error"); |
8171 | vmx_dump_vmcs_field(VMCS_EXIT_REASON0x4402, "Exit Reason"); |
8172 | DPRINTF("\n"); |
8173 | |
8174 | vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO0x4404, "Exit Int. Info"); |
8175 | vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE0x4406, |
8176 | "Exit Int. Err Code"); |
8177 | DPRINTF("\n"); |
8178 | |
8179 | vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO0x4408, "IDT vect info"); |
8180 | vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE0x440A, |
8181 | "IDT vect err code"); |
8182 | DPRINTF("\n"); |
8183 | |
8184 | vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH0x440C, "Insn Len"); |
8185 | vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO0x440E, "Exit Insn Info"); |
8186 | DPRINTF("\n"); |
8187 | |
8188 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT0x4800, "G. ES Lim"); |
8189 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT0x4802, "G. CS Lim"); |
8190 | DPRINTF("\n"); |
8191 | |
8192 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT0x4804, "G. SS Lim"); |
8193 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT0x4806, "G. DS Lim"); |
8194 | DPRINTF("\n"); |
8195 | |
8196 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT0x4808, "G. FS Lim"); |
8197 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT0x480A, "G. GS Lim"); |
8198 | DPRINTF("\n"); |
8199 | |
8200 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT0x480C, "G. LDTR Lim"); |
8201 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT0x480E, "G. TR Lim"); |
8202 | DPRINTF("\n"); |
8203 | |
8204 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, "G. GDTR Lim"); |
8205 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, "G. IDTR Lim"); |
8206 | DPRINTF("\n"); |
8207 | |
8208 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR0x4814, "G. ES AR"); |
8209 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR0x4816, "G. CS AR"); |
8210 | DPRINTF("\n"); |
8211 | |
8212 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR0x4818, "G. SS AR"); |
8213 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR0x481A, "G. DS AR"); |
8214 | DPRINTF("\n"); |
8215 | |
8216 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR0x481C, "G. FS AR"); |
8217 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR0x481E, "G. GS AR"); |
8218 | DPRINTF("\n"); |
8219 | |
8220 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR0x4820, "G. LDTR AR"); |
8221 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR0x4822, "G. TR AR"); |
8222 | DPRINTF("\n"); |
8223 | |
8224 | vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, "G. Int St."); |
8225 | vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE0x4826, "G. Act St."); |
8226 | DPRINTF("\n"); |
8227 | |
8228 | vmx_dump_vmcs_field(VMCS_GUEST_SMBASE0x4828, "G. SMBASE"); |
8229 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS0x482A, "G. SYSENTER CS"); |
8230 | DPRINTF("\n"); |
8231 | |
8232 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481, |
8233 | IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER(1ULL << 6), 1)) { |
8234 | vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL0x482E, |
8235 | "VMX Preempt Timer"); |
8236 | DPRINTF("\n"); |
8237 | } |
8238 | |
8239 | vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS0x4C00, "H. SYSENTER CS"); |
8240 | DPRINTF("\n"); |
8241 | |
8242 | vmx_dump_vmcs_field(VMCS_CR0_MASK0x6000, "CR0 Mask"); |
8243 | DPRINTF("\n"); |
8244 | vmx_dump_vmcs_field(VMCS_CR4_MASK0x6002, "CR4 Mask"); |
8245 | DPRINTF("\n"); |
8246 | |
8247 | vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW0x6004, "CR0 RD Shadow"); |
8248 | DPRINTF("\n"); |
8249 | vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW0x6006, "CR4 RD Shadow"); |
8250 | DPRINTF("\n"); |
8251 | |
8252 | /* We assume all CPUs have the same max CR3 target ct */ |
8253 | cr3_tgt_ct = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count; |
8254 | DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct); |
8255 | if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS256) { |
8256 | for (i = 0 ; i < cr3_tgt_ct; i++) { |
8257 | vmx_dump_vmcs_field(VMCS_CR3_TARGET_00x6008 + (2 * i), |
8258 | "CR3 Target"); |
8259 | DPRINTF("\n"); |
8260 | } |
8261 | } else { |
8262 | DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS); |
8263 | } |
8264 | |
8265 | vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION0x6400, "G. Exit Qual"); |
8266 | DPRINTF("\n"); |
8267 | vmx_dump_vmcs_field(VMCS_IO_RCX0x6402, "I/O RCX"); |
8268 | DPRINTF("\n"); |
8269 | vmx_dump_vmcs_field(VMCS_IO_RSI0x6404, "I/O RSI"); |
8270 | DPRINTF("\n"); |
8271 | vmx_dump_vmcs_field(VMCS_IO_RDI0x6406, "I/O RDI"); |
8272 | DPRINTF("\n"); |
8273 | vmx_dump_vmcs_field(VMCS_IO_RIP0x6408, "I/O RIP"); |
8274 | DPRINTF("\n"); |
8275 | vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS0x640A, "G. Lin Addr"); |
8276 | DPRINTF("\n"); |
8277 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR00x6800, "G. CR0"); |
8278 | DPRINTF("\n"); |
8279 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR30x6802, "G. CR3"); |
8280 | DPRINTF("\n"); |
8281 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR40x6804, "G. CR4"); |
8282 | DPRINTF("\n"); |
8283 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE0x6806, "G. ES Base"); |
8284 | DPRINTF("\n"); |
8285 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE0x6808, "G. CS Base"); |
8286 | DPRINTF("\n"); |
8287 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE0x680A, "G. SS Base"); |
8288 | DPRINTF("\n"); |
8289 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE0x680C, "G. DS Base"); |
8290 | DPRINTF("\n"); |
8291 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE0x680E, "G. FS Base"); |
8292 | DPRINTF("\n"); |
8293 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE0x6810, "G. GS Base"); |
8294 | DPRINTF("\n"); |
8295 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE0x6812, "G. LDTR Base"); |
8296 | DPRINTF("\n"); |
8297 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE0x6814, "G. TR Base"); |
8298 | DPRINTF("\n"); |
8299 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE0x6816, "G. GDTR Base"); |
8300 | DPRINTF("\n"); |
8301 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE0x6818, "G. IDTR Base"); |
8302 | DPRINTF("\n"); |
8303 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR70x681A, "G. DR7"); |
8304 | DPRINTF("\n"); |
8305 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP0x681C, "G. RSP"); |
8306 | DPRINTF("\n"); |
8307 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP0x681E, "G. RIP"); |
8308 | DPRINTF("\n"); |
8309 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS0x6820, "G. RFLAGS"); |
8310 | DPRINTF("\n"); |
8311 | vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC0x6822, "G. Pend Dbg Exc"); |
8312 | DPRINTF("\n"); |
8313 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP0x6824, "G. SYSENTER ESP"); |
8314 | DPRINTF("\n"); |
8315 | vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP0x6826, "G. SYSENTER EIP"); |
8316 | DPRINTF("\n"); |
8317 | vmx_dump_vmcs_field(VMCS_HOST_IA32_CR00x6C00, "H. CR0"); |
8318 | DPRINTF("\n"); |
8319 | vmx_dump_vmcs_field(VMCS_HOST_IA32_CR30x6C02, "H. CR3"); |
8320 | DPRINTF("\n"); |
8321 | vmx_dump_vmcs_field(VMCS_HOST_IA32_CR40x6C04, "H. CR4"); |
8322 | DPRINTF("\n"); |
8323 | vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE0x6C06, "H. FS Base"); |
8324 | DPRINTF("\n"); |
8325 | vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE0x6C08, "H. GS Base"); |
8326 | DPRINTF("\n"); |
8327 | vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE0x6C0A, "H. TR Base"); |
8328 | DPRINTF("\n"); |
8329 | vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE0x6C0C, "H. GDTR Base"); |
8330 | DPRINTF("\n"); |
8331 | vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE0x6C0E, "H. IDTR Base"); |
8332 | DPRINTF("\n"); |
8333 | vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP0x6C10, "H. SYSENTER ESP"); |
8334 | DPRINTF("\n"); |
8335 | vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP0x6C12, "H. SYSENTER EIP"); |
8336 | DPRINTF("\n"); |
8337 | vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP0x6C14, "H. RSP"); |
8338 | DPRINTF("\n"); |
8339 | vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP0x6C16, "H. RIP"); |
8340 | DPRINTF("\n"); |
8341 | } |
8342 | |
8343 | /* |
8344 | * vmx_vcpu_dump_regs |
8345 | * |
8346 | * Debug function to print vcpu regs from the current vcpu |
8347 | * note - vmcs for 'vcpu' must be on this pcpu. |
8348 | * |
8349 | * Parameters: |
8350 | * vcpu - vcpu whose registers should be dumped |
8351 | */ |
8352 | void |
8353 | vmx_vcpu_dump_regs(struct vcpu *vcpu) |
8354 | { |
8355 | uint64_t r; |
8356 | int i; |
8357 | struct vmx_msr_store *msr_store; |
8358 | |
8359 | /* XXX reformat this for 32 bit guest as needed */ |
8360 | DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu)); |
8361 | i = vmm_get_guest_cpu_cpl(vcpu); |
8362 | if (i == -1) |
8363 | DPRINTF(" CPL=unknown\n"); |
8364 | else |
8365 | DPRINTF(" CPL=%d\n", i); |
8366 | DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n", |
8367 | vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx, |
8368 | vcpu->vc_gueststate.vg_rcx); |
8369 | DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n", |
8370 | vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp, |
8371 | vcpu->vc_gueststate.vg_rdi); |
8372 | DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n", |
8373 | vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8, |
8374 | vcpu->vc_gueststate.vg_r9); |
8375 | DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n", |
8376 | vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11, |
8377 | vcpu->vc_gueststate.vg_r12); |
8378 | DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n", |
8379 | vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14, |
8380 | vcpu->vc_gueststate.vg_r15); |
8381 | |
8382 | DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip); |
8383 | if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r)) |
8384 | DPRINTF("(error reading)\n"); |
8385 | else |
8386 | DPRINTF("0x%016llx\n", r); |
8387 | |
8388 | DPRINTF(" rflags="); |
8389 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &r)) |
8390 | DPRINTF("(error reading)\n"); |
8391 | else { |
8392 | DPRINTF("0x%016llx ", r); |
8393 | vmm_decode_rflags(r); |
8394 | } |
8395 | |
8396 | DPRINTF(" cr0="); |
8397 | if (vmread(VMCS_GUEST_IA32_CR00x6800, &r)) |
8398 | DPRINTF("(error reading)\n"); |
8399 | else { |
8400 | DPRINTF("0x%016llx ", r); |
8401 | vmm_decode_cr0(r); |
8402 | } |
8403 | |
8404 | DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2); |
8405 | |
8406 | DPRINTF(" cr3="); |
8407 | if (vmread(VMCS_GUEST_IA32_CR30x6802, &r)) |
8408 | DPRINTF("(error reading)\n"); |
8409 | else { |
8410 | DPRINTF("0x%016llx ", r); |
8411 | vmm_decode_cr3(r); |
8412 | } |
8413 | |
8414 | DPRINTF(" cr4="); |
8415 | if (vmread(VMCS_GUEST_IA32_CR40x6804, &r)) |
8416 | DPRINTF("(error reading)\n"); |
8417 | else { |
8418 | DPRINTF("0x%016llx ", r); |
8419 | vmm_decode_cr4(r); |
8420 | } |
8421 | |
8422 | DPRINTF(" --Guest Segment Info--\n"); |
8423 | |
8424 | DPRINTF(" cs="); |
8425 | if (vmread(VMCS_GUEST_IA32_CS_SEL0x0802, &r)) |
8426 | DPRINTF("(error reading)"); |
8427 | else |
8428 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8429 | |
8430 | DPRINTF(" base="); |
8431 | if (vmread(VMCS_GUEST_IA32_CS_BASE0x6808, &r)) |
8432 | DPRINTF("(error reading)"); |
8433 | else |
8434 | DPRINTF("0x%016llx", r); |
8435 | |
8436 | DPRINTF(" limit="); |
8437 | if (vmread(VMCS_GUEST_IA32_CS_LIMIT0x4802, &r)) |
8438 | DPRINTF("(error reading)"); |
8439 | else |
8440 | DPRINTF("0x%016llx", r); |
8441 | |
8442 | DPRINTF(" a/r="); |
8443 | if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &r)) |
8444 | DPRINTF("(error reading)\n"); |
8445 | else { |
8446 | DPRINTF("0x%04llx\n ", r); |
8447 | vmm_segment_desc_decode(r); |
8448 | } |
8449 | |
8450 | DPRINTF(" ds="); |
8451 | if (vmread(VMCS_GUEST_IA32_DS_SEL0x0806, &r)) |
8452 | DPRINTF("(error reading)"); |
8453 | else |
8454 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8455 | |
8456 | DPRINTF(" base="); |
8457 | if (vmread(VMCS_GUEST_IA32_DS_BASE0x680C, &r)) |
8458 | DPRINTF("(error reading)"); |
8459 | else |
8460 | DPRINTF("0x%016llx", r); |
8461 | |
8462 | DPRINTF(" limit="); |
8463 | if (vmread(VMCS_GUEST_IA32_DS_LIMIT0x4806, &r)) |
8464 | DPRINTF("(error reading)"); |
8465 | else |
8466 | DPRINTF("0x%016llx", r); |
8467 | |
8468 | DPRINTF(" a/r="); |
8469 | if (vmread(VMCS_GUEST_IA32_DS_AR0x481A, &r)) |
8470 | DPRINTF("(error reading)\n"); |
8471 | else { |
8472 | DPRINTF("0x%04llx\n ", r); |
8473 | vmm_segment_desc_decode(r); |
8474 | } |
8475 | |
8476 | DPRINTF(" es="); |
8477 | if (vmread(VMCS_GUEST_IA32_ES_SEL0x0800, &r)) |
8478 | DPRINTF("(error reading)"); |
8479 | else |
8480 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8481 | |
8482 | DPRINTF(" base="); |
8483 | if (vmread(VMCS_GUEST_IA32_ES_BASE0x6806, &r)) |
8484 | DPRINTF("(error reading)"); |
8485 | else |
8486 | DPRINTF("0x%016llx", r); |
8487 | |
8488 | DPRINTF(" limit="); |
8489 | if (vmread(VMCS_GUEST_IA32_ES_LIMIT0x4800, &r)) |
8490 | DPRINTF("(error reading)"); |
8491 | else |
8492 | DPRINTF("0x%016llx", r); |
8493 | |
8494 | DPRINTF(" a/r="); |
8495 | if (vmread(VMCS_GUEST_IA32_ES_AR0x4814, &r)) |
8496 | DPRINTF("(error reading)\n"); |
8497 | else { |
8498 | DPRINTF("0x%04llx\n ", r); |
8499 | vmm_segment_desc_decode(r); |
8500 | } |
8501 | |
8502 | DPRINTF(" fs="); |
8503 | if (vmread(VMCS_GUEST_IA32_FS_SEL0x0808, &r)) |
8504 | DPRINTF("(error reading)"); |
8505 | else |
8506 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8507 | |
8508 | DPRINTF(" base="); |
8509 | if (vmread(VMCS_GUEST_IA32_FS_BASE0x680E, &r)) |
8510 | DPRINTF("(error reading)"); |
8511 | else |
8512 | DPRINTF("0x%016llx", r); |
8513 | |
8514 | DPRINTF(" limit="); |
8515 | if (vmread(VMCS_GUEST_IA32_FS_LIMIT0x4808, &r)) |
8516 | DPRINTF("(error reading)"); |
8517 | else |
8518 | DPRINTF("0x%016llx", r); |
8519 | |
8520 | DPRINTF(" a/r="); |
8521 | if (vmread(VMCS_GUEST_IA32_FS_AR0x481C, &r)) |
8522 | DPRINTF("(error reading)\n"); |
8523 | else { |
8524 | DPRINTF("0x%04llx\n ", r); |
8525 | vmm_segment_desc_decode(r); |
8526 | } |
8527 | |
8528 | DPRINTF(" gs="); |
8529 | if (vmread(VMCS_GUEST_IA32_GS_SEL0x080A, &r)) |
8530 | DPRINTF("(error reading)"); |
8531 | else |
8532 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8533 | |
8534 | DPRINTF(" base="); |
8535 | if (vmread(VMCS_GUEST_IA32_GS_BASE0x6810, &r)) |
8536 | DPRINTF("(error reading)"); |
8537 | else |
8538 | DPRINTF("0x%016llx", r); |
8539 | |
8540 | DPRINTF(" limit="); |
8541 | if (vmread(VMCS_GUEST_IA32_GS_LIMIT0x480A, &r)) |
8542 | DPRINTF("(error reading)"); |
8543 | else |
8544 | DPRINTF("0x%016llx", r); |
8545 | |
8546 | DPRINTF(" a/r="); |
8547 | if (vmread(VMCS_GUEST_IA32_GS_AR0x481E, &r)) |
8548 | DPRINTF("(error reading)\n"); |
8549 | else { |
8550 | DPRINTF("0x%04llx\n ", r); |
8551 | vmm_segment_desc_decode(r); |
8552 | } |
8553 | |
8554 | DPRINTF(" ss="); |
8555 | if (vmread(VMCS_GUEST_IA32_SS_SEL0x0804, &r)) |
8556 | DPRINTF("(error reading)"); |
8557 | else |
8558 | DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); |
8559 | |
8560 | DPRINTF(" base="); |
8561 | if (vmread(VMCS_GUEST_IA32_SS_BASE0x680A, &r)) |
8562 | DPRINTF("(error reading)"); |
8563 | else |
8564 | DPRINTF("0x%016llx", r); |
8565 | |
8566 | DPRINTF(" limit="); |
8567 | if (vmread(VMCS_GUEST_IA32_SS_LIMIT0x4804, &r)) |
8568 | DPRINTF("(error reading)"); |
8569 | else |
8570 | DPRINTF("0x%016llx", r); |
8571 | |
8572 | DPRINTF(" a/r="); |
8573 | if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &r)) |
8574 | DPRINTF("(error reading)\n"); |
8575 | else { |
8576 | DPRINTF("0x%04llx\n ", r); |
8577 | vmm_segment_desc_decode(r); |
8578 | } |
8579 | |
8580 | DPRINTF(" tr="); |
8581 | if (vmread(VMCS_GUEST_IA32_TR_SEL0x080E, &r)) |
8582 | DPRINTF("(error reading)"); |
8583 | else |
8584 | DPRINTF("0x%04llx", r); |
8585 | |
8586 | DPRINTF(" base="); |
8587 | if (vmread(VMCS_GUEST_IA32_TR_BASE0x6814, &r)) |
8588 | DPRINTF("(error reading)"); |
8589 | else |
8590 | DPRINTF("0x%016llx", r); |
8591 | |
8592 | DPRINTF(" limit="); |
8593 | if (vmread(VMCS_GUEST_IA32_TR_LIMIT0x480E, &r)) |
8594 | DPRINTF("(error reading)"); |
8595 | else |
8596 | DPRINTF("0x%016llx", r); |
8597 | |
8598 | DPRINTF(" a/r="); |
8599 | if (vmread(VMCS_GUEST_IA32_TR_AR0x4822, &r)) |
8600 | DPRINTF("(error reading)\n"); |
8601 | else { |
8602 | DPRINTF("0x%04llx\n ", r); |
8603 | vmm_segment_desc_decode(r); |
8604 | } |
8605 | |
8606 | DPRINTF(" gdtr base="); |
8607 | if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816, &r)) |
8608 | DPRINTF("(error reading) "); |
8609 | else |
8610 | DPRINTF("0x%016llx", r); |
8611 | |
8612 | DPRINTF(" limit="); |
8613 | if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &r)) |
8614 | DPRINTF("(error reading)\n"); |
8615 | else |
8616 | DPRINTF("0x%016llx\n", r); |
8617 | |
8618 | DPRINTF(" idtr base="); |
8619 | if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818, &r)) |
8620 | DPRINTF("(error reading) "); |
8621 | else |
8622 | DPRINTF("0x%016llx", r); |
8623 | |
8624 | DPRINTF(" limit="); |
8625 | if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &r)) |
8626 | DPRINTF("(error reading)\n"); |
8627 | else |
8628 | DPRINTF("0x%016llx\n", r); |
8629 | |
8630 | DPRINTF(" ldtr="); |
8631 | if (vmread(VMCS_GUEST_IA32_LDTR_SEL0x080C, &r)) |
8632 | DPRINTF("(error reading)"); |
8633 | else |
8634 | DPRINTF("0x%04llx", r); |
8635 | |
8636 | DPRINTF(" base="); |
8637 | if (vmread(VMCS_GUEST_IA32_LDTR_BASE0x6812, &r)) |
8638 | DPRINTF("(error reading)"); |
8639 | else |
8640 | DPRINTF("0x%016llx", r); |
8641 | |
8642 | DPRINTF(" limit="); |
8643 | if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT0x480C, &r)) |
8644 | DPRINTF("(error reading)"); |
8645 | else |
8646 | DPRINTF("0x%016llx", r); |
8647 | |
8648 | DPRINTF(" a/r="); |
8649 | if (vmread(VMCS_GUEST_IA32_LDTR_AR0x4820, &r)) |
8650 | DPRINTF("(error reading)\n"); |
8651 | else { |
8652 | DPRINTF("0x%04llx\n ", r); |
8653 | vmm_segment_desc_decode(r); |
8654 | } |
8655 | |
8656 | DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n", |
8657 | (uint64_t)vcpu->vc_vmx_msr_exit_save_va, |
8658 | (uint64_t)vcpu->vc_vmx_msr_exit_save_pa); |
8659 | |
8660 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
8661 | |
8662 | for (i = 0; i < VMX_NUM_MSR_STORE7; i++) { |
8663 | DPRINTF(" MSR %d @ %p : 0x%08llx (%s), " |
8664 | "value=0x%016llx ", |
8665 | i, &msr_store[i], msr_store[i].vms_index, |
8666 | msr_name_decode(msr_store[i].vms_index), |
8667 | msr_store[i].vms_data); |
8668 | vmm_decode_msr_value(msr_store[i].vms_index, |
8669 | msr_store[i].vms_data); |
8670 | } |
8671 | } |
8672 | |
8673 | /* |
8674 | * msr_name_decode |
8675 | * |
8676 | * Returns a human-readable name for the MSR supplied in 'msr'. |
8677 | * |
8678 | * Parameters: |
8679 | * msr - The MSR to decode |
8680 | * |
8681 | * Return value: |
8682 | * NULL-terminated character string containing the name of the MSR requested |
8683 | */ |
8684 | const char * |
8685 | msr_name_decode(uint32_t msr) |
8686 | { |
8687 | /* |
8688 | * Add as needed. Also consider adding a decode function when |
8689 | * adding to this table. |
8690 | */ |
8691 | |
8692 | switch (msr) { |
8693 | case MSR_TSC0x010: return "TSC"; |
8694 | case MSR_APICBASE0x01b: return "APIC base"; |
8695 | case MSR_IA32_FEATURE_CONTROL0x03a: return "IA32 feature control"; |
8696 | case MSR_PERFCTR00x0c1: return "perf counter 0"; |
8697 | case MSR_PERFCTR10x0c2: return "perf counter 1"; |
8698 | case MSR_TEMPERATURE_TARGET0x1a2: return "temperature target"; |
8699 | case MSR_MTRRcap0x0fe: return "MTRR cap"; |
8700 | case MSR_PERF_STATUS0x198: return "perf status"; |
8701 | case MSR_PERF_CTL0x199: return "perf control"; |
8702 | case MSR_MTRRvarBase0x200: return "MTRR variable base"; |
8703 | case MSR_MTRRfix64K_000000x250: return "MTRR fixed 64K"; |
8704 | case MSR_MTRRfix16K_800000x258: return "MTRR fixed 16K"; |
8705 | case MSR_MTRRfix4K_C00000x268: return "MTRR fixed 4K"; |
8706 | case MSR_CR_PAT0x277: return "PAT"; |
8707 | case MSR_MTRRdefType0x2ff: return "MTRR default type"; |
8708 | case MSR_EFER0xc0000080: return "EFER"; |
8709 | case MSR_STAR0xc0000081: return "STAR"; |
8710 | case MSR_LSTAR0xc0000082: return "LSTAR"; |
8711 | case MSR_CSTAR0xc0000083: return "CSTAR"; |
8712 | case MSR_SFMASK0xc0000084: return "SFMASK"; |
8713 | case MSR_FSBASE0xc0000100: return "FSBASE"; |
8714 | case MSR_GSBASE0xc0000101: return "GSBASE"; |
8715 | case MSR_KERNELGSBASE0xc0000102: return "KGSBASE"; |
8716 | case MSR_MISC_ENABLE0x1a0: return "Misc Enable"; |
8717 | default: return "Unknown MSR"; |
8718 | } |
8719 | } |
8720 | |
8721 | /* |
8722 | * vmm_segment_desc_decode |
8723 | * |
8724 | * Debug function to print segment information for supplied descriptor |
8725 | * |
8726 | * Parameters: |
8727 | * val - The A/R bytes for the segment descriptor to decode |
8728 | */ |
8729 | void |
8730 | vmm_segment_desc_decode(uint64_t val) |
8731 | { |
8732 | uint16_t ar; |
8733 | uint8_t g, type, s, dpl, p, dib, l; |
8734 | uint32_t unusable; |
8735 | |
8736 | /* Exit early on unusable descriptors */ |
8737 | unusable = val & 0x10000; |
8738 | if (unusable) { |
8739 | DPRINTF("(unusable)\n"); |
8740 | return; |
8741 | } |
8742 | |
8743 | ar = (uint16_t)val; |
8744 | |
8745 | g = (ar & 0x8000) >> 15; |
8746 | dib = (ar & 0x4000) >> 14; |
8747 | l = (ar & 0x2000) >> 13; |
8748 | p = (ar & 0x80) >> 7; |
8749 | dpl = (ar & 0x60) >> 5; |
8750 | s = (ar & 0x10) >> 4; |
8751 | type = (ar & 0xf); |
8752 | |
8753 | DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ", |
8754 | g, dib, l, p, s); |
8755 | |
8756 | DPRINTF("type="); |
8757 | if (!s) { |
8758 | switch (type) { |
8759 | case SDT_SYSLDT2: DPRINTF("ldt\n"); break; |
8760 | case SDT_SYS386TSS9: DPRINTF("tss (available)\n"); break; |
8761 | case SDT_SYS386BSY11: DPRINTF("tss (busy)\n"); break; |
8762 | case SDT_SYS386CGT12: DPRINTF("call gate\n"); break; |
8763 | case SDT_SYS386IGT14: DPRINTF("interrupt gate\n"); break; |
8764 | case SDT_SYS386TGT15: DPRINTF("trap gate\n"); break; |
8765 | /* XXX handle 32 bit segment types by inspecting mode */ |
8766 | default: DPRINTF("unknown"); |
8767 | } |
8768 | } else { |
8769 | switch (type + 16) { |
8770 | case SDT_MEMRO16: DPRINTF("data, r/o\n"); break; |
8771 | case SDT_MEMROA17: DPRINTF("data, r/o, accessed\n"); break; |
8772 | case SDT_MEMRW18: DPRINTF("data, r/w\n"); break; |
8773 | case SDT_MEMRWA19: DPRINTF("data, r/w, accessed\n"); break; |
8774 | case SDT_MEMROD20: DPRINTF("data, r/o, expand down\n"); break; |
8775 | case SDT_MEMRODA21: DPRINTF("data, r/o, expand down, " |
8776 | "accessed\n"); |
8777 | break; |
8778 | case SDT_MEMRWD22: DPRINTF("data, r/w, expand down\n"); break; |
8779 | case SDT_MEMRWDA23: DPRINTF("data, r/w, expand down, " |
8780 | "accessed\n"); |
8781 | break; |
8782 | case SDT_MEME24: DPRINTF("code, x only\n"); break; |
8783 | case SDT_MEMEA25: DPRINTF("code, x only, accessed\n"); |
8784 | case SDT_MEMER26: DPRINTF("code, r/x\n"); break; |
8785 | case SDT_MEMERA27: DPRINTF("code, r/x, accessed\n"); break; |
8786 | case SDT_MEMEC28: DPRINTF("code, x only, conforming\n"); break; |
8787 | case SDT_MEMEAC29: DPRINTF("code, x only, conforming, " |
8788 | "accessed\n"); |
8789 | break; |
8790 | case SDT_MEMERC30: DPRINTF("code, r/x, conforming\n"); break; |
8791 | case SDT_MEMERAC31: DPRINTF("code, r/x, conforming, accessed\n"); |
8792 | break; |
8793 | } |
8794 | } |
8795 | } |
8796 | |
8797 | void |
8798 | vmm_decode_cr0(uint64_t cr0) |
8799 | { |
8800 | struct vmm_reg_debug_info cr0_info[11] = { |
8801 | { CR0_PG0x80000000, "PG ", "pg " }, |
8802 | { CR0_CD0x40000000, "CD ", "cd " }, |
8803 | { CR0_NW0x20000000, "NW ", "nw " }, |
8804 | { CR0_AM0x00040000, "AM ", "am " }, |
8805 | { CR0_WP0x00010000, "WP ", "wp " }, |
8806 | { CR0_NE0x00000020, "NE ", "ne " }, |
8807 | { CR0_ET0x00000010, "ET ", "et " }, |
8808 | { CR0_TS0x00000008, "TS ", "ts " }, |
8809 | { CR0_EM0x00000004, "EM ", "em " }, |
8810 | { CR0_MP0x00000002, "MP ", "mp " }, |
8811 | { CR0_PE0x00000001, "PE", "pe" } |
8812 | }; |
8813 | |
8814 | uint8_t i; |
8815 | |
8816 | DPRINTF("("); |
8817 | for (i = 0; i < nitems(cr0_info)(sizeof((cr0_info)) / sizeof((cr0_info)[0])); i++) |
8818 | if (cr0 & cr0_info[i].vrdi_bit) |
8819 | DPRINTF("%s", cr0_info[i].vrdi_present); |
8820 | else |
8821 | DPRINTF("%s", cr0_info[i].vrdi_absent); |
8822 | |
8823 | DPRINTF(")\n"); |
8824 | } |
8825 | |
8826 | void |
8827 | vmm_decode_cr3(uint64_t cr3) |
8828 | { |
8829 | struct vmm_reg_debug_info cr3_info[2] = { |
8830 | { CR3_PWT(1ULL << 3), "PWT ", "pwt "}, |
8831 | { CR3_PCD(1ULL << 4), "PCD", "pcd"} |
8832 | }; |
8833 | |
8834 | uint64_t cr4; |
8835 | uint8_t i; |
8836 | |
8837 | if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) { |
8838 | DPRINTF("(error)\n"); |
8839 | return; |
8840 | } |
8841 | |
8842 | /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */ |
8843 | if ((cr4 & CR4_PCIDE0x00020000) == 0) { |
8844 | DPRINTF("("); |
8845 | for (i = 0 ; i < nitems(cr3_info)(sizeof((cr3_info)) / sizeof((cr3_info)[0])) ; i++) |
8846 | if (cr3 & cr3_info[i].vrdi_bit) |
8847 | DPRINTF("%s", cr3_info[i].vrdi_present); |
8848 | else |
8849 | DPRINTF("%s", cr3_info[i].vrdi_absent); |
8850 | |
8851 | DPRINTF(")\n"); |
8852 | } else { |
8853 | DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF); |
8854 | } |
8855 | } |
8856 | |
8857 | void |
8858 | vmm_decode_cr4(uint64_t cr4) |
8859 | { |
8860 | struct vmm_reg_debug_info cr4_info[19] = { |
8861 | { CR4_PKE0x00400000, "PKE ", "pke "}, |
8862 | { CR4_SMAP0x00200000, "SMAP ", "smap "}, |
8863 | { CR4_SMEP0x00100000, "SMEP ", "smep "}, |
8864 | { CR4_OSXSAVE0x00040000, "OSXSAVE ", "osxsave "}, |
8865 | { CR4_PCIDE0x00020000, "PCIDE ", "pcide "}, |
8866 | { CR4_FSGSBASE0x00010000, "FSGSBASE ", "fsgsbase "}, |
8867 | { CR4_SMXE0x00004000, "SMXE ", "smxe "}, |
8868 | { CR4_VMXE0x00002000, "VMXE ", "vmxe "}, |
8869 | { CR4_OSXMMEXCPT0x00000400, "OSXMMEXCPT ", "osxmmexcpt "}, |
8870 | { CR4_OSFXSR0x00000200, "OSFXSR ", "osfxsr "}, |
8871 | { CR4_PCE0x00000100, "PCE ", "pce "}, |
8872 | { CR4_PGE0x00000080, "PGE ", "pge "}, |
8873 | { CR4_MCE0x00000040, "MCE ", "mce "}, |
8874 | { CR4_PAE0x00000020, "PAE ", "pae "}, |
8875 | { CR4_PSE0x00000010, "PSE ", "pse "}, |
8876 | { CR4_DE0x00000008, "DE ", "de "}, |
8877 | { CR4_TSD0x00000004, "TSD ", "tsd "}, |
8878 | { CR4_PVI0x00000002, "PVI ", "pvi "}, |
8879 | { CR4_VME0x00000001, "VME", "vme"} |
8880 | }; |
8881 | |
8882 | uint8_t i; |
8883 | |
8884 | DPRINTF("("); |
8885 | for (i = 0; i < nitems(cr4_info)(sizeof((cr4_info)) / sizeof((cr4_info)[0])); i++) |
8886 | if (cr4 & cr4_info[i].vrdi_bit) |
8887 | DPRINTF("%s", cr4_info[i].vrdi_present); |
8888 | else |
8889 | DPRINTF("%s", cr4_info[i].vrdi_absent); |
8890 | |
8891 | DPRINTF(")\n"); |
8892 | } |
8893 | |
8894 | void |
8895 | vmm_decode_apicbase_msr_value(uint64_t apicbase) |
8896 | { |
8897 | struct vmm_reg_debug_info apicbase_info[3] = { |
8898 | { APICBASE_BSP0x100, "BSP ", "bsp "}, |
8899 | { APICBASE_ENABLE_X2APIC0x400, "X2APIC ", "x2apic "}, |
8900 | { APICBASE_GLOBAL_ENABLE0x800, "GLB_EN", "glb_en"} |
8901 | }; |
8902 | |
8903 | uint8_t i; |
8904 | |
8905 | DPRINTF("("); |
8906 | for (i = 0; i < nitems(apicbase_info)(sizeof((apicbase_info)) / sizeof((apicbase_info)[0])); i++) |
8907 | if (apicbase & apicbase_info[i].vrdi_bit) |
8908 | DPRINTF("%s", apicbase_info[i].vrdi_present); |
8909 | else |
8910 | DPRINTF("%s", apicbase_info[i].vrdi_absent); |
8911 | |
8912 | DPRINTF(")\n"); |
8913 | } |
8914 | |
8915 | void |
8916 | vmm_decode_ia32_fc_value(uint64_t fcr) |
8917 | { |
8918 | struct vmm_reg_debug_info fcr_info[4] = { |
8919 | { IA32_FEATURE_CONTROL_LOCK0x01, "LOCK ", "lock "}, |
8920 | { IA32_FEATURE_CONTROL_SMX_EN0x02, "SMX ", "smx "}, |
8921 | { IA32_FEATURE_CONTROL_VMX_EN0x04, "VMX ", "vmx "}, |
8922 | { IA32_FEATURE_CONTROL_SENTER_EN(1ULL << 15), "SENTER ", "senter "} |
8923 | }; |
8924 | |
8925 | uint8_t i; |
8926 | |
8927 | DPRINTF("("); |
8928 | for (i = 0; i < nitems(fcr_info)(sizeof((fcr_info)) / sizeof((fcr_info)[0])); i++) |
8929 | if (fcr & fcr_info[i].vrdi_bit) |
8930 | DPRINTF("%s", fcr_info[i].vrdi_present); |
8931 | else |
8932 | DPRINTF("%s", fcr_info[i].vrdi_absent); |
8933 | |
8934 | if (fcr & IA32_FEATURE_CONTROL_SENTER_EN(1ULL << 15)) |
8935 | DPRINTF(" [SENTER param = 0x%llx]", |
8936 | (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8); |
8937 | |
8938 | DPRINTF(")\n"); |
8939 | } |
8940 | |
8941 | void |
8942 | vmm_decode_mtrrcap_value(uint64_t val) |
8943 | { |
8944 | struct vmm_reg_debug_info mtrrcap_info[3] = { |
8945 | { MTRRcap_FIXED0x100, "FIXED ", "fixed "}, |
8946 | { MTRRcap_WC0x400, "WC ", "wc "}, |
8947 | { MTRRcap_SMRR0x800, "SMRR ", "smrr "} |
8948 | }; |
8949 | |
8950 | uint8_t i; |
8951 | |
8952 | DPRINTF("("); |
8953 | for (i = 0; i < nitems(mtrrcap_info)(sizeof((mtrrcap_info)) / sizeof((mtrrcap_info)[0])); i++) |
8954 | if (val & mtrrcap_info[i].vrdi_bit) |
8955 | DPRINTF("%s", mtrrcap_info[i].vrdi_present); |
8956 | else |
8957 | DPRINTF("%s", mtrrcap_info[i].vrdi_absent); |
8958 | |
8959 | if (val & MTRRcap_FIXED0x100) |
8960 | DPRINTF(" [nr fixed ranges = 0x%llx]", |
8961 | (val & 0xff)); |
8962 | |
8963 | DPRINTF(")\n"); |
8964 | } |
8965 | |
/*
 * vmm_decode_perf_status_value
 *
 * Debug function: prints the pstate ratio field (low 16 bits) of the
 * PERF_STATUS MSR value in 'val'.
 */
void
vmm_decode_perf_status_value(uint64_t val)
{
	uint64_t ratio = val & 0xffff;

	DPRINTF("(pstate ratio = 0x%llx)\n", ratio);
}
8971 | |
8972 | void vmm_decode_perf_ctl_value(uint64_t val) |
8973 | { |
8974 | DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo"); |
8975 | DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF)); |
8976 | } |
8977 | |
8978 | void |
8979 | vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype) |
8980 | { |
8981 | struct vmm_reg_debug_info mtrrdeftype_info[2] = { |
8982 | { MTRRdefType_FIXED_ENABLE0x400, "FIXED ", "fixed "}, |
8983 | { MTRRdefType_ENABLE0x800, "ENABLED ", "enabled "}, |
8984 | }; |
8985 | |
8986 | uint8_t i; |
8987 | int type; |
8988 | |
8989 | DPRINTF("("); |
8990 | for (i = 0; i < nitems(mtrrdeftype_info)(sizeof((mtrrdeftype_info)) / sizeof((mtrrdeftype_info)[0])); i++) |
8991 | if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit) |
8992 | DPRINTF("%s", mtrrdeftype_info[i].vrdi_present); |
8993 | else |
8994 | DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent); |
8995 | |
8996 | DPRINTF("type = "); |
8997 | type = mtrr2mrt(mtrrdeftype & 0xff); |
8998 | switch (type) { |
8999 | case MDF_UNCACHEABLE(1<<0): DPRINTF("UC"); break; |
9000 | case MDF_WRITECOMBINE(1<<1): DPRINTF("WC"); break; |
9001 | case MDF_WRITETHROUGH(1<<2): DPRINTF("WT"); break; |
9002 | case MDF_WRITEPROTECT(1<<4): DPRINTF("RO"); break; |
9003 | case MDF_WRITEBACK(1<<3): DPRINTF("WB"); break; |
9004 | case MDF_UNKNOWN(1<<5): |
9005 | default: |
9006 | DPRINTF("??"); |
9007 | break; |
9008 | } |
9009 | |
9010 | DPRINTF(")\n"); |
9011 | } |
9012 | |
9013 | void |
9014 | vmm_decode_efer_value(uint64_t efer) |
9015 | { |
9016 | struct vmm_reg_debug_info efer_info[4] = { |
9017 | { EFER_SCE0x00000001, "SCE ", "sce "}, |
9018 | { EFER_LME0x00000100, "LME ", "lme "}, |
9019 | { EFER_LMA0x00000400, "LMA ", "lma "}, |
9020 | { EFER_NXE0x00000800, "NXE", "nxe"}, |
9021 | }; |
9022 | |
9023 | uint8_t i; |
9024 | |
9025 | DPRINTF("("); |
9026 | for (i = 0; i < nitems(efer_info)(sizeof((efer_info)) / sizeof((efer_info)[0])); i++) |
9027 | if (efer & efer_info[i].vrdi_bit) |
9028 | DPRINTF("%s", efer_info[i].vrdi_present); |
9029 | else |
9030 | DPRINTF("%s", efer_info[i].vrdi_absent); |
9031 | |
9032 | DPRINTF(")\n"); |
9033 | } |
9034 | |
9035 | void |
9036 | vmm_decode_msr_value(uint64_t msr, uint64_t val) |
9037 | { |
9038 | switch (msr) { |
9039 | case MSR_APICBASE0x01b: vmm_decode_apicbase_msr_value(val); break; |
9040 | case MSR_IA32_FEATURE_CONTROL0x03a: vmm_decode_ia32_fc_value(val); break; |
9041 | case MSR_MTRRcap0x0fe: vmm_decode_mtrrcap_value(val); break; |
9042 | case MSR_PERF_STATUS0x198: vmm_decode_perf_status_value(val); break; |
9043 | case MSR_PERF_CTL0x199: vmm_decode_perf_ctl_value(val); break; |
9044 | case MSR_MTRRdefType0x2ff: vmm_decode_mtrrdeftype_value(val); break; |
9045 | case MSR_EFER0xc0000080: vmm_decode_efer_value(val); break; |
9046 | case MSR_MISC_ENABLE0x1a0: vmm_decode_misc_enable_value(val); break; |
9047 | default: DPRINTF("\n"); |
9048 | } |
9049 | } |
9050 | |
9051 | void |
9052 | vmm_decode_rflags(uint64_t rflags) |
9053 | { |
9054 | struct vmm_reg_debug_info rflags_info[16] = { |
9055 | { PSL_C0x00000001, "CF ", "cf "}, |
9056 | { PSL_PF0x00000004, "PF ", "pf "}, |
9057 | { PSL_AF0x00000010, "AF ", "af "}, |
9058 | { PSL_Z0x00000040, "ZF ", "zf "}, |
9059 | { PSL_N0x00000080, "SF ", "sf "}, /* sign flag */ |
9060 | { PSL_T0x00000100, "TF ", "tf "}, |
9061 | { PSL_I0x00000200, "IF ", "if "}, |
9062 | { PSL_D0x00000400, "DF ", "df "}, |
9063 | { PSL_V0x00000800, "OF ", "of "}, /* overflow flag */ |
9064 | { PSL_NT0x00004000, "NT ", "nt "}, |
9065 | { PSL_RF0x00010000, "RF ", "rf "}, |
9066 | { PSL_VM0x00020000, "VM ", "vm "}, |
9067 | { PSL_AC0x00040000, "AC ", "ac "}, |
9068 | { PSL_VIF0x00080000, "VIF ", "vif "}, |
9069 | { PSL_VIP0x00100000, "VIP ", "vip "}, |
9070 | { PSL_ID0x00200000, "ID ", "id "}, |
9071 | }; |
9072 | |
9073 | uint8_t i, iopl; |
9074 | |
9075 | DPRINTF("("); |
9076 | for (i = 0; i < nitems(rflags_info)(sizeof((rflags_info)) / sizeof((rflags_info)[0])); i++) |
9077 | if (rflags & rflags_info[i].vrdi_bit) |
9078 | DPRINTF("%s", rflags_info[i].vrdi_present); |
9079 | else |
9080 | DPRINTF("%s", rflags_info[i].vrdi_absent); |
9081 | |
9082 | iopl = (rflags & PSL_IOPL0x00003000) >> 12; |
9083 | DPRINTF("IOPL=%d", iopl); |
9084 | |
9085 | DPRINTF(")\n"); |
9086 | } |
9087 | |
9088 | void |
9089 | vmm_decode_misc_enable_value(uint64_t misc) |
9090 | { |
9091 | struct vmm_reg_debug_info misc_info[10] = { |
9092 | { MISC_ENABLE_FAST_STRINGS(1 << 0), "FSE ", "fse "}, |
9093 | { MISC_ENABLE_TCC(1 << 3), "TCC ", "tcc "}, |
9094 | { MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7), "PERF ", "perf "}, |
9095 | { MISC_ENABLE_BTS_UNAVAILABLE(1 << 11), "BTSU ", "btsu "}, |
9096 | { MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12), "PEBSU ", "pebsu "}, |
9097 | { MISC_ENABLE_EIST_ENABLED(1 << 16), "EIST ", "eist "}, |
9098 | { MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18), "MFSM ", "mfsm "}, |
9099 | { MISC_ENABLE_LIMIT_CPUID_MAXVAL(1 << 22), "CMAX ", "cmax "}, |
9100 | { MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23), "xTPRD ", "xtprd "}, |
9101 | { MISC_ENABLE_XD_BIT_DISABLE(1 << 2), "NXD", "nxd"}, |
9102 | }; |
9103 | |
9104 | uint8_t i; |
9105 | |
9106 | DPRINTF("("); |
9107 | for (i = 0; i < nitems(misc_info)(sizeof((misc_info)) / sizeof((misc_info)[0])); i++) |
9108 | if (misc & misc_info[i].vrdi_bit) |
9109 | DPRINTF("%s", misc_info[i].vrdi_present); |
9110 | else |
9111 | DPRINTF("%s", misc_info[i].vrdi_absent); |
9112 | |
9113 | DPRINTF(")\n"); |
9114 | } |
9115 | |
9116 | const char * |
9117 | vmm_decode_cpu_mode(struct vcpu *vcpu) |
9118 | { |
9119 | int mode = vmm_get_guest_cpu_mode(vcpu); |
9120 | |
9121 | switch (mode) { |
9122 | case VMM_CPU_MODE_REAL: return "real"; |
9123 | case VMM_CPU_MODE_PROT: return "16 bit protected"; |
9124 | case VMM_CPU_MODE_PROT32: return "32 bit protected"; |
9125 | case VMM_CPU_MODE_COMPAT: return "compatibility"; |
9126 | case VMM_CPU_MODE_LONG: return "long"; |
9127 | default: return "unknown"; |
9128 | } |
9129 | } |
9130 | #endif /* VMM_DEBUG */ |