File: | arch/amd64/amd64/vmm.c |
Warning: | line 4465, column 2 Value stored to 'level' is never read |
1 | /* $OpenBSD: vmm.c,v 1.301 2022/01/11 20:34:22 tobhe Exp $ */ |
2 | /* |
3 | * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> |
4 | * |
5 | * Permission to use, copy, modify, and distribute this software for any |
6 | * purpose with or without fee is hereby granted, provided that the above |
7 | * copyright notice and this permission notice appear in all copies. |
8 | * |
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
16 | */ |
17 | |
18 | #include <sys/param.h> |
19 | #include <sys/systm.h> |
20 | #include <sys/signalvar.h> |
21 | #include <sys/malloc.h> |
22 | #include <sys/device.h> |
23 | #include <sys/pool.h> |
24 | #include <sys/proc.h> |
25 | #include <sys/user.h> |
26 | #include <sys/ioctl.h> |
27 | #include <sys/queue.h> |
28 | #include <sys/rwlock.h> |
29 | #include <sys/pledge.h> |
30 | #include <sys/memrange.h> |
31 | #include <sys/tracepoint.h> |
32 | |
33 | #include <uvm/uvm_extern.h> |
34 | |
35 | #include <machine/fpu.h> |
36 | #include <machine/pmap.h> |
37 | #include <machine/biosvar.h> |
38 | #include <machine/segments.h> |
39 | #include <machine/cpufunc.h> |
40 | #include <machine/vmmvar.h> |
41 | |
42 | #include <dev/isa/isareg.h> |
43 | #include <dev/pv/pvreg.h> |
44 | |
45 | /* #define VMM_DEBUG */ |
46 | |
47 | void *l1tf_flush_region; |
48 | |
49 | #ifdef VMM_DEBUG |
50 | #define DPRINTF(x...) do { printf(x); } while(0) |
51 | #else |
52 | #define DPRINTF(x...) |
53 | #endif /* VMM_DEBUG */ |
54 | |
55 | #define DEVNAME(s) ((s)->sc_dev.dv_xname) |
56 | |
57 | #define CTRL_DUMP(x,y,z) printf(" %s: Can set:%s Can clear:%s\n", #z , \ |
58 | vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \ |
59 | IA32_VMX_##z, 1) ? "Yes" : "No", \ |
60 | vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \ |
61 | IA32_VMX_##z, 0) ? "Yes" : "No"); |
62 | |
63 | #define VMX_EXIT_INFO_HAVE_RIP 0x1 |
64 | #define VMX_EXIT_INFO_HAVE_REASON 0x2 |
65 | #define VMX_EXIT_INFO_COMPLETE \ |
66 | (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON) |
67 | |
68 | struct vm { |
69 | struct vmspace *vm_vmspace; |
70 | vm_map_t vm_map; |
71 | uint32_t vm_id; |
72 | pid_t vm_creator_pid; |
73 | size_t vm_nmemranges; |
74 | size_t vm_memory_size; |
75 | char vm_name[VMM_MAX_NAME_LEN]; |
76 | struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES]; |
77 | |
78 | struct vcpu_head vm_vcpu_list; |
79 | uint32_t vm_vcpu_ct; |
80 | u_int vm_vcpus_running; |
81 | struct rwlock vm_vcpu_lock; |
82 | |
83 | SLIST_ENTRY(vm) vm_link; |
84 | }; |
85 | |
86 | SLIST_HEAD(vmlist_head, vm); |
87 | |
88 | struct vmm_softc { |
89 | struct device sc_dev; |
90 | |
91 | /* Capabilities */ |
92 | uint32_t nr_vmx_cpus; |
93 | uint32_t nr_svm_cpus; |
94 | uint32_t nr_rvi_cpus; |
95 | uint32_t nr_ept_cpus; |
96 | |
97 | /* Managed VMs */ |
98 | struct vmlist_head vm_list; |
99 | |
100 | int mode; |
101 | |
102 | size_t vcpu_ct; |
103 | size_t vcpu_max; |
104 | |
105 | struct rwlock vm_lock; |
106 | size_t vm_ct; /* number of in-memory VMs */ |
107 | size_t vm_idx; /* next unique VM index */ |
108 | |
109 | struct rwlock vpid_lock; |
110 | uint16_t max_vpid; |
111 | uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */ |
112 | }; |
113 | |
114 | void vmx_dump_vmcs_field(uint16_t, const char *); |
115 | int vmm_enabled(void); |
116 | int vmm_probe(struct device *, void *, void *); |
117 | void vmm_attach(struct device *, struct device *, void *); |
118 | int vmmopen(dev_t, int, int, struct proc *); |
119 | int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *); |
120 | int vmmclose(dev_t, int, int, struct proc *); |
121 | int vmm_start(void); |
122 | int vmm_stop(void); |
123 | size_t vm_create_check_mem_ranges(struct vm_create_params *); |
124 | int vm_create(struct vm_create_params *, struct proc *); |
125 | int vm_run(struct vm_run_params *); |
126 | int vm_terminate(struct vm_terminate_params *); |
127 | int vm_get_info(struct vm_info_params *); |
128 | int vm_resetcpu(struct vm_resetcpu_params *); |
129 | int vm_intr_pending(struct vm_intr_params *); |
130 | int vm_rwregs(struct vm_rwregs_params *, int); |
131 | int vm_mprotect_ept(struct vm_mprotect_ept_params *); |
132 | int vm_rwvmparams(struct vm_rwvmparams_params *, int); |
133 | int vm_find(uint32_t, struct vm **); |
134 | int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
135 | int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
136 | int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *); |
137 | int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); |
138 | int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); |
139 | int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *); |
140 | int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *); |
141 | int vcpu_reload_vmcs_vmx(struct vcpu *); |
142 | int vcpu_init(struct vcpu *); |
143 | int vcpu_init_vmx(struct vcpu *); |
144 | int vcpu_init_svm(struct vcpu *); |
145 | int vcpu_must_stop(struct vcpu *); |
146 | int vcpu_run_vmx(struct vcpu *, struct vm_run_params *); |
147 | int vcpu_run_svm(struct vcpu *, struct vm_run_params *); |
148 | void vcpu_deinit(struct vcpu *); |
149 | void vcpu_deinit_vmx(struct vcpu *); |
150 | void vcpu_deinit_svm(struct vcpu *); |
151 | int vm_impl_init(struct vm *, struct proc *); |
152 | int vm_impl_init_vmx(struct vm *, struct proc *); |
153 | int vm_impl_init_svm(struct vm *, struct proc *); |
154 | void vm_impl_deinit(struct vm *); |
155 | void vm_impl_deinit_vmx(struct vm *); |
156 | void vm_impl_deinit_svm(struct vm *); |
157 | void vm_teardown(struct vm *); |
158 | int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int); |
159 | int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *); |
160 | int vmx_get_exit_info(uint64_t *, uint64_t *); |
161 | int vmx_load_pdptes(struct vcpu *); |
162 | int vmx_handle_exit(struct vcpu *); |
163 | int svm_handle_exit(struct vcpu *); |
164 | int svm_handle_msr(struct vcpu *); |
165 | int vmm_handle_xsetbv(struct vcpu *, uint64_t *); |
166 | int vmx_handle_xsetbv(struct vcpu *); |
167 | int svm_handle_xsetbv(struct vcpu *); |
168 | int vmm_handle_cpuid(struct vcpu *); |
169 | int vmx_handle_rdmsr(struct vcpu *); |
170 | int vmx_handle_wrmsr(struct vcpu *); |
171 | int vmx_handle_cr0_write(struct vcpu *, uint64_t); |
172 | int vmx_handle_cr4_write(struct vcpu *, uint64_t); |
173 | int vmx_handle_cr(struct vcpu *); |
174 | int svm_handle_inout(struct vcpu *); |
175 | int vmx_handle_inout(struct vcpu *); |
176 | int svm_handle_hlt(struct vcpu *); |
177 | int vmx_handle_hlt(struct vcpu *); |
178 | int vmm_inject_ud(struct vcpu *); |
179 | int vmm_inject_gp(struct vcpu *); |
180 | int vmm_inject_db(struct vcpu *); |
181 | void vmx_handle_intr(struct vcpu *); |
182 | void vmx_handle_intwin(struct vcpu *); |
183 | void vmx_handle_misc_enable_msr(struct vcpu *); |
184 | int vmm_get_guest_memtype(struct vm *, paddr_t); |
185 | int vmx_get_guest_faulttype(void); |
186 | int svm_get_guest_faulttype(struct vmcb *); |
187 | int vmx_get_exit_qualification(uint64_t *); |
188 | int vmm_get_guest_cpu_cpl(struct vcpu *); |
189 | int vmm_get_guest_cpu_mode(struct vcpu *); |
190 | int svm_fault_page(struct vcpu *, paddr_t); |
191 | int vmx_fault_page(struct vcpu *, paddr_t); |
192 | int vmx_handle_np_fault(struct vcpu *); |
193 | int svm_handle_np_fault(struct vcpu *); |
194 | int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int); |
195 | pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t); |
196 | int vmm_alloc_vpid(uint16_t *); |
197 | void vmm_free_vpid(uint16_t); |
198 | const char *vcpu_state_decode(u_int); |
199 | const char *vmx_exit_reason_decode(uint32_t); |
200 | const char *svm_exit_reason_decode(uint32_t); |
201 | const char *vmx_instruction_error_decode(uint32_t); |
202 | void svm_setmsrbr(struct vcpu *, uint32_t); |
203 | void svm_setmsrbw(struct vcpu *, uint32_t); |
204 | void svm_setmsrbrw(struct vcpu *, uint32_t); |
205 | void vmx_setmsrbr(struct vcpu *, uint32_t); |
206 | void vmx_setmsrbw(struct vcpu *, uint32_t); |
207 | void vmx_setmsrbrw(struct vcpu *, uint32_t); |
208 | void svm_set_clean(struct vcpu *, uint32_t); |
209 | void svm_set_dirty(struct vcpu *, uint32_t); |
210 | |
211 | int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size); |
212 | void vmm_init_pvclock(struct vcpu *, paddr_t); |
213 | int vmm_update_pvclock(struct vcpu *); |
214 | int vmm_pat_is_valid(uint64_t); |
215 | |
216 | #ifdef MULTIPROCESSOR |
217 | static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *); |
218 | #endif |
219 | |
220 | #ifdef VMM_DEBUG |
221 | void dump_vcpu(struct vcpu *); |
222 | void vmx_vcpu_dump_regs(struct vcpu *); |
223 | void vmx_dump_vmcs(struct vcpu *); |
224 | const char *msr_name_decode(uint32_t); |
225 | void vmm_segment_desc_decode(uint64_t); |
226 | void vmm_decode_cr0(uint64_t); |
227 | void vmm_decode_cr3(uint64_t); |
228 | void vmm_decode_cr4(uint64_t); |
229 | void vmm_decode_msr_value(uint64_t, uint64_t); |
230 | void vmm_decode_apicbase_msr_value(uint64_t); |
231 | void vmm_decode_ia32_fc_value(uint64_t); |
232 | void vmm_decode_mtrrcap_value(uint64_t); |
233 | void vmm_decode_perf_status_value(uint64_t); |
234 | void vmm_decode_perf_ctl_value(uint64_t); |
235 | void vmm_decode_mtrrdeftype_value(uint64_t); |
236 | void vmm_decode_efer_value(uint64_t); |
237 | void vmm_decode_rflags(uint64_t); |
238 | void vmm_decode_misc_enable_value(uint64_t); |
239 | const char *vmm_decode_cpu_mode(struct vcpu *); |
240 | |
241 | extern int mtrr2mrt(int); |
242 | |
243 | struct vmm_reg_debug_info { |
244 | uint64_t vrdi_bit; |
245 | const char *vrdi_present; |
246 | const char *vrdi_absent; |
247 | }; |
248 | #endif /* VMM_DEBUG */ |
249 | |
250 | extern uint64_t tsc_frequency; |
251 | extern int tsc_is_invariant; |
252 | |
253 | const char *vmm_hv_signature = VMM_HV_SIGNATURE; |
254 | |
255 | const struct kmem_pa_mode vmm_kp_contig = { |
256 | .kp_constraint = &no_constraint, |
257 | .kp_maxseg = 1, |
258 | .kp_align = 4096, |
259 | .kp_zero = 1, |
260 | }; |
261 | |
262 | struct cfdriver vmm_cd = { |
263 | NULL, "vmm", DV_DULL, CD_SKIPHIBERNATE |
264 | }; |
265 | |
266 | const struct cfattach vmm_ca = { |
267 | sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL, NULL |
268 | }; |
269 | |
270 | /* |
271 | * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite |
272 | * to access the individual fields of the guest segment registers. This |
273 | * struct is indexed by VCPU_REGS_* id. |
274 | */ |
275 | const struct { |
276 | uint64_t selid; |
277 | uint64_t limitid; |
278 | uint64_t arid; |
279 | uint64_t baseid; |
280 | } vmm_vmx_sreg_vmcs_fields[] = { |
281 | { VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT, |
282 | VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE }, |
283 | { VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT, |
284 | VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE }, |
285 | { VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT, |
286 | VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE }, |
287 | { VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT, |
288 | VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE }, |
289 | { VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT, |
290 | VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE }, |
291 | { VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT, |
292 | VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE }, |
293 | { VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT, |
294 | VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE }, |
295 | { VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT, |
296 | VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE } |
297 | }; |
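A minimal usage sketch of the lookup table above (illustrative only, not code from vmm.c): reading the guest %cs descriptor out of the currently loaded VMCS. It assumes the VCPU_REGS_CS index from vmmvar.h and the vmread() helper used elsewhere in this file; error handling is collapsed to a single EINVAL.

    static int
    vmx_read_guest_cs(uint64_t *sel, uint64_t *limit, uint64_t *ar, uint64_t *base)
    {
        /* Index into vmm_vmx_sreg_vmcs_fields by VCPU_REGS_* id. */
        const int idx = VCPU_REGS_CS;

        /* Each VMCS field ID comes straight from the table. */
        if (vmread(vmm_vmx_sreg_vmcs_fields[idx].selid, sel) ||
            vmread(vmm_vmx_sreg_vmcs_fields[idx].limitid, limit) ||
            vmread(vmm_vmx_sreg_vmcs_fields[idx].arid, ar) ||
            vmread(vmm_vmx_sreg_vmcs_fields[idx].baseid, base))
                return (EINVAL);

        return (0);
    }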
298 | |
299 | /* Pools for VMs and VCPUs */ |
300 | struct pool vm_pool; |
301 | struct pool vcpu_pool; |
302 | |
303 | struct vmm_softc *vmm_softc; |
304 | |
305 | /* IDT information used when populating host state area */ |
306 | extern vaddr_t idt_vaddr; |
307 | extern struct gate_descriptor *idt; |
308 | |
309 | /* Constants used in "CR access exit" */ |
310 | #define CR_WRITE 0 |
311 | #define CR_READ 1 |
312 | #define CR_CLTS 2 |
313 | #define CR_LMSW 3 |
314 | |
315 | /* |
316 | * vmm_enabled |
317 | * |
318 | * Checks if we have at least one CPU with either VMX or SVM. |
319 | * Returns 1 if we have at least one of either type, but not both, 0 otherwise. |
320 | */ |
321 | int |
322 | vmm_enabled(void) |
323 | { |
324 | struct cpu_info *ci; |
325 | CPU_INFO_ITERATOR cii; |
326 | int found_vmx = 0, found_svm = 0; |
327 | |
328 | /* Check if we have at least one CPU with either VMX or SVM */ |
329 | CPU_INFO_FOREACH(cii, ci) { |
330 | if (ci->ci_vmm_flags & CI_VMM_VMX) |
331 | found_vmx = 1; |
332 | if (ci->ci_vmm_flags & CI_VMM_SVM) |
333 | found_svm = 1; |
334 | } |
335 | |
336 | /* Don't support both SVM and VMX at the same time */ |
337 | if (found_vmx && found_svm) |
338 | return (0); |
339 | |
340 | if (found_vmx || found_svm) |
341 | return 1; |
342 | |
343 | return 0; |
344 | } |
345 | |
346 | int |
347 | vmm_probe(struct device *parent, void *match, void *aux) |
348 | { |
349 | const char **busname = (const char **)aux; |
350 | |
351 | if (strcmp(*busname, vmm_cd.cd_name) != 0) |
352 | return (0); |
353 | return (1); |
354 | } |
355 | |
356 | /* |
357 | * vmm_attach |
358 | * |
359 | * Calculates how many of each type of CPU we have, prints this into dmesg |
360 | * during attach. Initializes various locks, pools, and list structures for the |
361 | * VMM. |
362 | */ |
363 | void |
364 | vmm_attach(struct device *parent, struct device *self, void *aux) |
365 | { |
366 | struct vmm_softc *sc = (struct vmm_softc *)self; |
367 | struct cpu_info *ci; |
368 | CPU_INFO_ITERATOR cii; |
369 | |
370 | sc->nr_vmx_cpus = 0; |
371 | sc->nr_svm_cpus = 0; |
372 | sc->nr_rvi_cpus = 0; |
373 | sc->nr_ept_cpus = 0; |
374 | sc->vcpu_ct = 0; |
375 | sc->vm_ct = 0; |
376 | sc->vm_idx = 0; |
377 | |
378 | /* Calculate CPU features */ |
379 | CPU_INFO_FOREACH(cii, ci) { |
380 | if (ci->ci_vmm_flags & CI_VMM_VMX) |
381 | sc->nr_vmx_cpus++; |
382 | if (ci->ci_vmm_flags & CI_VMM_SVM) |
383 | sc->nr_svm_cpus++; |
384 | if (ci->ci_vmm_flags & CI_VMM_RVI) |
385 | sc->nr_rvi_cpus++; |
386 | if (ci->ci_vmm_flags & CI_VMM_EPT) |
387 | sc->nr_ept_cpus++; |
388 | } |
389 | |
390 | SLIST_INIT(&sc->vm_list); |
391 | rw_init(&sc->vm_lock, "vm_list"); |
392 | |
393 | if (sc->nr_ept_cpus) { |
394 | printf(": VMX/EPT"); |
395 | sc->mode = VMM_MODE_EPT; |
396 | } else if (sc->nr_vmx_cpus) { |
397 | printf(": VMX"); |
398 | sc->mode = VMM_MODE_VMX; |
399 | } else if (sc->nr_rvi_cpus) { |
400 | printf(": SVM/RVI"); |
401 | sc->mode = VMM_MODE_RVI; |
402 | } else if (sc->nr_svm_cpus) { |
403 | printf(": SVM"); |
404 | sc->mode = VMM_MODE_SVM; |
405 | } else { |
406 | printf(": unknown"); |
407 | sc->mode = VMM_MODE_UNKNOWN; |
408 | } |
409 | |
410 | if (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) { |
411 | if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) { |
412 | l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE, |
413 | &kv_any, &vmm_kp_contig, &kd_waitok); |
414 | if (!l1tf_flush_region) { |
415 | printf(" (failing, no memory)"); |
416 | sc->mode = VMM_MODE_UNKNOWN; |
417 | } else { |
418 | printf(" (using slow L1TF mitigation)"); |
419 | memset(l1tf_flush_region, 0xcc, |
420 | VMX_L1D_FLUSH_SIZE); |
421 | } |
422 | } |
423 | } |
424 | printf("\n"); |
425 | |
426 | if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) { |
427 | sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid; |
428 | } else { |
429 | sc->max_vpid = 0xFFF; |
430 | } |
431 | |
432 | bzero(&sc->vpids, sizeof(sc->vpids)); |
433 | rw_init(&sc->vpid_lock, "vpid"); |
434 | |
435 | pool_init(&vm_pool, sizeof(struct vm), 0, IPL_MPFLOOR, PR_WAITOK, |
436 | "vmpool", NULL); |
437 | pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_MPFLOOR, PR_WAITOK, |
438 | "vcpupl", NULL); |
439 | |
440 | vmm_softc = sc; |
441 | } |
442 | |
443 | /* |
444 | * vmmopen |
445 | * |
446 | * Called during open of /dev/vmm. |
447 | * |
448 | * Parameters: |
449 | * dev, flag, mode, p: These come from the character device and are |
450 | * all unused for this function |
451 | * |
452 | * Return values: |
453 | * ENODEV: if vmm(4) didn't attach or no supported CPUs detected |
454 | * 0: successful open |
455 | */ |
456 | int |
457 | vmmopen(dev_t dev, int flag, int mode, struct proc *p) |
458 | { |
459 | /* Don't allow open if we didn't attach */ |
460 | if (vmm_softc == NULL) |
461 | return (ENODEV); |
462 | |
463 | /* Don't allow open if we didn't detect any supported CPUs */ |
464 | if (vmm_softc->mode != VMM_MODE_EPT && vmm_softc->mode != VMM_MODE_RVI) |
465 | return (ENODEV); |
466 | |
467 | return 0; |
468 | } |
469 | |
470 | /* |
471 | * vmmioctl |
472 | * |
473 | * Main ioctl dispatch routine for /dev/vmm. Parses ioctl type and calls |
474 | * appropriate lower level handler routine. Returns result to ioctl caller. |
475 | */ |
476 | int |
477 | vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) |
478 | { |
479 | int ret; |
480 | |
481 | KERNEL_UNLOCK(); |
482 | |
483 | switch (cmd) { |
484 | case VMM_IOC_CREATE: |
485 | if ((ret = vmm_start()) != 0) { |
486 | vmm_stop(); |
487 | break; |
488 | } |
489 | ret = vm_create((struct vm_create_params *)data, p); |
490 | break; |
491 | case VMM_IOC_RUN: |
492 | ret = vm_run((struct vm_run_params *)data); |
493 | break; |
494 | case VMM_IOC_INFO: |
495 | ret = vm_get_info((struct vm_info_params *)data); |
496 | break; |
497 | case VMM_IOC_TERM: |
498 | ret = vm_terminate((struct vm_terminate_params *)data); |
499 | break; |
500 | case VMM_IOC_RESETCPU: |
501 | ret = vm_resetcpu((struct vm_resetcpu_params *)data); |
502 | break; |
503 | case VMM_IOC_INTR: |
504 | ret = vm_intr_pending((struct vm_intr_params *)data); |
505 | break; |
506 | case VMM_IOC_READREGS: |
507 | ret = vm_rwregs((struct vm_rwregs_params *)data, 0); |
508 | break; |
509 | case VMM_IOC_WRITEREGS: |
510 | ret = vm_rwregs((struct vm_rwregs_params *)data, 1); |
511 | break; |
512 | case VMM_IOC_MPROTECT_EPT: |
513 | ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data); |
514 | break; |
515 | case VMM_IOC_READVMPARAMS: |
516 | ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0); |
517 | break; |
518 | case VMM_IOC_WRITEVMPARAMS: |
519 | ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1); |
520 | break; |
521 | |
522 | default: |
523 | DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd); |
524 | ret = ENOTTY; |
525 | } |
526 | |
527 | KERNEL_LOCK(); |
528 | |
529 | return (ret); |
530 | } |
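For context, a minimal userland sketch of how this dispatch is typically driven (illustrative only, not code taken from vmd): open /dev/vmm, fill in a vm_create_params and issue VMM_IOC_CREATE. The guest memory-range setup is elided; on success vm_create() returns the new VM's id in vcp_id.

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <machine/vmmvar.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int
    create_example_vm(void)
    {
        struct vm_create_params vcp;
        int fd, saved;

        if ((fd = open("/dev/vmm", O_RDWR)) == -1)
                return (-1);

        memset(&vcp, 0, sizeof(vcp));
        strlcpy(vcp.vcp_name, "example", sizeof(vcp.vcp_name));
        vcp.vcp_ncpus = 1;      /* vm_create() is UP-only for now */
        /* vcp.vcp_nmemranges and vcp.vcp_memranges must describe guest RAM */

        if (ioctl(fd, VMM_IOC_CREATE, &vcp) == -1) {
                saved = errno;
                close(fd);
                errno = saved;
                return (-1);
        }

        close(fd);
        return (0);     /* vcp.vcp_id now identifies the new VM */
    }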
531 | |
532 | /* |
533 | * pledge_ioctl_vmm |
534 | * |
535 | * Restrict the allowed ioctls in a pledged process context. |
536 | * Is called from pledge_ioctl(). |
537 | */ |
538 | int |
539 | pledge_ioctl_vmm(struct proc *p, long com) |
540 | { |
541 | switch (com) { |
542 | case VMM_IOC_CREATE: |
543 | case VMM_IOC_INFO: |
544 | /* The "parent" process in vmd forks and manages VMs */ |
545 | if (p->p_p->ps_pledge & PLEDGE_PROC) |
546 | return (0); |
547 | break; |
548 | case VMM_IOC_TERM: |
549 | /* XXX VM processes should only terminate themselves */ |
550 | case VMM_IOC_RUN: |
551 | case VMM_IOC_RESETCPU: |
552 | case VMM_IOC_INTR: |
553 | case VMM_IOC_READREGS: |
554 | case VMM_IOC_WRITEREGS: |
555 | case VMM_IOC_MPROTECT_EPT: |
556 | case VMM_IOC_READVMPARAMS: |
557 | case VMM_IOC_WRITEVMPARAMS: |
558 | return (0); |
559 | } |
560 | |
561 | return (EPERM); |
562 | } |
563 | |
564 | /* |
565 | * vmmclose |
566 | * |
567 | * Called when /dev/vmm is closed. Presently unused. |
568 | */ |
569 | int |
570 | vmmclose(dev_t dev, int flag, int mode, struct proc *p) |
571 | { |
572 | return 0; |
573 | } |
574 | |
575 | /* |
576 | * vm_find_vcpu |
577 | * |
578 | * Lookup VMM VCPU by ID number |
579 | * |
580 | * Parameters: |
581 | * vm: vm structure |
582 | * id: index id of vcpu |
583 | * |
584 | * Returns pointer to vcpu structure if successful, NULL otherwise |
585 | */ |
586 | static struct vcpu * |
587 | vm_find_vcpu(struct vm *vm, uint32_t id) |
588 | { |
589 | struct vcpu *vcpu; |
590 | |
591 | if (vm == NULL) |
592 | return NULL; |
593 | rw_enter_read(&vm->vm_vcpu_lock); |
594 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) { |
595 | if (vcpu->vc_id == id) |
596 | break; |
597 | } |
598 | rw_exit_read(&vm->vm_vcpu_lock); |
599 | return vcpu; |
600 | } |
601 | |
602 | |
603 | /* |
604 | * vm_resetcpu |
605 | * |
606 | * Resets the vcpu defined in 'vrp' to power-on-init register state |
607 | * |
608 | * Parameters: |
609 | * vrp: ioctl structure defining the vcpu to reset (see vmmvar.h) |
610 | * |
611 | * Returns 0 if successful, or various error codes on failure: |
612 | * ENOENT if the VM id contained in 'vrp' refers to an unknown VM or |
613 | * if vrp describes an unknown vcpu for this VM |
614 | * EBUSY if the indicated VCPU is not stopped |
615 | * EIO if the indicated VCPU failed to reset |
616 | */ |
617 | int |
618 | vm_resetcpu(struct vm_resetcpu_params *vrp) |
619 | { |
620 | struct vm *vm; |
621 | struct vcpu *vcpu; |
622 | int error; |
623 | |
624 | /* Find the desired VM */ |
625 | rw_enter_read(&vmm_softc->vm_lock); |
626 | error = vm_find(vrp->vrp_vm_id, &vm); |
627 | rw_exit_read(&vmm_softc->vm_lock); |
628 | |
629 | /* Not found? exit. */ |
630 | if (error != 0) { |
631 | DPRINTF("%s: vm id %u not found\n", __func__, |
632 | vrp->vrp_vm_id); |
633 | return (error); |
634 | } |
635 | |
636 | vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id); |
637 | |
638 | if (vcpu == NULL) { |
639 | DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__, |
640 | vrp->vrp_vcpu_id, vrp->vrp_vm_id); |
641 | return (ENOENT); |
642 | } |
643 | |
644 | rw_enter_write(&vcpu->vc_lock); |
645 | |
646 | if (vcpu->vc_state != VCPU_STATE_STOPPED) { |
647 | DPRINTF("%s: reset of vcpu %u on vm %u attempted " |
648 | "while vcpu was in state %u (%s)\n", __func__, |
649 | vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state, |
650 | vcpu_state_decode(vcpu->vc_state)); |
651 | |
652 | rw_exit_write(&vcpu->vc_lock); |
653 | return (EBUSY); |
654 | } |
655 | |
656 | DPRINTF("%s: resetting vm %d vcpu %d to power on defaults\n", __func__, |
657 | vm->vm_id, vcpu->vc_id); |
658 | |
659 | if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) { |
660 | printf("%s: failed\n", __func__); |
661 | #ifdef VMM_DEBUG |
662 | dump_vcpu(vcpu); |
663 | #endif /* VMM_DEBUG */ |
664 | rw_exit_write(&vcpu->vc_lock); |
665 | return (EIO); |
666 | } |
667 | |
668 | rw_exit_write(&vcpu->vc_lock); |
669 | return (0); |
670 | } |
671 | |
672 | /* |
673 | * vm_intr_pending |
674 | * |
675 | * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an |
676 | * interrupt is pending and needs acknowledgment |
677 | * |
678 | * Parameters: |
679 | * vip: Describes the vm/vcpu for which the interrupt is pending |
680 | * |
681 | * Return values: |
682 | * 0: if successful |
683 | * ENOENT: if the VM/VCPU defined by 'vip' cannot be found |
684 | */ |
685 | int |
686 | vm_intr_pending(struct vm_intr_params *vip) |
687 | { |
688 | struct vm *vm; |
689 | struct vcpu *vcpu; |
690 | int error; |
691 | |
692 | /* Find the desired VM */ |
693 | rw_enter_read(&vmm_softc->vm_lock); |
694 | error = vm_find(vip->vip_vm_id, &vm); |
695 | |
696 | /* Not found? exit. */ |
697 | if (error != 0) { |
698 | rw_exit_read(&vmm_softc->vm_lock); |
699 | return (error); |
700 | } |
701 | |
702 | vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id); |
703 | rw_exit_read(&vmm_softc->vm_lock); |
704 | |
705 | if (vcpu == NULL) |
706 | return (ENOENT); |
707 | |
708 | rw_enter_write(&vcpu->vc_lock); |
709 | vcpu->vc_intr = vip->vip_intr; |
710 | rw_exit_write(&vcpu->vc_lock); |
711 | |
712 | return (0); |
713 | } |
714 | |
715 | /* |
716 | * vm_rwvmparams |
717 | * |
718 | * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock |
719 | * version, etc. |
720 | * |
721 | * Parameters: |
722 | * vrwp: Describes the VM and VCPU to get/set the params from |
723 | * dir: 0 for reading, 1 for writing |
724 | * |
725 | * Return values: |
726 | * 0: if successful |
727 | * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found |
728 | * EINVAL: if an error occurred reading the registers of the guest |
729 | */ |
730 | int |
731 | vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) { |
732 | struct vm *vm; |
733 | struct vcpu *vcpu; |
734 | int error; |
735 | |
736 | /* Find the desired VM */ |
737 | rw_enter_read(&vmm_softc->vm_lock); |
738 | error = vm_find(vpp->vpp_vm_id, &vm); |
739 | |
740 | /* Not found? exit. */ |
741 | if (error != 0) { |
742 | rw_exit_read(&vmm_softc->vm_lock); |
743 | return (error); |
744 | } |
745 | |
746 | vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id); |
747 | rw_exit_read(&vmm_softc->vm_lock); |
748 | |
749 | if (vcpu == NULL) |
750 | return (ENOENT); |
751 | |
752 | if (dir == 0) { |
753 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION) |
754 | vpp->vpp_pvclock_version = vcpu->vc_pvclock_version; |
755 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) |
756 | vpp->vpp_pvclock_system_gpa = \ |
757 | vcpu->vc_pvclock_system_gpa; |
758 | return (0); |
759 | } |
760 | |
761 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION) |
762 | vcpu->vc_pvclock_version = vpp->vpp_pvclock_version; |
763 | if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) { |
764 | vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa); |
765 | } |
766 | return (0); |
767 | |
768 | } |
769 | |
770 | /* |
771 | * vm_rwregs |
772 | * |
773 | * IOCTL handler to read/write the current register values of a guest VCPU. |
774 | * The VCPU must not be running. |
775 | * |
776 | * Parameters: |
777 | * vrwp: Describes the VM and VCPU to get/set the registers from. The |
778 | * register values are returned here as well. |
779 | * dir: 0 for reading, 1 for writing |
780 | * |
781 | * Return values: |
782 | * 0: if successful |
783 | * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found |
784 | * EINVAL: if an error occurred accessing the registers of the guest |
785 | * EPERM: if the vm cannot be accessed from the calling process |
786 | */ |
787 | int |
788 | vm_rwregs(struct vm_rwregs_params *vrwp, int dir) |
789 | { |
790 | struct vm *vm; |
791 | struct vcpu *vcpu; |
792 | struct vcpu_reg_state *vrs = &vrwp->vrwp_regs; |
793 | int error, ret; |
794 | |
795 | /* Find the desired VM */ |
796 | rw_enter_read(&vmm_softc->vm_lock); |
797 | error = vm_find(vrwp->vrwp_vm_id, &vm); |
798 | |
799 | /* Not found? exit. */ |
800 | if (error != 0) { |
801 | rw_exit_read(&vmm_softc->vm_lock); |
802 | return (error); |
803 | } |
804 | |
805 | vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id); |
806 | rw_exit_read(&vmm_softc->vm_lock); |
807 | |
808 | if (vcpu == NULL) |
809 | return (ENOENT); |
810 | |
811 | rw_enter_write(&vcpu->vc_lock); |
812 | if (vmm_softc->mode == VMM_MODE_VMX || |
813 | vmm_softc->mode == VMM_MODE_EPT) |
814 | ret = (dir == 0) ? |
815 | vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, vrs) : |
816 | vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs); |
817 | else if (vmm_softc->mode == VMM_MODE_SVM || |
818 | vmm_softc->mode == VMM_MODE_RVI) |
819 | ret = (dir == 0) ? |
820 | vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) : |
821 | vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs); |
822 | else { |
823 | DPRINTF("%s: unknown vmm mode", __func__); |
824 | ret = EINVAL; |
825 | } |
826 | rw_exit_write(&vcpu->vc_lock); |
827 | |
828 | return (ret); |
829 | } |
830 | |
831 | /* |
832 | * vm_mprotect_ept |
833 | * |
834 | * IOCTL handler to set the access protections of the EPT |
835 | * |
836 | * Parameters: |
837 | * vmep: describes the memory range to which the protection will be applied. |
838 | * |
839 | * Return values: |
840 | * 0: if successful |
841 | * ENOENT: if the VM defined by 'vmep' cannot be found |
842 | * EINVAL: if the sgpa or size is not page aligned, the prot is invalid, |
843 | * size is too large (512GB), there is wraparound |
844 | * (like start = 512GB-1 and end = 512GB-2), |
845 | * the address specified is not within the vm's mem range |
846 | * or the address lies inside reserved (MMIO) memory |
847 | */ |
848 | int |
849 | vm_mprotect_ept(struct vm_mprotect_ept_params *vmep) |
850 | { |
851 | struct vm *vm; |
852 | struct vcpu *vcpu; |
853 | vaddr_t sgpa; |
854 | size_t size; |
855 | vm_prot_t prot; |
856 | uint64_t msr; |
857 | int ret, memtype; |
858 | |
859 | /* If not EPT or RVI, nothing to do here */ |
860 | if (!(vmm_softc->mode == VMM_MODE_EPT |
861 | || vmm_softc->mode == VMM_MODE_RVI)) |
862 | return (0); |
863 | |
864 | /* Find the desired VM */ |
865 | rw_enter_read(&vmm_softc->vm_lock); |
866 | ret = vm_find(vmep->vmep_vm_id, &vm); |
867 | rw_exit_read(&vmm_softc->vm_lock); |
868 | |
869 | /* Not found? exit. */ |
870 | if (ret != 0) { |
871 | DPRINTF("%s: vm id %u not found\n", __func__, |
872 | vmep->vmep_vm_id); |
873 | return (ret); |
874 | } |
875 | |
876 | vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id); |
877 | |
878 | if (vcpu == NULL) { |
879 | DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__, |
880 | vmep->vmep_vcpu_id, vmep->vmep_vm_id); |
881 | return (ENOENT); |
882 | } |
883 | |
884 | if (vcpu->vc_state != VCPU_STATE_STOPPED) { |
885 | DPRINTF("%s: mprotect_ept %u on vm %u attempted " |
886 | "while vcpu was in state %u (%s)\n", __func__, |
887 | vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state, |
888 | vcpu_state_decode(vcpu->vc_state)); |
889 | |
890 | return (EBUSY); |
891 | } |
892 | |
893 | /* Only proceed if the pmap is in the correct mode */ |
894 | KASSERT((vmm_softc->mode == VMM_MODE_EPT && |
895 | vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || |
896 | (vmm_softc->mode == VMM_MODE_RVI && |
897 | vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)); |
898 | |
899 | sgpa = vmep->vmep_sgpa; |
900 | size = vmep->vmep_size; |
901 | prot = vmep->vmep_prot; |
902 | |
903 | /* No W^X permissions */ |
904 | if ((prot & PROT_MASK) != prot && |
905 | (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) { |
906 | DPRINTF("%s: W+X permission requested\n", __func__); |
907 | return (EINVAL); |
908 | } |
909 | |
910 | /* No Write only permissions */ |
911 | if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) { |
912 | DPRINTF("%s: No Write only permissions\n", __func__); |
913 | return (EINVAL); |
914 | } |
915 | |
916 | /* No empty permissions */ |
917 | if (prot == 0) { |
918 | DPRINTF("%s: No empty permissions\n", __func__); |
919 | return (EINVAL); |
920 | } |
921 | |
922 | /* No execute only on EPT CPUs that don't have that capability */ |
923 | if (vmm_softc->mode == VMM_MODE_EPT) { |
924 | msr = rdmsr(IA32_VMX_EPT_VPID_CAP); |
925 | if (prot == PROT_EXEC && |
926 | (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS) == 0) { |
927 | DPRINTF("%s: Execute only permissions unsupported," |
928 | " adding read permission\n", __func__); |
929 | |
930 | prot |= PROT_READ; |
931 | } |
932 | } |
933 | |
934 | /* Must be page aligned */ |
935 | if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0) |
936 | return (EINVAL); |
937 | |
938 | /* size must be less than 512GB */ |
939 | if (size >= NBPD_L4) |
940 | return (EINVAL); |
941 | |
942 | /* no wraparound */ |
943 | if (sgpa + size < sgpa) |
944 | return (EINVAL); |
945 | |
946 | /* |
947 | * Specifying addresses within the PCI MMIO space is forbidden. |
948 | * Disallow addresses that start inside the MMIO space: |
949 | * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
950 | */ |
951 | if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END) |
952 | return (EINVAL); |
953 | |
954 | /* |
955 | * ... and disallow addresses that end inside the MMIO space: |
956 | * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
957 | */ |
958 | if (sgpa + size > VMM_PCI_MMIO_BAR_BASE && |
959 | sgpa + size <= VMM_PCI_MMIO_BAR_END) |
960 | return (EINVAL); |
961 | |
962 | memtype = vmm_get_guest_memtype(vm, sgpa); |
963 | if (memtype == VMM_MEM_TYPE_UNKNOWN) |
964 | return (EINVAL); |
965 | |
966 | if (vmm_softc->mode == VMM_MODE_EPT) |
967 | ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot); |
968 | else if (vmm_softc->mode == VMM_MODE_RVI) { |
969 | pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot); |
970 | /* XXX requires an invlpga */ |
971 | ret = 0; |
972 | } else |
973 | return (EINVAL); |
974 | |
975 | return (ret); |
976 | } |
977 | |
978 | /* |
979 | * vmx_mprotect_ept |
980 | * |
981 | * apply the ept protections to the requested pages, faulting in the page if |
982 | * required. |
983 | */ |
984 | int |
985 | vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot) |
986 | { |
987 | struct vmx_invept_descriptor vid; |
988 | pmap_t pmap; |
989 | pt_entry_t *pte; |
990 | paddr_t addr; |
991 | int ret = 0; |
992 | |
993 | pmap = vm_map->pmap; |
994 | |
995 | KERNEL_LOCK(); |
996 | |
997 | for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) { |
998 | pte = vmx_pmap_find_pte_ept(pmap, addr); |
999 | if (pte == NULL) { |
1000 | ret = uvm_fault(vm_map, addr, VM_FAULT_WIRE, |
1001 | PROT_READ | PROT_WRITE | PROT_EXEC); |
1002 | if (ret) |
1003 | printf("%s: uvm_fault returns %d, GPA=0x%llx\n", |
1004 | __func__, ret, (uint64_t)addr); |
1005 | |
1006 | pte = vmx_pmap_find_pte_ept(pmap, addr); |
1007 | if (pte == NULL) { |
1008 | KERNEL_UNLOCK(); |
1009 | return EFAULT; |
1010 | } |
1011 | } |
1012 | |
1013 | if (prot & PROT_READ) |
1014 | *pte |= EPT_R; |
1015 | else |
1016 | *pte &= ~EPT_R; |
1017 | |
1018 | if (prot & PROT_WRITE) |
1019 | *pte |= EPT_W; |
1020 | else |
1021 | *pte &= ~EPT_W; |
1022 | |
1023 | if (prot & PROT_EXEC) |
1024 | *pte |= EPT_X; |
1025 | else |
1026 | *pte &= ~EPT_X; |
1027 | } |
1028 | |
1029 | /* |
1030 | * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction |
1031 | * the first bullet point seems to say we should call invept. |
1032 | * |
1033 | * Software should use the INVEPT instruction with the “single-context” |
1034 | * INVEPT type after making any of the following changes to an EPT |
1035 | * paging-structure entry (the INVEPT descriptor should contain an |
1036 | * EPTP value that references — directly or indirectly |
1037 | * — the modified EPT paging structure): |
1038 | * — Changing any of the privilege bits 2:0 from 1 to 0. |
1039 | * */ |
1040 | if (pmap->eptp != 0) { |
1041 | memset(&vid, 0, sizeof(vid)); |
1042 | vid.vid_eptp = pmap->eptp; |
1043 | DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__, |
1044 | vid.vid_eptp); |
1045 | invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid); |
1046 | } |
1047 | |
1048 | KERNEL_UNLOCK(); |
1049 | |
1050 | return ret; |
1051 | } |
1052 | |
1053 | /* |
1054 | * vmx_pmap_find_pte_ept |
1055 | * |
1056 | * find the page table entry specified by addr in the pmap supplied. |
1057 | */ |
1058 | pt_entry_t * |
1059 | vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr) |
1060 | { |
1061 | int l4idx, l3idx, l2idx, l1idx; |
1062 | pd_entry_t *pd; |
1063 | paddr_t pdppa; |
1064 | pt_entry_t *ptes, *pte; |
1065 | |
1066 | l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */ |
1067 | l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */ |
1068 | l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */ |
1069 | l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */ |
1070 | |
1071 | pd = (pd_entry_t *)pmap->pm_pdir; |
1072 | if (pd == NULL) |
1073 | return NULL; |
1074 | |
1075 | /* |
1076 | * l4idx should always be 0 since we don't support more than 512GB |
1077 | * guest physical memory. |
1078 | */ |
1079 | if (l4idx > 0) |
1080 | return NULL; |
1081 | |
1082 | /* |
1083 | * l3idx should always be < MAXDSIZ/1GB because we don't support more |
1084 | * than MAXDSIZ guest phys mem. |
1085 | */ |
1086 | if (l3idx >= MAXDSIZ / ((paddr_t)1024 * 1024 * 1024)) |
1087 | return NULL; |
1088 | |
1089 | pdppa = pd[l4idx] & PG_FRAME; |
1090 | if (pdppa == 0) |
1091 | return NULL; |
1092 | |
1093 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa); |
1094 | |
1095 | pdppa = ptes[l3idx] & PG_FRAME; |
1096 | if (pdppa == 0) |
1097 | return NULL; |
1098 | |
1099 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa); |
1100 | |
1101 | pdppa = ptes[l2idx] & PG_FRAME; |
1102 | if (pdppa == 0) |
1103 | return NULL; |
1104 | |
1105 | ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa); |
1106 | |
1107 | pte = &ptes[l1idx]; |
1108 | if (*pte == 0) |
1109 | return NULL; |
1110 | |
1111 | return pte; |
1112 | } |
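A worked example of the index arithmetic above, as a standalone program (illustrative only; 0x40352000 is an arbitrary guest-physical address). The four values select the PML4E, PDPTE, PDE and PTE slots that the walk dereferences in turn.

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long long gpa = 0x40352000ULL;     /* arbitrary GPA */

        printf("l4=%llu l3=%llu l2=%llu l1=%llu\n",
            (gpa >> 39) & 0x1ff,        /* PML4E idx -> 0   */
            (gpa >> 30) & 0x1ff,        /* PDPTE idx -> 1   */
            (gpa >> 21) & 0x1ff,        /* PDE idx   -> 1   */
            (gpa >> 12) & 0x1ff);       /* PTE idx   -> 338 (0x152) */
        return (0);
    }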
1113 | |
1114 | /* |
1115 | * vm_find |
1116 | * |
1117 | * Function to find an existing VM by its identifier. |
1118 | * Must be called under the global vm_lock. |
1119 | * |
1120 | * Parameters: |
1121 | * id: The VM identifier. |
1122 | * *res: A pointer to the VM or NULL if not found |
1123 | * |
1124 | * Return values: |
1125 | * 0: if successful |
1126 | * ENOENT: if the VM defined by 'id' cannot be found |
1127 | * EPERM: if the VM cannot be accessed by the current process |
1128 | */ |
1129 | int |
1130 | vm_find(uint32_t id, struct vm **res) |
1131 | { |
1132 | struct proc *p = curproc; |
1133 | struct vm *vm; |
1134 | |
1135 | *res = NULL; |
1136 | SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) { |
1137 | if (vm->vm_id == id) { |
1138 | /* |
1139 | * In the pledged VM process, only allow finding |
1140 | * the VM that is running in the current process. |
1141 | * The managing vmm parent process can look up all |
1142 | * VMs and is indicated by PLEDGE_PROC. |
1143 | */ |
1144 | if (((p->p_p->ps_pledge & |
1145 | (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) && |
1146 | (vm->vm_creator_pid != p->p_p->ps_pid)) |
1147 | return (pledge_fail(p, EPERM, PLEDGE_VMM)); |
1148 | *res = vm; |
1149 | return (0); |
1150 | } |
1151 | } |
1152 | |
1153 | return (ENOENT); |
1154 | } |
1155 | |
1156 | /* |
1157 | * vmm_start |
1158 | * |
1159 | * Starts VMM mode on the system |
1160 | */ |
1161 | int |
1162 | vmm_start(void) |
1163 | { |
1164 | struct cpu_info *self = curcpu(); |
1165 | int ret = 0; |
1166 | #ifdef MULTIPROCESSOR |
1167 | struct cpu_info *ci; |
1168 | CPU_INFO_ITERATOR cii; |
1169 | int i; |
1170 | #endif |
1171 | |
1172 | /* VMM is already running */ |
1173 | if (self->ci_flags & CPUF_VMM) |
1174 | return (0); |
1175 | |
1176 | #ifdef MULTIPROCESSOR |
1177 | /* Broadcast start VMM IPI */ |
1178 | x86_broadcast_ipi(X86_IPI_START_VMM); |
1179 | |
1180 | CPU_INFO_FOREACH(cii, ci) { |
1181 | if (ci == self) |
1182 | continue; |
1183 | for (i = 100000; (!(ci->ci_flags & CPUF_VMM)) && i>0;i--) |
1184 | delay(10); |
1185 | if (!(ci->ci_flags & CPUF_VMM)) { |
1186 | printf("%s: failed to enter VMM mode\n", |
1187 | ci->ci_dev->dv_xname); |
1188 | ret = EIO; |
1189 | } |
1190 | } |
1191 | #endif /* MULTIPROCESSOR */ |
1192 | |
1193 | /* Start VMM on this CPU */ |
1194 | start_vmm_on_cpu(self); |
1195 | if (!(self->ci_flags & CPUF_VMM)) { |
1196 | printf("%s: failed to enter VMM mode\n", |
1197 | self->ci_dev->dv_xname); |
1198 | ret = EIO; |
1199 | } |
1200 | |
1201 | return (ret); |
1202 | } |
1203 | |
1204 | /* |
1205 | * vmm_stop |
1206 | * |
1207 | * Stops VMM mode on the system |
1208 | */ |
1209 | int |
1210 | vmm_stop(void) |
1211 | { |
1212 | struct cpu_info *self = curcpu(); |
1213 | int ret = 0; |
1214 | #ifdef MULTIPROCESSOR |
1215 | struct cpu_info *ci; |
1216 | CPU_INFO_ITERATOR cii; |
1217 | int i; |
1218 | #endif |
1219 | |
1220 | /* VMM is not running */ |
1221 | if (!(self->ci_flags & CPUF_VMM)) |
1222 | return (0); |
1223 | |
1224 | #ifdef MULTIPROCESSOR |
1225 | /* Stop VMM on other CPUs */ |
1226 | x86_broadcast_ipi(X86_IPI_STOP_VMM); |
1227 | |
1228 | CPU_INFO_FOREACH(cii, ci) { |
1229 | if (ci == self) |
1230 | continue; |
1231 | for (i = 100000; (ci->ci_flags & CPUF_VMM) && i>0 ;i--) |
1232 | delay(10); |
1233 | if (ci->ci_flags & CPUF_VMM) { |
1234 | printf("%s: failed to exit VMM mode\n", |
1235 | ci->ci_dev->dv_xname); |
1236 | ret = EIO; |
1237 | } |
1238 | } |
1239 | #endif /* MULTIPROCESSOR */ |
1240 | |
1241 | /* Stop VMM on this CPU */ |
1242 | stop_vmm_on_cpu(self); |
1243 | if (self->ci_flags & CPUF_VMM) { |
1244 | printf("%s: failed to exit VMM mode\n", |
1245 | self->ci_dev->dv_xname); |
1246 | ret = EIO; |
1247 | } |
1248 | |
1249 | return (ret); |
1250 | } |
1251 | |
1252 | /* |
1253 | * start_vmm_on_cpu |
1254 | * |
1255 | * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn |
1256 | * sequence to enter VMM mode (eg, VMXON) |
1257 | */ |
1258 | void |
1259 | start_vmm_on_cpu(struct cpu_info *ci) |
1260 | { |
1261 | uint64_t msr; |
1262 | uint32_t cr4; |
1263 | |
1264 | /* No VMM mode? exit. */ |
1265 | if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 && |
1266 | (ci->ci_vmm_flags & CI_VMM_SVM) == 0) |
1267 | return; |
1268 | |
1269 | /* |
1270 | * AMD SVM |
1271 | */ |
1272 | if (ci->ci_vmm_flags & CI_VMM_SVM) { |
1273 | msr = rdmsr(MSR_EFER); |
1274 | msr |= EFER_SVME; |
1275 | wrmsr(MSR_EFER, msr); |
1276 | } |
1277 | |
1278 | /* |
1279 | * Intel VMX |
1280 | */ |
1281 | if (ci->ci_vmm_flags & CI_VMM_VMX) { |
1282 | if (ci->ci_vmxon_region == 0) |
1283 | return; |
1284 | else { |
1285 | bzero(ci->ci_vmxon_region, PAGE_SIZE); |
1286 | ci->ci_vmxon_region->vr_revision = |
1287 | ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; |
1288 | |
1289 | /* Set CR4.VMXE */ |
1290 | cr4 = rcr4(); |
1291 | cr4 |= CR4_VMXE; |
1292 | lcr4(cr4); |
1293 | |
1294 | /* Enable VMX */ |
1295 | msr = rdmsr(MSR_IA32_FEATURE_CONTROL); |
1296 | if (msr & IA32_FEATURE_CONTROL_LOCK) { |
1297 | if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) |
1298 | return; |
1299 | } else { |
1300 | msr |= IA32_FEATURE_CONTROL_VMX_EN | |
1301 | IA32_FEATURE_CONTROL_LOCK; |
1302 | wrmsr(MSR_IA32_FEATURE_CONTROL, msr); |
1303 | } |
1304 | |
1305 | /* Enter VMX mode */ |
1306 | if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa)) |
1307 | return; |
1308 | } |
1309 | } |
1310 | |
1311 | ci->ci_flags |= CPUF_VMM; |
1312 | } |
1313 | |
1314 | /* |
1315 | * stop_vmm_on_cpu |
1316 | * |
1317 | * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn |
1318 | * sequence to exit VMM mode (eg, VMXOFF) |
1319 | */ |
1320 | void |
1321 | stop_vmm_on_cpu(struct cpu_info *ci) |
1322 | { |
1323 | uint64_t msr; |
1324 | uint32_t cr4; |
1325 | |
1326 | if (!(ci->ci_flags & CPUF_VMM)) |
1327 | return; |
1328 | |
1329 | /* |
1330 | * AMD SVM |
1331 | */ |
1332 | if (ci->ci_vmm_flags & CI_VMM_SVM) { |
1333 | msr = rdmsr(MSR_EFER); |
1334 | msr &= ~EFER_SVME; |
1335 | wrmsr(MSR_EFER, msr); |
1336 | } |
1337 | |
1338 | /* |
1339 | * Intel VMX |
1340 | */ |
1341 | if (ci->ci_vmm_flags & CI_VMM_VMX) { |
1342 | if (vmxoff()) |
1343 | panic("VMXOFF failed"); |
1344 | |
1345 | cr4 = rcr4(); |
1346 | cr4 &= ~CR4_VMXE; |
1347 | lcr4(cr4); |
1348 | } |
1349 | |
1350 | ci->ci_flags &= ~CPUF_VMM; |
1351 | } |
1352 | |
1353 | /* |
1354 | * vmclear_on_cpu |
1355 | * |
1356 | * Flush and clear VMCS on 'ci' by executing vmclear. |
1357 | * |
1358 | */ |
1359 | void |
1360 | vmclear_on_cpu(struct cpu_info *ci) |
1361 | { |
1362 | if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) { |
1363 | if (vmclear(&ci->ci_vmcs_pa)) |
1364 | panic("VMCLEAR ipi failed"); |
1365 | atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR); |
1366 | } |
1367 | } |
1368 | |
1369 | #ifdef MULTIPROCESSOR |
1370 | static int |
1371 | vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu) |
1372 | { |
1373 | int ret = 0, nticks = 200000000; |
1374 | |
1375 | rw_enter_write(&ci->ci_vmcs_lock); |
1376 | atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa); |
1377 | x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM); |
1378 | |
1379 | while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) { |
1380 | CPU_BUSY_CYCLE(); |
1381 | if (--nticks <= 0) { |
1382 | printf("%s: spun out\n", __func__); |
1383 | ret = 1; |
1384 | break; |
1385 | } |
1386 | } |
1387 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED); |
1388 | rw_exit_write(&ci->ci_vmcs_lock); |
1389 | |
1390 | return (ret); |
1391 | } |
1392 | #endif /* MULTIPROCESSOR */ |
1393 | |
1394 | /* |
1395 | * vm_create_check_mem_ranges |
1396 | * |
1397 | * Make sure that the guest physical memory ranges given by the user process |
1398 | * do not overlap and are in ascending order. |
1399 | * |
1400 | * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE. |
1401 | * |
1402 | * Return Values: |
1403 | * The total memory size in MB if the checks were successful |
1404 | * 0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was |
1405 | * exceeded |
1406 | */ |
1407 | size_t |
1408 | vm_create_check_mem_ranges(struct vm_create_params *vcp) |
1409 | { |
1410 | size_t i, memsize = 0; |
1411 | struct vm_mem_range *vmr, *pvmr; |
1412 | const paddr_t maxgpa = (uint64_t)VMM_MAX_VM_MEM_SIZE * 1024 * 1024; |
1413 | |
1414 | if (vcp->vcp_nmemranges == 0 || |
1415 | vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) |
1416 | return (0); |
1417 | |
1418 | for (i = 0; i < vcp->vcp_nmemranges; i++) { |
1419 | vmr = &vcp->vcp_memranges[i]; |
1420 | |
1421 | /* Only page-aligned addresses and sizes are permitted */ |
1422 | if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) || |
1423 | (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) |
1424 | return (0); |
1425 | |
1426 | /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */ |
1427 | if (vmr->vmr_gpa >= maxgpa || |
1428 | vmr->vmr_size > maxgpa - vmr->vmr_gpa) |
1429 | return (0); |
1430 | |
1431 | /* |
1432 | * Make sure that all virtual addresses are within the address |
1433 | * space of the process and that they do not wrap around. |
1434 | * Calling uvm_share() when creating the VM will take care of |
1435 | * further checks. |
1436 | */ |
1437 | if (vmr->vmr_va < VM_MIN_ADDRESS || |
1438 | vmr->vmr_va >= VM_MAXUSER_ADDRESS || |
1439 | vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) |
1440 | return (0); |
1441 | |
1442 | /* |
1443 | * Specifying ranges within the PCI MMIO space is forbidden. |
1444 | * Disallow ranges that start inside the MMIO space: |
1445 | * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
1446 | */ |
1447 | if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE && |
1448 | vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) |
1449 | return (0); |
1450 | |
1451 | /* |
1452 | * ... and disallow ranges that end inside the MMIO space: |
1453 | * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] |
1454 | */ |
1455 | if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE && |
1456 | vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) |
1457 | return (0); |
1458 | |
1459 | /* |
1460 | * Make sure that guest physical memory ranges do not overlap |
1461 | * and that they are ascending. |
1462 | */ |
1463 | if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) |
1464 | return (0); |
1465 | |
1466 | memsize += vmr->vmr_size; |
1467 | pvmr = vmr; |
1468 | } |
1469 | |
1470 | if (memsize % (1024 * 1024) != 0) |
1471 | return (0); |
1472 | memsize /= 1024 * 1024; |
1473 | return (memsize); |
1474 | } |
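An illustrative vm_mem_range layout that satisfies every check above (hypothetical addresses, not taken from vmd): two page-aligned, ascending ranges that skirt the PCI MMIO hole at [VMM_PCI_MMIO_BAR_BASE, VMM_PCI_MMIO_BAR_END] and add up to a whole number of megabytes (4096 MB total).

    const struct vm_mem_range example_ranges[2] = {
        /* 3840 MB of low memory, ending exactly at the MMIO hole */
        { .vmr_gpa = 0x0, .vmr_va = 0x10000000000ULL,
          .vmr_size = 0xF0000000ULL },
        /* 256 MB of high memory, starting at 4 GB, above the hole */
        { .vmr_gpa = 0x100000000ULL, .vmr_va = 0x100F0000000ULL,
          .vmr_size = 0x10000000ULL },
    };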
1475 | |
1476 | /* |
1477 | * vm_create |
1478 | * |
1479 | * Creates the in-memory VMM structures for the VM defined by 'vcp'. The |
1480 | * parent of this VM shall be the process defined by 'p'. |
1481 | * This function does not start the VCPU(s) - see vm_start. |
1482 | * |
1483 | * Return Values: |
1484 | * 0: the create operation was successful |
1485 | * ENOMEM: out of memory |
1486 | * various other errors from vcpu_init/vm_impl_init |
1487 | */ |
1488 | int |
1489 | vm_create(struct vm_create_params *vcp, struct proc *p) |
1490 | { |
1491 | int i, ret; |
1492 | size_t memsize; |
1493 | struct vm *vm; |
1494 | struct vcpu *vcpu; |
1495 | |
1496 | if (!(curcpu()->ci_flags & CPUF_VMM)) |
1497 | return (EINVAL); |
1498 | |
1499 | memsize = vm_create_check_mem_ranges(vcp); |
1500 | if (memsize == 0) |
1501 | return (EINVAL); |
1502 | |
1503 | /* XXX - support UP only (for now) */ |
1504 | if (vcp->vcp_ncpus != 1) |
1505 | return (EINVAL); |
1506 | |
1507 | rw_enter_write(&vmm_softc->vm_lock); |
1508 | if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > VMM_MAX_VCPUS512) { |
1509 | DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__, |
1510 | vmm_softc->vcpu_max); |
1511 | rw_exit_write(&vmm_softc->vm_lock); |
1512 | return (ENOMEM12); |
1513 | } |
1514 | vmm_softc->vcpu_ct += vcp->vcp_ncpus; |
1515 | |
1516 | vm = pool_get(&vm_pool, PR_WAITOK0x0001 | PR_ZERO0x0008); |
1517 | SLIST_INIT(&vm->vm_vcpu_list){ ((&vm->vm_vcpu_list)->slh_first) = ((void *)0); }; |
1518 | rw_init(&vm->vm_vcpu_lock, "vcpu_list")_rw_init_flags(&vm->vm_vcpu_lock, "vcpu_list", 0, ((void *)0)); |
1519 | |
1520 | vm->vm_creator_pid = p->p_p->ps_pid; |
1521 | vm->vm_nmemranges = vcp->vcp_nmemranges; |
1522 | memcpy(vm->vm_memranges, vcp->vcp_memranges,__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges ), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))) |
1523 | vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges ), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))); |
1524 | vm->vm_memory_size = memsize; |
1525 | strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN64 - 1); |
1526 | |
1527 | if (vm_impl_init(vm, p)) { |
1528 | printf("failed to init arch-specific features for vm %p\n", vm); |
1529 | vm_teardown(vm); |
1530 | rw_exit_write(&vmm_softc->vm_lock); |
1531 | return (ENOMEM12); |
1532 | } |
1533 | |
1534 | vmm_softc->vm_ct++; |
1535 | vmm_softc->vm_idx++; |
1536 | |
1537 | vm->vm_id = vmm_softc->vm_idx; |
1538 | vm->vm_vcpu_ct = 0; |
1539 | vm->vm_vcpus_running = 0; |
1540 | |
1541 | /* Initialize each VCPU defined in 'vcp' */ |
1542 | for (i = 0; i < vcp->vcp_ncpus; i++) { |
1543 | vcpu = pool_get(&vcpu_pool, PR_WAITOK0x0001 | PR_ZERO0x0008); |
1544 | vcpu->vc_parent = vm; |
1545 | if ((ret = vcpu_init(vcpu)) != 0) { |
1546 | printf("failed to init vcpu %d for vm %p\n", i, vm); |
1547 | vm_teardown(vm); |
1548 | vmm_softc->vm_idx--; |
1549 | rw_exit_write(&vmm_softc->vm_lock); |
1550 | return (ret); |
1551 | } |
1552 | rw_enter_write(&vm->vm_vcpu_lock); |
1553 | vcpu->vc_id = vm->vm_vcpu_ct; |
1554 | vm->vm_vcpu_ct++; |
1555 | SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link)do { (vcpu)->vc_vcpu_link.sle_next = (&vm->vm_vcpu_list )->slh_first; (&vm->vm_vcpu_list)->slh_first = ( vcpu); } while (0); |
1556 | rw_exit_write(&vm->vm_vcpu_lock); |
1557 | } |
1558 | |
1559 | /* XXX init various other hardware parts (vlapic, vioapic, etc) */ |
1560 | |
1561 | SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link)do { (vm)->vm_link.sle_next = (&vmm_softc->vm_list) ->slh_first; (&vmm_softc->vm_list)->slh_first = ( vm); } while (0); |
1562 | rw_exit_write(&vmm_softc->vm_lock); |
1563 | |
1564 | vcp->vcp_id = vm->vm_id; |
1565 | |
1566 | return (0); |
1567 | } |
1568 | |
1569 | /* |
1570 | * vm_impl_init_vmx |
1571 | * |
1572 | * Intel VMX specific VM initialization routine |
1573 | * |
1574 | * Parameters: |
1575 | * vm: the VM being initialized |
1576 | * p: vmd process owning the VM |
1577 | * |
1578 | * Return values: |
1579 | * 0: the initialization was successful |
1580 | * ENOMEM: the initialization failed (lack of resources) |
1581 | */ |
1582 | int |
1583 | vm_impl_init_vmx(struct vm *vm, struct proc *p) |
1584 | { |
1585 | int i, ret; |
1586 | vaddr_t mingpa, maxgpa; |
1587 | struct vm_mem_range *vmr; |
1588 | |
1589 | /* If not EPT, nothing to do here */ |
1590 | if (vmm_softc->mode != VMM_MODE_EPT) |
1591 | return (0); |
1592 | |
1593 | vmr = &vm->vm_memranges[0]; |
1594 | mingpa = vmr->vmr_gpa; |
1595 | vmr = &vm->vm_memranges[vm->vm_nmemranges - 1]; |
1596 | maxgpa = vmr->vmr_gpa + vmr->vmr_size; |
1597 | |
1598 | /* |
1599 | * uvmspace_alloc (currently) always returns a valid vmspace |
1600 | */ |
1601 | vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0); |
1602 | vm->vm_map = &vm->vm_vmspace->vm_map; |
1603 | |
1604 | /* Share each guest memory range from the vmd process into the new map */ |
1605 | DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map); |
1606 | for (i = 0; i < vm->vm_nmemranges; i++) { |
1607 | vmr = &vm->vm_memranges[i]; |
1608 | ret = uvm_share(vm->vm_map, vmr->vmr_gpa, |
1609 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04, |
1610 | &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size); |
1611 | if (ret) { |
1612 | printf("%s: uvm_share failed (%d)\n", __func__, ret); |
1613 | /* uvmspace_free calls pmap_destroy for us */ |
1614 | uvmspace_free(vm->vm_vmspace); |
1615 | vm->vm_vmspace = NULL((void *)0); |
1616 | return (ENOMEM12); |
1617 | } |
1618 | } |
1619 | |
1620 | ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_EPT2); |
1621 | if (ret) { |
1622 | printf("%s: pmap_convert failed\n", __func__); |
1623 | /* uvmspace_free calls pmap_destroy for us */ |
1624 | uvmspace_free(vm->vm_vmspace); |
1625 | vm->vm_vmspace = NULL((void *)0); |
1626 | return (ENOMEM12); |
1627 | } |
1628 | |
1629 | return (0); |
1630 | } |
1631 | |
1632 | /* |
1633 | * vm_impl_init_svm |
1634 | * |
1635 | * AMD SVM specific VM initialization routine |
1636 | * |
1637 | * Parameters: |
1638 | * vm: the VM being initialized |
1639 | * p: vmd process owning the VM |
1640 | * |
1641 | * Return values: |
1642 | * 0: the initialization was successful |
1643 | * ENOMEM: the initialization failed (lack of resources) |
1644 | */ |
1645 | int |
1646 | vm_impl_init_svm(struct vm *vm, struct proc *p) |
1647 | { |
1648 | int i, ret; |
1649 | vaddr_t mingpa, maxgpa; |
1650 | struct vm_mem_range *vmr; |
1651 | |
1652 | /* If not RVI, nothing to do here */ |
1653 | if (vmm_softc->mode != VMM_MODE_RVI) |
1654 | return (0); |
1655 | |
1656 | vmr = &vm->vm_memranges[0]; |
1657 | mingpa = vmr->vmr_gpa; |
1658 | vmr = &vm->vm_memranges[vm->vm_nmemranges - 1]; |
1659 | maxgpa = vmr->vmr_gpa + vmr->vmr_size; |
1660 | |
1661 | /* |
1662 | * uvmspace_alloc (currently) always returns a valid vmspace |
1663 | */ |
1664 | vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0); |
1665 | vm->vm_map = &vm->vm_vmspace->vm_map; |
1666 | |
1667 | /* Share each guest memory range from the vmd process into the new map */ |
1668 | DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map); |
1669 | for (i = 0; i < vm->vm_nmemranges; i++) { |
1670 | vmr = &vm->vm_memranges[i]; |
1671 | ret = uvm_share(vm->vm_map, vmr->vmr_gpa, |
1672 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04, |
1673 | &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size); |
1674 | if (ret) { |
1675 | printf("%s: uvm_share failed (%d)\n", __func__, ret); |
1676 | /* uvmspace_free calls pmap_destroy for us */ |
1677 | uvmspace_free(vm->vm_vmspace); |
1678 | vm->vm_vmspace = NULL((void *)0); |
1679 | return (ENOMEM12); |
1680 | } |
1681 | } |
1682 | |
1683 | /* Convert pmap to RVI */ |
1684 | ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_RVI3); |
1685 | |
1686 | return (ret); |
1687 | } |
1688 | |
1689 | /* |
1690 | * vm_impl_init |
1691 | * |
1692 | * Calls the architecture-specific VM init routine |
1693 | * |
1694 | * Parameters: |
1695 | * vm: the VM being initialized |
1696 | * p: vmd process owning the VM |
1697 | * |
1698 | * Return values (from architecture-specific init routines): |
1699 | * 0: the initialization was successful |
1700 | * ENOMEM: the initialization failed (lack of resources) |
1701 | */ |
1702 | int |
1703 | vm_impl_init(struct vm *vm, struct proc *p) |
1704 | { |
1705 | int ret; |
1706 | |
1707 | KERNEL_LOCK()_kernel_lock(); |
1708 | if (vmm_softc->mode == VMM_MODE_VMX || |
1709 | vmm_softc->mode == VMM_MODE_EPT) |
1710 | ret = vm_impl_init_vmx(vm, p); |
1711 | else if (vmm_softc->mode == VMM_MODE_SVM || |
1712 | vmm_softc->mode == VMM_MODE_RVI) |
1713 | ret = vm_impl_init_svm(vm, p); |
1714 | else |
1715 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
1716 | KERNEL_UNLOCK()_kernel_unlock(); |
1717 | |
1718 | return (ret); |
1719 | } |
1720 | |
1721 | /* |
1722 | * vm_impl_deinit_vmx |
1723 | * |
1724 | * Intel VMX specific VM deinitialization routine |
1725 | * |
1726 | * Parameters: |
1727 | * vm: VM to deinit |
1728 | */ |
1729 | void |
1730 | vm_impl_deinit_vmx(struct vm *vm) |
1731 | { |
1732 | /* Unused */ |
1733 | } |
1734 | |
1735 | /* |
1736 | * vm_impl_deinit_svm |
1737 | * |
1738 | * AMD SVM specific VM deinitialization routine |
1739 | * |
1740 | * Parameters: |
1741 | * vm: VM to deinit |
1742 | */ |
1743 | void |
1744 | vm_impl_deinit_svm(struct vm *vm) |
1745 | { |
1746 | /* Unused */ |
1747 | } |
1748 | |
1749 | /* |
1750 | * vm_impl_deinit |
1751 | * |
1752 | * Calls the architecture-specific VM deinit routine |
1753 | * |
1754 | * Parameters: |
1755 | * vm: VM to deinit |
1756 | */ |
1757 | void |
1758 | vm_impl_deinit(struct vm *vm) |
1759 | { |
1760 | if (vmm_softc->mode == VMM_MODE_VMX || |
1761 | vmm_softc->mode == VMM_MODE_EPT) |
1762 | vm_impl_deinit_vmx(vm); |
1763 | else if (vmm_softc->mode == VMM_MODE_SVM || |
1764 | vmm_softc->mode == VMM_MODE_RVI) |
1765 | vm_impl_deinit_svm(vm); |
1766 | else |
1767 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
1768 | } |
1769 | |
1770 | /* |
1771 | * vcpu_reload_vmcs_vmx |
1772 | * |
1773 | * (Re)load the VMCS on the current cpu. Must be called with the VMCS write |
1774 | * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an |
1775 | * ipi will be used to remotely flush it before loading the VMCS locally. |
1776 | * |
1777 | * Parameters: |
1778 | * vcpu: Pointer to the vcpu needing its VMCS |
1779 | * |
1780 | * Return values: |
1781 | * 0: if successful |
1782 | * EINVAL: an error occurred during flush or reload |
1783 | */ |
1784 | int |
1785 | vcpu_reload_vmcs_vmx(struct vcpu *vcpu) |
1786 | { |
1787 | struct cpu_info *ci, *last_ci; |
1788 | |
1789 | rw_assert_wrlock(&vcpu->vc_lock); |
1790 | |
1791 | ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
1792 | last_ci = vcpu->vc_last_pcpu; |
1793 | |
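| /* |
| * A VMCS can only be active on one cpu at a time, so if this vcpu |
| * last ran on a different cpu the VMCS must be VMCLEARed there |
| * (via IPI) before VMPTRLD can make it current on this cpu. |
| */ |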
1794 | if (last_ci == NULL((void *)0)) { |
1795 | /* First launch */ |
1796 | if (vmclear(&vcpu->vc_control_pa)) |
1797 | return (EINVAL22); |
1798 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0)); |
1799 | #ifdef MULTIPROCESSOR1 |
1800 | } else if (last_ci != ci) { |
1801 | /* We've moved CPUs at some point, so remote VMCLEAR */ |
1802 | if (vmx_remote_vmclear(last_ci, vcpu)) |
1803 | return (EINVAL22); |
1804 | KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED)((vcpu->vc_vmx_vmcs_state == 0) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 1804, "vcpu->vc_vmx_vmcs_state == VMCS_CLEARED" )); |
1805 | #endif /* MULTIPROCESSOR */ |
1806 | } |
1807 | |
1808 | if (vmptrld(&vcpu->vc_control_pa)) { |
1809 | printf("%s: vmptrld\n", __func__); |
1810 | return (EINVAL22); |
1811 | } |
1812 | |
1813 | return (0); |
1814 | } |
1815 | |
1816 | /* |
1817 | * vcpu_readregs_vmx |
1818 | * |
1819 | * Reads 'vcpu's registers |
1820 | * |
1821 | * Parameters: |
1822 | * vcpu: the vcpu to read register values from |
1823 | * regmask: the types of registers to read |
1824 | * vrs: output parameter where register values are stored |
1825 | * |
1826 | * Return values: |
1827 | * 0: if successful |
1828 | * EINVAL: an error reading registers occurred |
1829 | */ |
1830 | int |
1831 | vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, |
1832 | struct vcpu_reg_state *vrs) |
1833 | { |
1834 | int i, ret = 0; |
1835 | uint64_t sel, limit, ar; |
1836 | uint64_t *gprs = vrs->vrs_gprs; |
1837 | uint64_t *crs = vrs->vrs_crs; |
1838 | uint64_t *msrs = vrs->vrs_msrs; |
1839 | uint64_t *drs = vrs->vrs_drs; |
1840 | struct vcpu_segment_info *sregs = vrs->vrs_sregs; |
1841 | struct vmx_msr_store *msr_store; |
1842 | |
1843 | #ifdef VMM_DEBUG |
1844 | /* VMCS should be loaded... */ |
1845 | paddr_t pa = 0ULL; |
1846 | if (vmptrst(&pa)) |
1847 | panic("%s: vmptrst", __func__); |
1848 | KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 1848, "pa == vcpu->vc_control_pa" )); |
1849 | #endif /* VMM_DEBUG */ |
1850 | |
1851 | if (regmask & VM_RWREGS_GPRS0x1) { |
1852 | gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax; |
1853 | gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx; |
1854 | gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx; |
1855 | gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx; |
1856 | gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi; |
1857 | gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi; |
1858 | gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8; |
1859 | gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9; |
1860 | gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10; |
1861 | gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11; |
1862 | gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12; |
1863 | gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13; |
1864 | gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14; |
1865 | gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15; |
1866 | gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp; |
1867 | gprs[VCPU_REGS_RIP16] = vcpu->vc_gueststate.vg_rip; |
1868 | if (vmread(VMCS_GUEST_IA32_RSP0x681C, &gprs[VCPU_REGS_RSP14])) |
1869 | goto errout; |
1870 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &gprs[VCPU_REGS_RFLAGS17])) |
1871 | goto errout; |
1872 | } |
1873 | |
1874 | if (regmask & VM_RWREGS_SREGS0x2) { |
1875 | for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields )[0])); i++) { |
1876 | if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel)) |
1877 | goto errout; |
1878 | if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit)) |
1879 | goto errout; |
1880 | if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar)) |
1881 | goto errout; |
1882 | if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid, |
1883 | &sregs[i].vsi_base)) |
1884 | goto errout; |
1885 | |
1886 | sregs[i].vsi_sel = sel; |
1887 | sregs[i].vsi_limit = limit; |
1888 | sregs[i].vsi_ar = ar; |
1889 | } |
1890 | |
1891 | if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &limit)) |
1892 | goto errout; |
1893 | if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816, |
1894 | &vrs->vrs_gdtr.vsi_base)) |
1895 | goto errout; |
1896 | vrs->vrs_gdtr.vsi_limit = limit; |
1897 | |
1898 | if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &limit)) |
1899 | goto errout; |
1900 | if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818, |
1901 | &vrs->vrs_idtr.vsi_base)) |
1902 | goto errout; |
1903 | vrs->vrs_idtr.vsi_limit = limit; |
1904 | } |
1905 | |
1906 | if (regmask & VM_RWREGS_CRS0x4) { |
1907 | crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2; |
1908 | crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0; |
1909 | if (vmread(VMCS_GUEST_IA32_CR00x6800, &crs[VCPU_REGS_CR00])) |
1910 | goto errout; |
1911 | if (vmread(VMCS_GUEST_IA32_CR30x6802, &crs[VCPU_REGS_CR32])) |
1912 | goto errout; |
1913 | if (vmread(VMCS_GUEST_IA32_CR40x6804, &crs[VCPU_REGS_CR43])) |
1914 | goto errout; |
1915 | if (vmread(VMCS_GUEST_PDPTE00x280A, &crs[VCPU_REGS_PDPTE06])) |
1916 | goto errout; |
1917 | if (vmread(VMCS_GUEST_PDPTE10x280C, &crs[VCPU_REGS_PDPTE17])) |
1918 | goto errout; |
1919 | if (vmread(VMCS_GUEST_PDPTE20x280E, &crs[VCPU_REGS_PDPTE28])) |
1920 | goto errout; |
1921 | if (vmread(VMCS_GUEST_PDPTE30x2810, &crs[VCPU_REGS_PDPTE39])) |
1922 | goto errout; |
1923 | } |
1924 | |
1925 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
1926 | |
1927 | if (regmask & VM_RWREGS_MSRS0x8) { |
1928 | for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) { |
1929 | msrs[i] = msr_store[i].vms_data; |
1930 | } |
1931 | } |
1932 | |
1933 | if (regmask & VM_RWREGS_DRS0x10) { |
1934 | drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0; |
1935 | drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1; |
1936 | drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2; |
1937 | drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3; |
1938 | drs[VCPU_REGS_DR64] = vcpu->vc_gueststate.vg_dr6; |
1939 | if (vmread(VMCS_GUEST_IA32_DR70x681A, &drs[VCPU_REGS_DR75])) |
1940 | goto errout; |
1941 | } |
1942 | |
1943 | goto out; |
1944 | |
1945 | errout: |
1946 | ret = EINVAL22; |
1947 | out: |
1948 | return (ret); |
1949 | } |
1950 | |
1951 | /* |
1952 | * vcpu_readregs_svm |
1953 | * |
1954 | * Reads 'vcpu's registers |
1955 | * |
1956 | * Parameters: |
1957 | * vcpu: the vcpu to read register values from |
1958 | * regmask: the types of registers to read |
1959 | * vrs: output parameter where register values are stored |
1960 | * |
1961 | * Return values: |
1962 | * 0: if successful |
1963 | */ |
1964 | int |
1965 | vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask, |
1966 | struct vcpu_reg_state *vrs) |
1967 | { |
1968 | uint64_t *gprs = vrs->vrs_gprs; |
1969 | uint64_t *crs = vrs->vrs_crs; |
1970 | uint64_t *msrs = vrs->vrs_msrs; |
1971 | uint64_t *drs = vrs->vrs_drs; |
1972 | uint32_t attr; |
1973 | struct vcpu_segment_info *sregs = vrs->vrs_sregs; |
1974 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
1975 | |
1976 | if (regmask & VM_RWREGS_GPRS0x1) { |
1977 | gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax; |
1978 | gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx; |
1979 | gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx; |
1980 | gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx; |
1981 | gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi; |
1982 | gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi; |
1983 | gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8; |
1984 | gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9; |
1985 | gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10; |
1986 | gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11; |
1987 | gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12; |
1988 | gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13; |
1989 | gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14; |
1990 | gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15; |
1991 | gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp; |
1992 | gprs[VCPU_REGS_RIP16] = vmcb->v_rip; |
1993 | gprs[VCPU_REGS_RSP14] = vmcb->v_rsp; |
1994 | gprs[VCPU_REGS_RFLAGS17] = vmcb->v_rflags; |
1995 | } |
1996 | |
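| /* |
| * The VMCB stores segment attributes with AVL/L/D/G in bits 8-11, |
| * while vsi_ar uses the descriptor access-rights layout with those |
| * flags in bits 12-15; "(attr << 4) & 0xf000" below repacks them |
| * (eg, a long mode code segment attr of 0x29b yields an access |
| * rights value of 0x209b). |
| */ |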
1997 | if (regmask & VM_RWREGS_SREGS0x2) { |
1998 | sregs[VCPU_REGS_CS0].vsi_sel = vmcb->v_cs.vs_sel; |
1999 | sregs[VCPU_REGS_CS0].vsi_limit = vmcb->v_cs.vs_lim; |
2000 | attr = vmcb->v_cs.vs_attr; |
2001 | sregs[VCPU_REGS_CS0].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2002 | 0xf000); |
2003 | sregs[VCPU_REGS_CS0].vsi_base = vmcb->v_cs.vs_base; |
2004 | |
2005 | sregs[VCPU_REGS_DS1].vsi_sel = vmcb->v_ds.vs_sel; |
2006 | sregs[VCPU_REGS_DS1].vsi_limit = vmcb->v_ds.vs_lim; |
2007 | attr = vmcb->v_ds.vs_attr; |
2008 | sregs[VCPU_REGS_DS1].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2009 | 0xf000); |
2010 | sregs[VCPU_REGS_DS1].vsi_base = vmcb->v_ds.vs_base; |
2011 | |
2012 | sregs[VCPU_REGS_ES2].vsi_sel = vmcb->v_es.vs_sel; |
2013 | sregs[VCPU_REGS_ES2].vsi_limit = vmcb->v_es.vs_lim; |
2014 | attr = vmcb->v_es.vs_attr; |
2015 | sregs[VCPU_REGS_ES2].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2016 | 0xf000); |
2017 | sregs[VCPU_REGS_ES2].vsi_base = vmcb->v_es.vs_base; |
2018 | |
2019 | sregs[VCPU_REGS_FS3].vsi_sel = vmcb->v_fs.vs_sel; |
2020 | sregs[VCPU_REGS_FS3].vsi_limit = vmcb->v_fs.vs_lim; |
2021 | attr = vmcb->v_fs.vs_attr; |
2022 | sregs[VCPU_REGS_FS3].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2023 | 0xf000); |
2024 | sregs[VCPU_REGS_FS3].vsi_base = vmcb->v_fs.vs_base; |
2025 | |
2026 | sregs[VCPU_REGS_GS4].vsi_sel = vmcb->v_gs.vs_sel; |
2027 | sregs[VCPU_REGS_GS4].vsi_limit = vmcb->v_gs.vs_lim; |
2028 | attr = vmcb->v_gs.vs_attr; |
2029 | sregs[VCPU_REGS_GS4].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2030 | 0xf000); |
2031 | sregs[VCPU_REGS_GS4].vsi_base = vmcb->v_gs.vs_base; |
2032 | |
2033 | sregs[VCPU_REGS_SS5].vsi_sel = vmcb->v_ss.vs_sel; |
2034 | sregs[VCPU_REGS_SS5].vsi_limit = vmcb->v_ss.vs_lim; |
2035 | attr = vmcb->v_ss.vs_attr; |
2036 | sregs[VCPU_REGS_SS5].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2037 | 0xf000); |
2038 | sregs[VCPU_REGS_SS5].vsi_base = vmcb->v_ss.vs_base; |
2039 | |
2040 | sregs[VCPU_REGS_LDTR6].vsi_sel = vmcb->v_ldtr.vs_sel; |
2041 | sregs[VCPU_REGS_LDTR6].vsi_limit = vmcb->v_ldtr.vs_lim; |
2042 | attr = vmcb->v_ldtr.vs_attr; |
2043 | sregs[VCPU_REGS_LDTR6].vsi_ar = (attr & 0xff) | ((attr << 4) |
2044 | & 0xf000); |
2045 | sregs[VCPU_REGS_LDTR6].vsi_base = vmcb->v_ldtr.vs_base; |
2046 | |
2047 | sregs[VCPU_REGS_TR7].vsi_sel = vmcb->v_tr.vs_sel; |
2048 | sregs[VCPU_REGS_TR7].vsi_limit = vmcb->v_tr.vs_lim; |
2049 | attr = vmcb->v_tr.vs_attr; |
2050 | sregs[VCPU_REGS_TR7].vsi_ar = (attr & 0xff) | ((attr << 4) & |
2051 | 0xf000); |
2052 | sregs[VCPU_REGS_TR7].vsi_base = vmcb->v_tr.vs_base; |
2053 | |
2054 | vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim; |
2055 | vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base; |
2056 | vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim; |
2057 | vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base; |
2058 | } |
2059 | |
2060 | if (regmask & VM_RWREGS_CRS0x4) { |
2061 | crs[VCPU_REGS_CR00] = vmcb->v_cr0; |
2062 | crs[VCPU_REGS_CR32] = vmcb->v_cr3; |
2063 | crs[VCPU_REGS_CR43] = vmcb->v_cr4; |
2064 | crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2; |
2065 | crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0; |
2066 | } |
2067 | |
2068 | if (regmask & VM_RWREGS_MSRS0x8) { |
2069 | msrs[VCPU_REGS_EFER0] = vmcb->v_efer; |
2070 | msrs[VCPU_REGS_STAR1] = vmcb->v_star; |
2071 | msrs[VCPU_REGS_LSTAR2] = vmcb->v_lstar; |
2072 | msrs[VCPU_REGS_CSTAR3] = vmcb->v_cstar; |
2073 | msrs[VCPU_REGS_SFMASK4] = vmcb->v_sfmask; |
2074 | msrs[VCPU_REGS_KGSBASE5] = vmcb->v_kgsbase; |
2075 | } |
2076 | |
2077 | if (regmask & VM_RWREGS_DRS0x10) { |
2078 | drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0; |
2079 | drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1; |
2080 | drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2; |
2081 | drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3; |
2082 | drs[VCPU_REGS_DR64] = vmcb->v_dr6; |
2083 | drs[VCPU_REGS_DR75] = vmcb->v_dr7; |
2084 | } |
2085 | |
2086 | return (0); |
2087 | } |
2088 | |
2089 | /* |
2090 | * vcpu_writeregs_vmx |
2091 | * |
2092 | * Writes VCPU registers |
2093 | * |
2094 | * Parameters: |
2095 | * vcpu: the vcpu that has to get its registers written to |
2096 | * regmask: the types of registers to write |
2097 | * loadvmcs: bit to indicate whether the VMCS has to be loaded first |
2098 | * vrs: the register values to write |
2099 | * |
2100 | * Return values: |
2101 | * 0: if successful |
2102 | * EINVAL: an error writing registers occurred |
2103 | */ |
2104 | int |
2105 | vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs, |
2106 | struct vcpu_reg_state *vrs) |
2107 | { |
2108 | int i, ret = 0; |
2109 | uint16_t sel; |
2110 | uint64_t limit, ar; |
2111 | uint64_t *gprs = vrs->vrs_gprs; |
2112 | uint64_t *crs = vrs->vrs_crs; |
2113 | uint64_t *msrs = vrs->vrs_msrs; |
2114 | uint64_t *drs = vrs->vrs_drs; |
2115 | struct vcpu_segment_info *sregs = vrs->vrs_sregs; |
2116 | struct vmx_msr_store *msr_store; |
2117 | |
2118 | if (loadvmcs) { |
2119 | if (vcpu_reload_vmcs_vmx(vcpu)) |
2120 | return (EINVAL22); |
2121 | } |
2122 | |
2123 | #ifdef VMM_DEBUG |
2124 | /* VMCS should be loaded... */ |
2125 | paddr_t pa = 0ULL; |
2126 | if (vmptrst(&pa)) |
2127 | panic("%s: vmptrst", __func__); |
2128 | KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 2128, "pa == vcpu->vc_control_pa" )); |
2129 | #endif /* VMM_DEBUG */ |
2130 | |
2131 | if (regmask & VM_RWREGS_GPRS0x1) { |
2132 | vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0]; |
2133 | vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1]; |
2134 | vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2]; |
2135 | vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3]; |
2136 | vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4]; |
2137 | vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5]; |
2138 | vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86]; |
2139 | vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97]; |
2140 | vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108]; |
2141 | vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119]; |
2142 | vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210]; |
2143 | vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311]; |
2144 | vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412]; |
2145 | vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513]; |
2146 | vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15]; |
2147 | vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16]; |
2148 | if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, gprs[VCPU_REGS_RIP16])) |
2149 | goto errout; |
2150 | if (vmwrite(VMCS_GUEST_IA32_RSP0x681C, gprs[VCPU_REGS_RSP14])) |
2151 | goto errout; |
2152 | if (vmwrite(VMCS_GUEST_IA32_RFLAGS0x6820, gprs[VCPU_REGS_RFLAGS17])) |
2153 | goto errout; |
2154 | } |
2155 | |
2156 | if (regmask & VM_RWREGS_SREGS0x2) { |
2157 | for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields )[0])); i++) { |
2158 | sel = sregs[i].vsi_sel; |
2159 | limit = sregs[i].vsi_limit; |
2160 | ar = sregs[i].vsi_ar; |
2161 | |
2162 | if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel)) |
2163 | goto errout; |
2164 | if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit)) |
2165 | goto errout; |
2166 | if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar)) |
2167 | goto errout; |
2168 | if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid, |
2169 | sregs[i].vsi_base)) |
2170 | goto errout; |
2171 | } |
2172 | |
2173 | if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, |
2174 | vrs->vrs_gdtr.vsi_limit)) |
2175 | goto errout; |
2176 | if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE0x6816, |
2177 | vrs->vrs_gdtr.vsi_base)) |
2178 | goto errout; |
2179 | if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, |
2180 | vrs->vrs_idtr.vsi_limit)) |
2181 | goto errout; |
2182 | if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE0x6818, |
2183 | vrs->vrs_idtr.vsi_base)) |
2184 | goto errout; |
2185 | } |
2186 | |
2187 | if (regmask & VM_RWREGS_CRS0x4) { |
2188 | vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05]; |
2189 | if (vmwrite(VMCS_GUEST_IA32_CR00x6800, crs[VCPU_REGS_CR00])) |
2190 | goto errout; |
2191 | if (vmwrite(VMCS_GUEST_IA32_CR30x6802, crs[VCPU_REGS_CR32])) |
2192 | goto errout; |
2193 | if (vmwrite(VMCS_GUEST_IA32_CR40x6804, crs[VCPU_REGS_CR43])) |
2194 | goto errout; |
2195 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, crs[VCPU_REGS_PDPTE06])) |
2196 | goto errout; |
2197 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, crs[VCPU_REGS_PDPTE17])) |
2198 | goto errout; |
2199 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, crs[VCPU_REGS_PDPTE28])) |
2200 | goto errout; |
2201 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, crs[VCPU_REGS_PDPTE39])) |
2202 | goto errout; |
2203 | } |
2204 | |
2205 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
2206 | |
2207 | if (regmask & VM_RWREGS_MSRS0x8) { |
2208 | for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) { |
2209 | msr_store[i].vms_data = msrs[i]; |
2210 | } |
2211 | } |
2212 | |
2213 | if (regmask & VM_RWREGS_DRS0x10) { |
2214 | vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00]; |
2215 | vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11]; |
2216 | vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22]; |
2217 | vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33]; |
2218 | vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR64]; |
2219 | if (vmwrite(VMCS_GUEST_IA32_DR70x681A, drs[VCPU_REGS_DR75])) |
2220 | goto errout; |
2221 | } |
2222 | |
2223 | goto out; |
2224 | |
2225 | errout: |
2226 | ret = EINVAL22; |
2227 | out: |
2228 | if (loadvmcs) { |
2229 | if (vmclear(&vcpu->vc_control_pa)) |
2230 | ret = EINVAL22; |
2231 | } |
2232 | return (ret); |
2233 | } |
2234 | |
2235 | /* |
2236 | * vcpu_writeregs_svm |
2237 | * |
2238 | * Writes 'vcpu's registers |
2239 | * |
2240 | * Parameters: |
2241 | * vcpu: the vcpu that has to get its registers written to |
2242 | * regmask: the types of registers to write |
2243 | * vrs: the register values to write |
2244 | * |
2245 | * Return values: |
2246 | * 0: if successful |
2247 | * EINVAL: an error writing registers occurred |
2248 | */ |
2249 | int |
2250 | vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask, |
2251 | struct vcpu_reg_state *vrs) |
2252 | { |
2253 | uint64_t *gprs = vrs->vrs_gprs; |
2254 | uint64_t *crs = vrs->vrs_crs; |
2255 | uint16_t attr; |
2256 | uint64_t *msrs = vrs->vrs_msrs; |
2257 | uint64_t *drs = vrs->vrs_drs; |
2258 | struct vcpu_segment_info *sregs = vrs->vrs_sregs; |
2259 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
2260 | |
2261 | if (regmask & VM_RWREGS_GPRS0x1) { |
2262 | vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0]; |
2263 | vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1]; |
2264 | vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2]; |
2265 | vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3]; |
2266 | vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4]; |
2267 | vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5]; |
2268 | vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86]; |
2269 | vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97]; |
2270 | vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108]; |
2271 | vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119]; |
2272 | vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210]; |
2273 | vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311]; |
2274 | vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412]; |
2275 | vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513]; |
2276 | vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15]; |
2277 | vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16]; |
2278 | |
2279 | vmcb->v_rip = gprs[VCPU_REGS_RIP16]; |
2280 | vmcb->v_rsp = gprs[VCPU_REGS_RSP14]; |
2281 | vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS17]; |
2282 | } |
2283 | |
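| /* |
| * Reverse of the repacking in vcpu_readregs_svm(): move AVL/L/D/G |
| * from bits 12-15 of vsi_ar back down to bits 8-11 of the VMCB |
| * segment attributes. |
| */ |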
2284 | if (regmask & VM_RWREGS_SREGS0x2) { |
2285 | vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS0].vsi_sel; |
2286 | vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS0].vsi_limit; |
2287 | attr = sregs[VCPU_REGS_CS0].vsi_ar; |
2288 | vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2289 | vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS0].vsi_base; |
2290 | vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS1].vsi_sel; |
2291 | vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS1].vsi_limit; |
2292 | attr = sregs[VCPU_REGS_DS1].vsi_ar; |
2293 | vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2294 | vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS1].vsi_base; |
2295 | vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES2].vsi_sel; |
2296 | vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES2].vsi_limit; |
2297 | attr = sregs[VCPU_REGS_ES2].vsi_ar; |
2298 | vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2299 | vmcb->v_es.vs_base = sregs[VCPU_REGS_ES2].vsi_base; |
2300 | vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS3].vsi_sel; |
2301 | vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS3].vsi_limit; |
2302 | attr = sregs[VCPU_REGS_FS3].vsi_ar; |
2303 | vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2304 | vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS3].vsi_base; |
2305 | vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS4].vsi_sel; |
2306 | vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS4].vsi_limit; |
2307 | attr = sregs[VCPU_REGS_GS4].vsi_ar; |
2308 | vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2309 | vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS4].vsi_base; |
2310 | vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS5].vsi_sel; |
2311 | vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS5].vsi_limit; |
2312 | attr = sregs[VCPU_REGS_SS5].vsi_ar; |
2313 | vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2314 | vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS5].vsi_base; |
2315 | vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR6].vsi_sel; |
2316 | vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR6].vsi_limit; |
2317 | attr = sregs[VCPU_REGS_LDTR6].vsi_ar; |
2318 | vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2319 | vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR6].vsi_base; |
2320 | vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR7].vsi_sel; |
2321 | vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR7].vsi_limit; |
2322 | attr = sregs[VCPU_REGS_TR7].vsi_ar; |
2323 | vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); |
2324 | vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR7].vsi_base; |
2325 | vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit; |
2326 | vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base; |
2327 | vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit; |
2328 | vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base; |
2329 | } |
2330 | |
2331 | if (regmask & VM_RWREGS_CRS0x4) { |
2332 | vmcb->v_cr0 = crs[VCPU_REGS_CR00]; |
2333 | vmcb->v_cr3 = crs[VCPU_REGS_CR32]; |
2334 | vmcb->v_cr4 = crs[VCPU_REGS_CR43]; |
2335 | vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR21]; |
2336 | vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05]; |
2337 | } |
2338 | |
2339 | if (regmask & VM_RWREGS_MSRS0x8) { |
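| /* |
| * OR rather than assign, so bits already set in the VMCB copy of |
| * EFER (notably EFER_SVME, enabled at reset) are preserved. |
| */ |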
2340 | vmcb->v_efer |= msrs[VCPU_REGS_EFER0]; |
2341 | vmcb->v_star = msrs[VCPU_REGS_STAR1]; |
2342 | vmcb->v_lstar = msrs[VCPU_REGS_LSTAR2]; |
2343 | vmcb->v_cstar = msrs[VCPU_REGS_CSTAR3]; |
2344 | vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK4]; |
2345 | vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE5]; |
2346 | } |
2347 | |
2348 | if (regmask & VM_RWREGS_DRS0x10) { |
2349 | vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00]; |
2350 | vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11]; |
2351 | vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22]; |
2352 | vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33]; |
2353 | vmcb->v_dr6 = drs[VCPU_REGS_DR64]; |
2354 | vmcb->v_dr7 = drs[VCPU_REGS_DR75]; |
2355 | } |
2356 | |
2357 | return (0); |
2358 | } |
2359 | |
2360 | /* |
2361 | * vcpu_reset_regs_svm |
2362 | * |
2363 | * Initializes 'vcpu's registers to supplied state |
2364 | * |
2365 | * Parameters: |
2366 | * vcpu: the vcpu whose register state is to be initialized |
2367 | * vrs: the register state to set |
2368 | * |
2369 | * Return values: |
2370 | * 0: registers init'ed successfully |
2371 | * EINVAL: an error occurred setting register state |
2372 | */ |
2373 | int |
2374 | vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs) |
2375 | { |
2376 | struct vmcb *vmcb; |
2377 | int ret; |
2378 | uint16_t asid; |
2379 | |
2380 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
2381 | |
2382 | /* |
2383 | * Intercept controls |
2384 | * |
2385 | * External Interrupt exiting (SVM_INTERCEPT_INTR) |
2386 | * External NMI exiting (SVM_INTERCEPT_NMI) |
2387 | * CPUID instruction (SVM_INTERCEPT_CPUID) |
2388 | * HLT instruction (SVM_INTERCEPT_HLT) |
2389 | * I/O instructions (SVM_INTERCEPT_INOUT) |
2390 | * MSR access (SVM_INTERCEPT_MSR) |
2391 | * shutdown events (SVM_INTERCEPT_SHUTDOWN) |
2392 | * |
2393 | * VMRUN instruction (SVM_INTERCEPT_VMRUN) |
2394 | * VMMCALL instruction (SVM_INTERCEPT_VMMCALL) |
2395 | * VMLOAD instruction (SVM_INTERCEPT_VMLOAD) |
2396 | * VMSAVE instruction (SVM_INTERCEPT_VMSAVE) |
2397 | * STGI instruction (SVM_INTERCEPT_STGI) |
2398 | * CLGI instruction (SVM_INTERCEPT_CLGI) |
2399 | * SKINIT instruction (SVM_INTERCEPT_SKINIT) |
2400 | * ICEBP instruction (SVM_INTERCEPT_ICEBP) |
2401 | * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND) |
2402 | * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND) |
2403 | * MONITOR instruction (SVM_INTERCEPT_MONITOR) |
2404 | * RDTSCP instruction (SVM_INTERCEPT_RDTSCP) |
2405 | * INVLPGA instruction (SVM_INTERCEPT_INVLPGA) |
2406 | * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available) |
2407 | */ |
2408 | vmcb->v_intercept1 = SVM_INTERCEPT_INTR(1UL << 0) | SVM_INTERCEPT_NMI(1UL << 1) | |
2409 | SVM_INTERCEPT_CPUID(1UL << 18) | SVM_INTERCEPT_HLT(1UL << 24) | SVM_INTERCEPT_INOUT(1UL << 27) | |
2410 | SVM_INTERCEPT_MSR(1UL << 28) | SVM_INTERCEPT_SHUTDOWN(1UL << 31); |
2411 | |
2412 | vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN(1UL << 0) | SVM_INTERCEPT_VMMCALL(1UL << 1) | |
2413 | SVM_INTERCEPT_VMLOAD(1UL << 2) | SVM_INTERCEPT_VMSAVE(1UL << 3) | SVM_INTERCEPT_STGI(1UL << 4) | |
2414 | SVM_INTERCEPT_CLGI(1UL << 5) | SVM_INTERCEPT_SKINIT(1UL << 6) | SVM_INTERCEPT_ICEBP(1UL << 8) | |
2415 | SVM_INTERCEPT_MWAIT_UNCOND(1UL << 11) | SVM_INTERCEPT_MONITOR(1UL << 10) | |
2416 | SVM_INTERCEPT_MWAIT_COND(1UL << 12) | SVM_INTERCEPT_RDTSCP(1UL << 7) | |
2417 | SVM_INTERCEPT_INVLPGA(1UL << 26); |
2418 | |
2419 | if (xsave_mask) |
2420 | vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV(1UL << 13); |
2421 | |
2422 | /* Setup I/O bitmap */ |
2423 | memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_svm_ioio_va), (0xFF) , (3 * (1 << 12))); |
2424 | vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa); |
2425 | |
2426 | /* Setup MSR bitmap */ |
2427 | memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF ), (2 * (1 << 12))); |
2428 | vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa); |
2429 | svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a); |
2430 | svm_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174); |
2431 | svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175); |
2432 | svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176); |
2433 | svm_setmsrbrw(vcpu, MSR_STAR0xc0000081); |
2434 | svm_setmsrbrw(vcpu, MSR_LSTAR0xc0000082); |
2435 | svm_setmsrbrw(vcpu, MSR_CSTAR0xc0000083); |
2436 | svm_setmsrbrw(vcpu, MSR_SFMASK0xc0000084); |
2437 | svm_setmsrbrw(vcpu, MSR_FSBASE0xc0000100); |
2438 | svm_setmsrbrw(vcpu, MSR_GSBASE0xc0000101); |
2439 | svm_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102); |
2440 | |
2441 | /* EFER is R/O so we can ensure the guest always has SVME */ |
2442 | svm_setmsrbr(vcpu, MSR_EFER0xc0000080); |
2443 | |
2444 | /* allow reading TSC */ |
2445 | svm_setmsrbr(vcpu, MSR_TSC0x010); |
2446 | |
2447 | /* Guest VCPU ASID */ |
2448 | if (vmm_alloc_vpid(&asid)) { |
2449 | DPRINTF("%s: could not allocate asid\n", __func__); |
2450 | ret = EINVAL22; |
2451 | goto exit; |
2452 | } |
2453 | |
2454 | vmcb->v_asid = asid; |
2455 | vcpu->vc_vpid = asid; |
2456 | |
2457 | /* TLB Control - First time in, flush all */ |
2458 | vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL1; |
2459 | |
2460 | /* INTR masking */ |
2461 | vmcb->v_intr_masking = 1; |
2462 | |
2463 | /* PAT */ |
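| /* (the PATENTRY() terms below work out to 0x0007010600070106) */ |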
2464 | vmcb->v_g_pat = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) | |
2465 | PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) | |
2466 | PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) | |
2467 | PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8)); |
2468 | |
2469 | /* NPT */ |
2470 | if (vmm_softc->mode == VMM_MODE_RVI) { |
2471 | vmcb->v_np_enable = 1; |
2472 | vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa; |
2473 | } |
2474 | |
2475 | /* Enable SVME in EFER (must always be set) */ |
2476 | vmcb->v_efer |= EFER_SVME0x00001000; |
2477 | |
2478 | ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), vrs); |
2479 | |
2480 | /* xcr0 power on default sets bit 0 (x87 state) */ |
2481 | vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask; |
2482 | |
2483 | vcpu->vc_parent->vm_map->pmap->eptp = 0; |
2484 | |
2485 | exit: |
2486 | return ret; |
2487 | } |
2488 | |
2489 | /* |
2490 | * svm_setmsrbr |
2491 | * |
2492 | * Allow read access to the specified msr on the supplied vcpu. |
2493 | * |
2494 | * Parameters: |
2495 | * vcpu: the VCPU to allow access |
2496 | * msr: the MSR number to allow access to |
2497 | */ |
2498 | void |
2499 | svm_setmsrbr(struct vcpu *vcpu, uint32_t msr) |
2500 | { |
2501 | uint8_t *msrs; |
2502 | uint16_t idx; |
2503 | |
2504 | msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; |
2505 | |
2506 | /* |
2507 | * MSR Read bitmap layout: |
2508 | * Pentium MSRs (0x0 - 0x1fff) @ 0x0 |
2509 | * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800 |
2510 | * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000 |
2511 | * |
2512 | * Read enable bit is low order bit of 2-bit pair |
2513 | * per MSR (eg, MSR 0x0 read bit is at bit 0 @ 0x0) |
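| * Another example: EFER (MSR 0xc0000080) maps to |
| * idx = 0x80 / 4 + 0x800 = 0x820, read enable = bit 0 of msrs[idx]. |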
2514 | */ |
2515 | if (msr <= 0x1fff) { |
2516 | idx = SVM_MSRIDX(msr)((msr) / 4); |
2517 | msrs[idx] &= ~(SVM_MSRBIT_R(msr)(1 << (((msr) % 4) * 2))); |
2518 | } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { |
2519 | idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800; |
2520 | msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2))); |
2521 | } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) { |
2522 | idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000; |
2523 | msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2))); |
2524 | } else { |
2525 | printf("%s: invalid msr 0x%x\n", __func__, msr); |
2526 | return; |
2527 | } |
2528 | } |
2529 | |
2530 | /* |
2531 | * svm_setmsrbw |
2532 | * |
2533 | * Allow write access to the specified msr on the supplied vcpu |
2534 | * |
2535 | * Parameters: |
2536 | * vcpu: the VCPU to allow access |
2537 | * msr: the MSR number to allow access to |
2538 | */ |
2539 | void |
2540 | svm_setmsrbw(struct vcpu *vcpu, uint32_t msr) |
2541 | { |
2542 | uint8_t *msrs; |
2543 | uint16_t idx; |
2544 | |
2545 | msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; |
2546 | |
2547 | /* |
2548 | * MSR Write bitmap layout: |
2549 | * Pentium MSRs (0x0 - 0x1fff) @ 0x0 |
2550 | * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800 |
2551 | * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000 |
2552 | * |
2553 | * Write enable bit is high order bit of 2-bit pair |
2554 | * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0) |
2555 | */ |
2556 | if (msr <= 0x1fff) { |
2557 | idx = SVM_MSRIDX(msr)((msr) / 4); |
2558 | msrs[idx] &= ~(SVM_MSRBIT_W(msr)(1 << (((msr) % 4) * 2 + 1))); |
2559 | } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { |
2560 | idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800; |
2561 | msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2 + 1))); |
2562 | } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) { |
2563 | idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000; |
2564 | msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2 + 1))); |
2565 | } else { |
2566 | printf("%s: invalid msr 0x%x\n", __func__, msr); |
2567 | return; |
2568 | } |
2569 | } |
2570 | |
2571 | /* |
2572 | * svm_setmsrbrw |
2573 | * |
2574 | * Allow read/write access to the specified msr on the supplied vcpu |
2575 | * |
2576 | * Parameters: |
2577 | * vcpu: the VCPU to allow access |
2578 | * msr: the MSR number to allow access to |
2579 | */ |
2580 | void |
2581 | svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr) |
2582 | { |
2583 | svm_setmsrbr(vcpu, msr); |
2584 | svm_setmsrbw(vcpu, msr); |
2585 | } |
2586 | |
2587 | /* |
2588 | * vmx_setmsrbr |
2589 | * |
2590 | * Allow read access to the specified msr on the supplied vcpu. |
2591 | * |
2592 | * Parameters: |
2593 | * vcpu: the VCPU to allow access |
2594 | * msr: the MSR number to allow access to |
2595 | */ |
2596 | void |
2597 | vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr) |
2598 | { |
2599 | uint8_t *msrs; |
2600 | uint16_t idx; |
2601 | |
2602 | msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; |
2603 | |
2604 | /* |
2605 | * MSR Read bitmap layout: |
2606 | * "Low" MSRs (0x0 - 0x1fff) @ 0x0 |
2607 | * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400 |
2608 | */ |
2609 | if (msr <= 0x1fff) { |
2610 | idx = VMX_MSRIDX(msr)((msr) / 8); |
2611 | msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8)); |
2612 | } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { |
2613 | idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0x400; |
2614 | msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8)); |
2615 | } else |
2616 | printf("%s: invalid msr 0x%x\n", __func__, msr); |
2617 | } |
2618 | |
2619 | /* |
2620 | * vmx_setmsrbw |
2621 | * |
2622 | * Allow write access to the specified msr on the supplied vcpu |
2623 | * |
2624 | * Parameters: |
2625 | * vcpu: the VCPU to allow access |
2626 | * msr: the MSR number to allow access to |
2627 | */ |
2628 | void |
2629 | vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr) |
2630 | { |
2631 | uint8_t *msrs; |
2632 | uint16_t idx; |
2633 | |
2634 | msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; |
2635 | |
2636 | /* |
2637 | * MSR Write bitmap layout: |
2638 | * "Low" MSRs (0x0 - 0x1fff) @ 0x800 |
2639 | * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00 |
2640 | */ |
2641 | if (msr <= 0x1fff) { |
2642 | idx = VMX_MSRIDX(msr)((msr) / 8) + 0x800; |
2643 | msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8)); |
2644 | } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { |
2645 | idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0xc00; |
2646 | msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8)); |
2647 | } else |
2648 | printf("%s: invalid msr 0x%x\n", __func__, msr); |
2649 | } |
2650 | |
2651 | /* |
2652 | * vmx_setmsrbrw |
2653 | * |
2654 | * Allow read/write access to the specified msr on the supplied vcpu |
2655 | * |
2656 | * Parameters: |
2657 | * vcpu: the VCPU to allow access |
2658 | * msr: the MSR number to allow access to |
2659 | */ |
2660 | void |
2661 | vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr) |
2662 | { |
2663 | vmx_setmsrbr(vcpu, msr); |
2664 | vmx_setmsrbw(vcpu, msr); |
2665 | } |
2666 | |
2667 | /* |
2668 | * svm_set_clean |
2669 | * |
2670 | * Sets (marks as unmodified) the VMCB clean bits set in 'value'. |
2671 | * For example, to set the clean bit for the VMCB intercepts (bit position 0), |
2672 | * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument. |
2673 | * Multiple cleanbits can be provided in 'value' at the same time (eg, |
2674 | * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR"). |
2675 | * |
2676 | * Note that this function does not clear any bits; to clear bits in the |
2677 | * vmcb cleanbits bitfield, use 'svm_set_dirty'. |
2678 | * |
2679 | * Parameters: |
2680 | * vcpu: the VCPU whose VMCB clean value should be set |
2681 | * value: the value(s) to enable in the cleanbits mask |
2682 | */ |
2683 | void |
2684 | svm_set_clean(struct vcpu *vcpu, uint32_t value) |
2685 | { |
2686 | struct vmcb *vmcb; |
2687 | |
2688 | /* If no cleanbits support, do nothing */ |
2689 | if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_svm.svm_vmcb_clean) |
2690 | return; |
2691 | |
2692 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
2693 | |
2694 | vmcb->v_vmcb_clean_bits |= value; |
2695 | } |
2696 | |
2697 | /* |
2698 | * svm_set_dirty |
2699 | * |
2700 | * Clears (marks as modified) the VMCB clean bits set in 'value'. |
2701 | * For example, to clear the bit for the VMCB intercepts (bit position 0) |
2702 | * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument. |
2703 | * Multiple dirty bits can be provided in 'value' at the same time (eg, |
2704 | * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR"). |
2705 | * |
2706 | * Parameters: |
2707 | * vcpu: the VCPU whose VMCB dirty value should be set |
2708 | * value: the value(s) to dirty in the cleanbits mask |
2709 | */ |
2710 | void |
2711 | svm_set_dirty(struct vcpu *vcpu, uint32_t value) |
2712 | { |
2713 | struct vmcb *vmcb; |
2714 | |
2715 | /* If no cleanbits support, do nothing */ |
2716 | if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_svm.svm_vmcb_clean) |
2717 | return; |
2718 | |
2719 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
2720 | |
2721 | vmcb->v_vmcb_clean_bits &= ~value; |
2722 | } |
2723 | |
2724 | /* |
2725 | * vcpu_reset_regs_vmx |
2726 | * |
2727 | * Initializes 'vcpu's registers to supplied state |
2728 | * |
2729 | * Parameters: |
2730 | * vcpu: the vcpu whose register state is to be initialized |
2731 | * vrs: the register state to set |
2732 | * |
2733 | * Return values: |
2734 | * 0: registers init'ed successfully |
2735 | * EINVAL: an error occurred setting register state |
2736 | */ |
2737 | int |
2738 | vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs) |
2739 | { |
2740 | int ret = 0, ug = 0; |
2741 | uint32_t cr0, cr4; |
2742 | uint32_t pinbased, procbased, procbased2, exit, entry; |
2743 | uint32_t want1, want0; |
2744 | uint64_t msr, ctrlval, eptp, cr3; |
2745 | uint16_t ctrl, vpid; |
2746 | struct vmx_msr_store *msr_store; |
2747 | |
2748 | rw_assert_wrlock(&vcpu->vc_lock); |
2749 | |
2750 | cr0 = vrs->vrs_crs[VCPU_REGS_CR00]; |
2751 | |
2752 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
2753 | DPRINTF("%s: error reloading VMCS\n", __func__); |
2754 | ret = EINVAL22; |
2755 | goto exit; |
2756 | } |
2757 | |
2758 | #ifdef VMM_DEBUG |
2759 | /* VMCS should be loaded... */ |
2760 | paddr_t pa = 0ULL; |
2761 | if (vmptrst(&pa)) |
2762 | panic("%s: vmptrst", __func__); |
2763 | KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 2763, "pa == vcpu->vc_control_pa" )); |
2764 | #endif /* VMM_DEBUG */ |
2765 | |
2766 | /* Compute Basic Entry / Exit Controls */ |
2767 | vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC0x480); |
2768 | vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS0x484); |
2769 | vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS0x483); |
2770 | vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS0x481); |
2771 | vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS0x482); |
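| /* |
| * In each IA32_VMX_*_CTLS MSR, the low 32 bits report the allowed |
| * 0-settings (a 1 there means the control must be set) and the high |
| * 32 bits the allowed 1-settings; vcpu_vmx_compute_ctrl() checks |
| * the want1/want0 masks below against these. |
| */ |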
2772 | |
2773 | /* Compute True Entry / Exit Controls (if applicable) */ |
2774 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
2775 | vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS0x490); |
2776 | vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS0x48F); |
2777 | vcpu->vc_vmx_true_pinbased_ctls = |
2778 | rdmsr(IA32_VMX_TRUE_PINBASED_CTLS0x48D); |
2779 | vcpu->vc_vmx_true_procbased_ctls = |
2780 | rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS0x48E); |
2781 | } |
2782 | |
2783 | /* Compute Secondary Procbased Controls (if applicable) */ |
2784 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
2785 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) |
2786 | vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS0x48B); |
2787 | |
2788 | /* |
2789 | * Pinbased ctrls |
2790 | * |
2791 | * We must be able to set the following: |
2792 | * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt |
2793 | * IA32_VMX_NMI_EXITING - exit on host NMI |
2794 | */ |
2795 | want1 = IA32_VMX_EXTERNAL_INT_EXITING(1ULL << 0) | |
2796 | IA32_VMX_NMI_EXITING(1ULL << 3); |
2797 | want0 = 0; |
2798 | |
2799 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
2800 | ctrl = IA32_VMX_TRUE_PINBASED_CTLS0x48D; |
2801 | ctrlval = vcpu->vc_vmx_true_pinbased_ctls; |
2802 | } else { |
2803 | ctrl = IA32_VMX_PINBASED_CTLS0x481; |
2804 | ctrlval = vcpu->vc_vmx_pinbased_ctls; |
2805 | } |
2806 | |
2807 | if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) { |
2808 | DPRINTF("%s: error computing pinbased controls\n", __func__); |
2809 | ret = EINVAL22; |
2810 | goto exit; |
2811 | } |
2812 | |
2813 | if (vmwrite(VMCS_PINBASED_CTLS0x4000, pinbased)) { |
2814 | DPRINTF("%s: error setting pinbased controls\n", __func__); |
2815 | ret = EINVAL22; |
2816 | goto exit; |
2817 | } |
2818 | |
2819 | /* |
2820 | * Procbased ctrls |
2821 | * |
2822 | * We must be able to set the following: |
2823 | * IA32_VMX_HLT_EXITING - exit on HLT instruction |
2824 | * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction |
2825 | * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions |
2826 | * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses |
2827 | * IA32_VMX_CR8_LOAD_EXITING - guest TPR access |
2828 | * IA32_VMX_CR8_STORE_EXITING - guest TPR access |
2829 | * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow) |
2830 | * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction |
2831 | * |
2832 | * If we have EPT, we must be able to clear the following |
2833 | * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses |
2834 | * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses |
2835 | */ |
2836 | want1 = IA32_VMX_HLT_EXITING(1ULL << 7) | |
2837 | IA32_VMX_MWAIT_EXITING(1ULL << 10) | |
2838 | IA32_VMX_UNCONDITIONAL_IO_EXITING(1ULL << 24) | |
2839 | IA32_VMX_USE_MSR_BITMAPS(1ULL << 28) | |
2840 | IA32_VMX_CR8_LOAD_EXITING(1ULL << 19) | |
2841 | IA32_VMX_CR8_STORE_EXITING(1ULL << 20) | |
2842 | IA32_VMX_MONITOR_EXITING(1ULL << 29) | |
2843 | IA32_VMX_USE_TPR_SHADOW(1ULL << 21); |
2844 | want0 = 0; |
2845 | |
2846 | if (vmm_softc->mode == VMM_MODE_EPT) { |
2847 | want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31); |
2848 | want0 |= IA32_VMX_CR3_LOAD_EXITING(1ULL << 15) | |
2849 | IA32_VMX_CR3_STORE_EXITING(1ULL << 16); |
2850 | } |
2851 | |
2852 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
2853 | ctrl = IA32_VMX_TRUE_PROCBASED_CTLS0x48E; |
2854 | ctrlval = vcpu->vc_vmx_true_procbased_ctls; |
2855 | } else { |
2856 | ctrl = IA32_VMX_PROCBASED_CTLS0x482; |
2857 | ctrlval = vcpu->vc_vmx_procbased_ctls; |
2858 | } |
2859 | |
2860 | if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) { |
2861 | DPRINTF("%s: error computing procbased controls\n", __func__); |
2862 | ret = EINVAL22; |
2863 | goto exit; |
2864 | } |
2865 | |
2866 | if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) { |
2867 | DPRINTF("%s: error setting procbased controls\n", __func__); |
2868 | ret = EINVAL22; |
2869 | goto exit; |
2870 | } |
2871 | |
2872 | /* |
2873 | * Secondary Procbased ctrls |
2874 | * |
2875 | * We want to be able to set the following, if available: |
2876 | * IA32_VMX_ENABLE_VPID - use VPIDs where available |
2877 | * |
2878 | * If we have EPT, we must be able to set the following: |
2879 | * IA32_VMX_ENABLE_EPT - enable EPT |
2880 | * |
2881 | * If we have unrestricted guest capability, we must be able to set |
2882 | * the following: |
2883 | * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller |
2884 | * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter) |
2885 | */ |
2886 | want1 = 0; |
2887 | |
2888 | /* XXX checking for 2ndary controls can be combined here */ |
2889 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
2890 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) { |
2891 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
2892 | IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) { |
2893 | want1 |= IA32_VMX_ENABLE_VPID(1ULL << 5); |
2894 | vcpu->vc_vmx_vpid_enabled = 1; |
2895 | } |
2896 | } |
2897 | |
2898 | if (vmm_softc->mode == VMM_MODE_EPT) |
2899 | want1 |= IA32_VMX_ENABLE_EPT(1ULL << 1); |
2900 | |
2901 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
2902 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) { |
2903 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
2904 | IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7), 1)) { |
2905 | if ((cr0 & (CR0_PE0x00000001 | CR0_PG0x80000000)) == 0) { |
2906 | want1 |= IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7); |
2907 | ug = 1; |
2908 | } |
2909 | } |
2910 | } |
2911 | |
2912 | want0 = ~want1; |
2913 | ctrlval = vcpu->vc_vmx_procbased2_ctls; |
2914 | ctrl = IA32_VMX_PROCBASED2_CTLS0x48B; |
2915 | |
2916 | if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) { |
2917 | DPRINTF("%s: error computing secondary procbased controls\n", |
2918 | __func__); |
2919 | ret = EINVAL22; |
2920 | goto exit; |
2921 | } |
2922 | |
2923 | if (vmwrite(VMCS_PROCBASED2_CTLS0x401E, procbased2)) { |
2924 | DPRINTF("%s: error setting secondary procbased controls\n", |
2925 | __func__); |
2926 | ret = EINVAL22; |
2927 | goto exit; |
2928 | } |
2929 | |
2930 | /* |
2931 | * Exit ctrls |
2932 | * |
2933 | * We must be able to set the following: |
2934 | * IA32_VMX_SAVE_DEBUG_CONTROLS |
2935 | * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode |
2936 | * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit |
2937 | */ |
2938 | want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE(1ULL << 9) | |
2939 | IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT(1ULL << 15) | |
2940 | IA32_VMX_SAVE_DEBUG_CONTROLS(1ULL << 2); |
2941 | want0 = 0; |
2942 | |
2943 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
2944 | ctrl = IA32_VMX_TRUE_EXIT_CTLS0x48F; |
2945 | ctrlval = vcpu->vc_vmx_true_exit_ctls; |
2946 | } else { |
2947 | ctrl = IA32_VMX_EXIT_CTLS0x483; |
2948 | ctrlval = vcpu->vc_vmx_exit_ctls; |
2949 | } |
2950 | |
2951 | if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) { |
2952 | DPRINTF("%s: error computing exit controls\n", __func__); |
2953 | ret = EINVAL22; |
2954 | goto exit; |
2955 | } |
2956 | |
2957 | if (vmwrite(VMCS_EXIT_CTLS0x400C, exit)) { |
2958 | DPRINTF("%s: error setting exit controls\n", __func__); |
2959 | ret = EINVAL22; |
2960 | goto exit; |
2961 | } |
2962 | |
2963 | /* |
2964 | * Entry ctrls |
2965 | * |
2966 | * We must be able to set the following: |
2967 | * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest) |
2968 | * IA32_VMX_LOAD_DEBUG_CONTROLS |
2969 | * We must be able to clear the following: |
2970 | * IA32_VMX_ENTRY_TO_SMM - enter to SMM |
2971 | * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT |
2972 | * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY |
2973 | */ |
2974 | want1 = IA32_VMX_LOAD_DEBUG_CONTROLS(1ULL << 2); |
2975 | if (vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400) |
2976 | want1 |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9); |
2977 | |
2978 | want0 = IA32_VMX_ENTRY_TO_SMM(1ULL << 10) | |
2979 | IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT(1ULL << 11) | |
2980 | IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13); |
2981 | |
2982 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
2983 | ctrl = IA32_VMX_TRUE_ENTRY_CTLS0x490; |
2984 | ctrlval = vcpu->vc_vmx_true_entry_ctls; |
2985 | } else { |
2986 | ctrl = IA32_VMX_ENTRY_CTLS0x484; |
2987 | ctrlval = vcpu->vc_vmx_entry_ctls; |
2988 | } |
2989 | |
2990 | if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) { |
2991 | ret = EINVAL22; |
2992 | goto exit; |
2993 | } |
2994 | |
2995 | if (vmwrite(VMCS_ENTRY_CTLS0x4012, entry)) { |
2996 | ret = EINVAL22; |
2997 | goto exit; |
2998 | } |
2999 | |
3000 | if (vmm_softc->mode == VMM_MODE_EPT) { |
3001 | eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa; |
3002 | msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C); |
3003 | if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4(1ULL << 6)) { |
3004 | /* Page walk length 4 supported */ |
3005 | eptp |= ((IA32_EPT_PAGE_WALK_LENGTH0x4 - 1) << 3); |
3006 | } else { |
3007 | DPRINTF("EPT page walk length 4 not supported\n"); |
3008 | ret = EINVAL22; |
3009 | goto exit; |
3010 | } |
3011 | |
3012 | if (msr & IA32_EPT_VPID_CAP_WB(1ULL << 14)) { |
3013 | /* WB cache type supported */ |
3014 | eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB0x6; |
3015 | } else |
3016 | DPRINTF("%s: no WB cache type available, guest VM " |
3017 | "will run uncached\n", __func__); |
3018 | |
3019 | DPRINTF("Guest EPTP = 0x%llx\n", eptp); |
3020 | if (vmwrite(VMCS_GUEST_IA32_EPTP0x201A, eptp)) { |
3021 | DPRINTF("%s: error setting guest EPTP\n", __func__); |
3022 | ret = EINVAL22; |
3023 | goto exit; |
3024 | } |
3025 | |
3026 | vcpu->vc_parent->vm_map->pmap->eptp = eptp; |
3027 | } |
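/*
 * The EPTP constructed above packs three things into one 64-bit value; a
 * minimal sketch of that layout, assuming the Intel SDM encoding (bits 2:0
 * memory type, bits 5:3 page-walk length minus one, bits 12 and up the
 * physical address of the PML4 table).  The helper and constant names
 * below are illustrative, not vmm.c symbols.
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

#define EPT_MEMTYPE_UC	0x0ULL
#define EPT_MEMTYPE_WB	0x6ULL

static inline uint64_t
make_eptp(uint64_t pml4_pa, unsigned int walk_levels, uint64_t memtype)
{
	uint64_t eptp = pml4_pa & ~0xFFFULL;		/* page-aligned PML4 */

	eptp |= (uint64_t)(walk_levels - 1) << 3;	/* 4-level walk -> 3 */
	eptp |= memtype;				/* WB if supported, else UC */
	return (eptp);
}
#endif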
3028 | |
3029 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482, |
3030 | IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) { |
3031 | if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B, |
3032 | IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) { |
3033 | |
3034 | /* We may sleep during allocation, so reload VMCS. */ |
3035 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
3036 | ret = vmm_alloc_vpid(&vpid); |
3037 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
3038 | printf("%s: failed to reload vmcs\n", __func__); |
3039 | ret = EINVAL22; |
3040 | goto exit; |
3041 | } |
3042 | if (ret) { |
3043 | DPRINTF("%s: could not allocate VPID\n", |
3044 | __func__); |
3045 | ret = EINVAL22; |
3046 | goto exit; |
3047 | } |
3048 | |
3049 | if (vmwrite(VMCS_GUEST_VPID0x0000, vpid)) { |
3050 | DPRINTF("%s: error setting guest VPID\n", |
3051 | __func__); |
3052 | ret = EINVAL22; |
3053 | goto exit; |
3054 | } |
3055 | |
3056 | vcpu->vc_vpid = vpid; |
3057 | } |
3058 | } |
3059 | |
3060 | /* |
3061 | * Determine which bits in CR0 have to be set to a fixed |
3062 | * value as per Intel SDM A.7. |
3063 | * CR0 bits in the vrs parameter must match these. |
3064 | */ |
3065 | want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & |
3066 | (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); |
3067 | want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & |
3068 | ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); |
3069 | |
3070 | /* |
3071 | * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as |
3072 | * fixed to 1 even if the CPU supports the unrestricted guest |
3073 | * feature. Update want1 and want0 accordingly to allow |
3074 | * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if |
3075 | * the CPU has the unrestricted guest capability. |
3076 | */ |
3077 | if (ug) { |
3078 | want1 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001); |
3079 | want0 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001); |
3080 | } |
3081 | |
3082 | /* |
3083 | * VMX may require some bits to be set that userland should not have |
3084 | * to care about. Set those here. |
3085 | */ |
3086 | if (want1 & CR0_NE0x00000020) |
3087 | cr0 |= CR0_NE0x00000020; |
3088 | |
3089 | if ((cr0 & want1) != want1) { |
3090 | ret = EINVAL22; |
3091 | goto exit; |
3092 | } |
3093 | |
3094 | if ((~cr0 & want0) != want0) { |
3095 | ret = EINVAL22; |
3096 | goto exit; |
3097 | } |
3098 | |
3099 | vcpu->vc_vmx_cr0_fixed1 = want1; |
3100 | vcpu->vc_vmx_cr0_fixed0 = want0; |
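/*
 * A compact restatement of the FIXED0/FIXED1 test performed above, assuming
 * the SDM semantics: a CR bit must be 1 wherever FIXED0 has a 1, and must be
 * 0 wherever FIXED1 has a 0 (the unrestricted-guest relaxation for
 * CR0_PG/CR0_PE was already applied above).  Sketch only; not a vmm.c
 * interface.
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

static inline int
cr_fixed_bits_ok(uint64_t cr, uint64_t fixed0, uint64_t fixed1)
{
	uint64_t must_be_1 = fixed0 & fixed1;
	uint64_t must_be_0 = ~fixed0 & ~fixed1;

	return ((cr & must_be_1) == must_be_1 &&
	    (~cr & must_be_0) == must_be_0);
}
#endif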
3101 | /* |
3102 | * Determine which bits in CR4 have to be set to a fixed |
3103 | * value as per Intel SDM A.8. |
3104 | * CR4 bits in the vrs parameter must match these, except |
3105 | * CR4_VMXE - we add that here since it must always be set. |
3106 | */ |
3107 | want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & |
3108 | (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); |
3109 | want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & |
3110 | ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); |
3111 | |
3112 | cr4 = vrs->vrs_crs[VCPU_REGS_CR43] | CR4_VMXE0x00002000; |
3113 | |
3114 | if ((cr4 & want1) != want1) { |
3115 | ret = EINVAL22; |
3116 | goto exit; |
3117 | } |
3118 | |
3119 | if ((~cr4 & want0) != want0) { |
3120 | ret = EINVAL22; |
3121 | goto exit; |
3122 | } |
3123 | |
3124 | cr3 = vrs->vrs_crs[VCPU_REGS_CR32]; |
3125 | |
3126 | /* Restore PDPTEs if 32-bit PAE paging is being used */ |
3127 | if (cr3 && (cr4 & CR4_PAE0x00000020) && |
3128 | !(vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)) { |
3129 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, |
3130 | vrs->vrs_crs[VCPU_REGS_PDPTE06])) { |
3131 | ret = EINVAL22; |
3132 | goto exit; |
3133 | } |
3134 | |
3135 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, |
3136 | vrs->vrs_crs[VCPU_REGS_PDPTE17])) { |
3137 | ret = EINVAL22; |
3138 | goto exit; |
3139 | } |
3140 | |
3141 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, |
3142 | vrs->vrs_crs[VCPU_REGS_PDPTE28])) { |
3143 | ret = EINVAL22; |
3144 | goto exit; |
3145 | } |
3146 | |
3147 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, |
3148 | vrs->vrs_crs[VCPU_REGS_PDPTE39])) { |
3149 | ret = EINVAL22; |
3150 | goto exit; |
3151 | } |
3152 | } |
3153 | |
3154 | vrs->vrs_crs[VCPU_REGS_CR00] = cr0; |
3155 | vrs->vrs_crs[VCPU_REGS_CR43] = cr4; |
3156 | |
3157 | /* |
3158 | * Select host MSRs to be loaded on exit |
3159 | */ |
3160 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va; |
3161 | msr_store[0].vms_index = MSR_EFER0xc0000080; |
3162 | msr_store[0].vms_data = rdmsr(MSR_EFER0xc0000080); |
3163 | msr_store[1].vms_index = MSR_STAR0xc0000081; |
3164 | msr_store[1].vms_data = rdmsr(MSR_STAR0xc0000081); |
3165 | msr_store[2].vms_index = MSR_LSTAR0xc0000082; |
3166 | msr_store[2].vms_data = rdmsr(MSR_LSTAR0xc0000082); |
3167 | msr_store[3].vms_index = MSR_CSTAR0xc0000083; |
3168 | msr_store[3].vms_data = rdmsr(MSR_CSTAR0xc0000083); |
3169 | msr_store[4].vms_index = MSR_SFMASK0xc0000084; |
3170 | msr_store[4].vms_data = rdmsr(MSR_SFMASK0xc0000084); |
3171 | msr_store[5].vms_index = MSR_KERNELGSBASE0xc0000102; |
3172 | msr_store[5].vms_data = rdmsr(MSR_KERNELGSBASE0xc0000102); |
3173 | msr_store[6].vms_index = MSR_MISC_ENABLE0x1a0; |
3174 | msr_store[6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0); |
3175 | |
3176 | /* |
3177 | * Select guest MSRs to be loaded on entry / saved on exit |
3178 | */ |
3179 | msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
3180 | |
3181 | msr_store[VCPU_REGS_EFER0].vms_index = MSR_EFER0xc0000080; |
3182 | msr_store[VCPU_REGS_STAR1].vms_index = MSR_STAR0xc0000081; |
3183 | msr_store[VCPU_REGS_LSTAR2].vms_index = MSR_LSTAR0xc0000082; |
3184 | msr_store[VCPU_REGS_CSTAR3].vms_index = MSR_CSTAR0xc0000083; |
3185 | msr_store[VCPU_REGS_SFMASK4].vms_index = MSR_SFMASK0xc0000084; |
3186 | msr_store[VCPU_REGS_KGSBASE5].vms_index = MSR_KERNELGSBASE0xc0000102; |
3187 | msr_store[VCPU_REGS_MISC_ENABLE6].vms_index = MSR_MISC_ENABLE0x1a0; |
3188 | |
3189 | /* |
3190 | * Initialize MSR_MISC_ENABLE here, since it cannot be read and populated |
3191 | * from vmd(8) and some of its content is based on the host CPU. |
3192 | */ |
3193 | msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0); |
3194 | msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &= |
3195 | ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7) | |
3196 | MISC_ENABLE_EIST_ENABLED(1 << 16) | MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18) | |
3197 | MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23)); |
3198 | msr_store[VCPU_REGS_MISC_ENABLE6].vms_data |= |
3199 | MISC_ENABLE_BTS_UNAVAILABLE(1 << 11) | MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12); |
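/*
 * Each slot in the MSR load/store areas filled in above is a 16-byte entry;
 * a sketch of the layout assumed here (per the SDM: bits 31:0 MSR index,
 * bits 63:32 reserved as zero, bits 127:64 the MSR value).  The struct name
 * is illustrative; the kernel's own definition is struct vmx_msr_store.
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

struct msr_area_entry {
	uint32_t	index;		/* MSR number, e.g. MSR_EFER */
	uint32_t	reserved;	/* must be zero */
	uint64_t	data;		/* value the CPU loads/stores */
};
#endif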
3200 | |
3201 | /* |
3202 | * Currently we have the same count of entry/exit MSR loads/stores |
3203 | * but this is not an architectural requirement. |
3204 | */ |
3205 | if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT0x400E, VMX_NUM_MSR_STORE7)) { |
3206 | DPRINTF("%s: error setting guest MSR exit store count\n", |
3207 | __func__); |
3208 | ret = EINVAL22; |
3209 | goto exit; |
3210 | } |
3211 | |
3212 | if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT0x4010, VMX_NUM_MSR_STORE7)) { |
3213 | DPRINTF("%s: error setting guest MSR exit load count\n", |
3214 | __func__); |
3215 | ret = EINVAL22; |
3216 | goto exit; |
3217 | } |
3218 | |
3219 | if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, VMX_NUM_MSR_STORE7)) { |
3220 | DPRINTF("%s: error setting guest MSR entry load count\n", |
3221 | __func__); |
3222 | ret = EINVAL22; |
3223 | goto exit; |
3224 | } |
3225 | |
3226 | if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS0x2006, |
3227 | vcpu->vc_vmx_msr_exit_save_pa)) { |
3228 | DPRINTF("%s: error setting guest MSR exit store address\n", |
3229 | __func__); |
3230 | ret = EINVAL22; |
3231 | goto exit; |
3232 | } |
3233 | |
3234 | if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008, |
3235 | vcpu->vc_vmx_msr_exit_load_pa)) { |
3236 | DPRINTF("%s: error setting guest MSR exit load address\n", |
3237 | __func__); |
3238 | ret = EINVAL22; |
3239 | goto exit; |
3240 | } |
3241 | |
3242 | if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A, |
3243 | vcpu->vc_vmx_msr_exit_save_pa)) { |
3244 | DPRINTF("%s: error setting guest MSR entry load address\n", |
3245 | __func__); |
3246 | ret = EINVAL22; |
3247 | goto exit; |
3248 | } |
3249 | |
3250 | if (vmwrite(VMCS_MSR_BITMAP_ADDRESS0x2004, |
3251 | vcpu->vc_msr_bitmap_pa)) { |
3252 | DPRINTF("%s: error setting guest MSR bitmap address\n", |
3253 | __func__); |
3254 | ret = EINVAL22; |
3255 | goto exit; |
3256 | } |
3257 | |
3258 | if (vmwrite(VMCS_CR4_MASK0x6002, CR4_VMXE0x00002000)) { |
3259 | DPRINTF("%s: error setting guest CR4 mask\n", __func__); |
3260 | ret = EINVAL22; |
3261 | goto exit; |
3262 | } |
3263 | |
3264 | if (vmwrite(VMCS_CR0_MASK0x6000, CR0_NE0x00000020)) { |
3265 | DPRINTF("%s: error setting guest CR0 mask\n", __func__); |
3266 | ret = EINVAL22; |
3267 | goto exit; |
3268 | } |
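/*
 * The CR0/CR4 masks written above mark CR0_NE and CR4_VMXE as host-owned:
 * for masked bits the guest reads the read shadow and its writes trap, while
 * unmasked bits come straight from the guest register.  A sketch of what a
 * masked read resolves to, assuming the SDM semantics (the "XXX CR0/CR4
 * shadow" notes further down indicate the shadows themselves are not yet
 * populated here):
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

static inline uint64_t
cr_as_seen_by_guest(uint64_t mask, uint64_t shadow, uint64_t real_cr)
{
	return ((shadow & mask) | (real_cr & ~mask));
}
#endif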
3269 | |
3270 | /* |
3271 | * Set up the VMCS for the register state we want during VCPU start. |
3272 | * This matches what the CPU state would be after a bootloader |
3273 | * transition to 'start'. |
3274 | */ |
3275 | ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 0, vrs); |
3276 | |
3277 | /* |
3278 | * Set up the MSR bitmap |
3279 | */ |
3280 | memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF ), ((1 << 12))); |
3281 | vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a); |
3282 | vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174); |
3283 | vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175); |
3284 | vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176); |
3285 | vmx_setmsrbrw(vcpu, MSR_EFER0xc0000080); |
3286 | vmx_setmsrbrw(vcpu, MSR_STAR0xc0000081); |
3287 | vmx_setmsrbrw(vcpu, MSR_LSTAR0xc0000082); |
3288 | vmx_setmsrbrw(vcpu, MSR_CSTAR0xc0000083); |
3289 | vmx_setmsrbrw(vcpu, MSR_SFMASK0xc0000084); |
3290 | vmx_setmsrbrw(vcpu, MSR_FSBASE0xc0000100); |
3291 | vmx_setmsrbrw(vcpu, MSR_GSBASE0xc0000101); |
3292 | vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102); |
3293 | vmx_setmsrbr(vcpu, MSR_MISC_ENABLE0x1a0); |
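/*
 * The bitmap manipulated above is a 4KB page split into four 1KB quarters
 * (read-low, read-high, write-low, write-high MSR ranges); a set bit forces
 * a VM exit, a clear bit allows direct guest access.  A sketch of what the
 * vmx_setmsrbrw()/vmx_setmsrbr() helpers are expected to do for the read
 * half, assuming that layout:
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

#define MSRBM_READ_LOW	0	/* MSRs 0x00000000 - 0x00001FFF */
#define MSRBM_READ_HIGH	1024	/* MSRs 0xC0000000 - 0xC0001FFF */

static void
msr_bitmap_allow_read(uint8_t *bitmap, uint32_t msr)
{
	uint32_t base = (msr >= 0xC0000000) ? MSRBM_READ_HIGH : MSRBM_READ_LOW;
	uint32_t idx = msr & 0x1FFF;

	bitmap[base + (idx / 8)] &= ~(1 << (idx % 8));
}
#endif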
3294 | |
3295 | /* XXX CR0 shadow */ |
3296 | /* XXX CR4 shadow */ |
3297 | |
3298 | /* xcr0 power on default sets bit 0 (x87 state) */ |
3299 | vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask; |
3300 | |
3301 | /* XXX PAT shadow */ |
3302 | vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277); |
3303 | |
3304 | /* Flush the VMCS */ |
3305 | if (vmclear(&vcpu->vc_control_pa)) { |
3306 | DPRINTF("%s: vmclear failed\n", __func__); |
3307 | ret = EINVAL22; |
3308 | } |
3309 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0)); |
3310 | |
3311 | exit: |
3312 | return (ret); |
3313 | } |
3314 | |
3315 | /* |
3316 | * vcpu_init_vmx |
3317 | * |
3318 | * Intel VMX specific VCPU initialization routine. |
3319 | * |
3320 | * This function allocates various per-VCPU memory regions, sets up initial |
3321 | * VCPU VMCS controls, and sets initial register values. |
3322 | * |
3323 | * Parameters: |
3324 | * vcpu: the VCPU structure being initialized |
3325 | * |
3326 | * Return values: |
3327 | * 0: the VCPU was initialized successfully |
3328 | * ENOMEM: insufficient resources |
3329 | * EINVAL: an error occurred during VCPU initialization |
3330 | */ |
3331 | int |
3332 | vcpu_init_vmx(struct vcpu *vcpu) |
3333 | { |
3334 | struct vmcs *vmcs; |
3335 | uint32_t cr0, cr4; |
3336 | int ret = 0; |
3337 | |
3338 | /* Allocate VMCS VA */ |
3339 | vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3340 | &kd_waitok); |
3341 | vcpu->vc_vmx_vmcs_state = VMCS_CLEARED0; |
3342 | |
3343 | if (!vcpu->vc_control_va) |
3344 | return (ENOMEM12); |
3345 | |
3346 | /* Compute VMCS PA */ |
3347 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va, |
3348 | (paddr_t *)&vcpu->vc_control_pa)) { |
3349 | ret = ENOMEM12; |
3350 | goto exit; |
3351 | } |
3352 | |
3353 | /* Allocate MSR bitmap VA */ |
3354 | vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3355 | &kd_waitok); |
3356 | |
3357 | if (!vcpu->vc_msr_bitmap_va) { |
3358 | ret = ENOMEM12; |
3359 | goto exit; |
3360 | } |
3361 | |
3362 | /* Compute MSR bitmap PA */ |
3363 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va, |
3364 | (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { |
3365 | ret = ENOMEM12; |
3366 | goto exit; |
3367 | } |
3368 | |
3369 | /* Allocate MSR exit load area VA */ |
3370 | vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3371 | &kp_zero, &kd_waitok); |
3372 | |
3373 | if (!vcpu->vc_vmx_msr_exit_load_va) { |
3374 | ret = ENOMEM12; |
3375 | goto exit; |
3376 | } |
3377 | |
3378 | /* Compute MSR exit load area PA */ |
3379 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_load_va, |
3380 | &vcpu->vc_vmx_msr_exit_load_pa)) { |
3381 | ret = ENOMEM12; |
3382 | goto exit; |
3383 | } |
3384 | |
3385 | /* Allocate MSR exit save area VA */ |
3386 | vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3387 | &kp_zero, &kd_waitok); |
3388 | |
3389 | if (!vcpu->vc_vmx_msr_exit_save_va) { |
3390 | ret = ENOMEM12; |
3391 | goto exit; |
3392 | } |
3393 | |
3394 | /* Compute MSR exit save area PA */ |
3395 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_save_va, |
3396 | &vcpu->vc_vmx_msr_exit_save_pa)) { |
3397 | ret = ENOMEM12; |
3398 | goto exit; |
3399 | } |
3400 | |
3401 | /* Allocate MSR entry load area VA */ |
3402 | vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3403 | &kp_zero, &kd_waitok); |
3404 | |
3405 | if (!vcpu->vc_vmx_msr_entry_load_va) { |
3406 | ret = ENOMEM12; |
3407 | goto exit; |
3408 | } |
3409 | |
3410 | /* Compute MSR entry load area PA */ |
3411 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_entry_load_va, |
3412 | &vcpu->vc_vmx_msr_entry_load_pa)) { |
3413 | ret = ENOMEM12; |
3414 | goto exit; |
3415 | } |
3416 | |
3417 | vmcs = (struct vmcs *)vcpu->vc_control_va; |
3418 | vmcs->vmcs_revision = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; |
3419 | |
3420 | /* |
3421 | * Load the VMCS onto this PCPU so we can write registers |
3422 | */ |
3423 | if (vmptrld(&vcpu->vc_control_pa)) { |
3424 | ret = EINVAL22; |
3425 | goto exit; |
3426 | } |
3427 | |
3428 | /* Host CR0 */ |
3429 | cr0 = rcr0() & ~CR0_TS0x00000008; |
3430 | if (vmwrite(VMCS_HOST_IA32_CR00x6C00, cr0)) { |
3431 | DPRINTF("%s: error writing host CR0\n", __func__); |
3432 | ret = EINVAL22; |
3433 | goto exit; |
3434 | } |
3435 | |
3436 | /* Host CR4 */ |
3437 | cr4 = rcr4(); |
3438 | if (vmwrite(VMCS_HOST_IA32_CR40x6C04, cr4)) { |
3439 | DPRINTF("%s: error writing host CR4\n", __func__); |
3440 | ret = EINVAL22; |
3441 | goto exit; |
3442 | } |
3443 | |
3444 | /* Host Segment Selectors */ |
3445 | if (vmwrite(VMCS_HOST_IA32_CS_SEL0x0C02, GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0))) { |
3446 | DPRINTF("%s: error writing host CS selector\n", __func__); |
3447 | ret = EINVAL22; |
3448 | goto exit; |
3449 | } |
3450 | |
3451 | if (vmwrite(VMCS_HOST_IA32_DS_SEL0x0C06, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3452 | DPRINTF("%s: error writing host DS selector\n", __func__); |
3453 | ret = EINVAL22; |
3454 | goto exit; |
3455 | } |
3456 | |
3457 | if (vmwrite(VMCS_HOST_IA32_ES_SEL0x0C00, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3458 | DPRINTF("%s: error writing host ES selector\n", __func__); |
3459 | ret = EINVAL22; |
3460 | goto exit; |
3461 | } |
3462 | |
3463 | if (vmwrite(VMCS_HOST_IA32_FS_SEL0x0C08, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3464 | DPRINTF("%s: error writing host FS selector\n", __func__); |
3465 | ret = EINVAL22; |
3466 | goto exit; |
3467 | } |
3468 | |
3469 | if (vmwrite(VMCS_HOST_IA32_GS_SEL0x0C0A, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3470 | DPRINTF("%s: error writing host GS selector\n", __func__); |
3471 | ret = EINVAL22; |
3472 | goto exit; |
3473 | } |
3474 | |
3475 | if (vmwrite(VMCS_HOST_IA32_SS_SEL0x0C04, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) { |
3476 | DPRINTF("%s: error writing host SS selector\n", __func__); |
3477 | ret = EINVAL22; |
3478 | goto exit; |
3479 | } |
3480 | |
3481 | if (vmwrite(VMCS_HOST_IA32_TR_SEL0x0C0C, GSYSSEL(GPROC0_SEL, SEL_KPL)((((0) << 4) + (6 << 3)) | 0))) { |
3482 | DPRINTF("%s: error writing host TR selector\n", __func__); |
3483 | ret = EINVAL22; |
3484 | goto exit; |
3485 | } |
3486 | |
3487 | /* Host IDTR base */ |
3488 | if (vmwrite(VMCS_HOST_IA32_IDTR_BASE0x6C0E, idt_vaddr)) { |
3489 | DPRINTF("%s: error writing host IDTR base\n", __func__); |
3490 | ret = EINVAL22; |
3491 | goto exit; |
3492 | } |
3493 | |
3494 | /* VMCS link */ |
3495 | if (vmwrite(VMCS_LINK_POINTER0x2800, VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL)) { |
3496 | DPRINTF("%s: error writing VMCS link pointer\n", __func__); |
3497 | ret = EINVAL22; |
3498 | goto exit; |
3499 | } |
3500 | |
3501 | /* Flush the initial VMCS */ |
3502 | if (vmclear(&vcpu->vc_control_pa)) { |
3503 | DPRINTF("%s: vmclear failed\n", __func__); |
3504 | ret = EINVAL22; |
3505 | } |
3506 | |
3507 | exit: |
3508 | if (ret) { |
3509 | if (vcpu->vc_control_va) |
3510 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3511 | &kv_page, &kp_zero); |
3512 | if (vcpu->vc_msr_bitmap_va) |
3513 | km_free((void *)vcpu->vc_msr_bitmap_va, PAGE_SIZE(1 << 12), |
3514 | &kv_page, &kp_zero); |
3515 | if (vcpu->vc_vmx_msr_exit_save_va) |
3516 | km_free((void *)vcpu->vc_vmx_msr_exit_save_va, |
3517 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3518 | if (vcpu->vc_vmx_msr_exit_load_va) |
3519 | km_free((void *)vcpu->vc_vmx_msr_exit_load_va, |
3520 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3521 | if (vcpu->vc_vmx_msr_entry_load_va) |
3522 | km_free((void *)vcpu->vc_vmx_msr_entry_load_va, |
3523 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3524 | } |
3525 | |
3526 | return (ret); |
3527 | } |
3528 | |
3529 | /* |
3530 | * vcpu_reset_regs |
3531 | * |
3532 | * Resets a vcpu's registers to the provided state |
3533 | * |
3534 | * Parameters: |
3535 | * vcpu: the vcpu whose registers shall be reset |
3536 | * vrs: the desired register state |
3537 | * |
3538 | * Return values: |
3539 | * 0: the vcpu's registers were successfully reset |
3540 | * !0: the vcpu's registers could not be reset (see arch-specific reset |
3541 | * function for various values that can be returned here) |
3542 | */ |
3543 | int |
3544 | vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs) |
3545 | { |
3546 | int ret; |
3547 | |
3548 | if (vmm_softc->mode == VMM_MODE_VMX || |
3549 | vmm_softc->mode == VMM_MODE_EPT) |
3550 | ret = vcpu_reset_regs_vmx(vcpu, vrs); |
3551 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3552 | vmm_softc->mode == VMM_MODE_RVI) |
3553 | ret = vcpu_reset_regs_svm(vcpu, vrs); |
3554 | else |
3555 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3556 | |
3557 | return (ret); |
3558 | } |
3559 | |
3560 | /* |
3561 | * vcpu_init_svm |
3562 | * |
3563 | * AMD SVM specific VCPU initialization routine. |
3564 | * |
3565 | * This function allocates various per-VCPU memory regions, sets up initial |
3566 | * VCPU VMCB controls, and sets initial register values. |
3567 | * |
3568 | * Parameters: |
3569 | * vcpu: the VCPU structure being initialized |
3570 | * |
3571 | * Return values: |
3572 | * 0: the VCPU was initialized successfully |
3573 | * ENOMEM: insufficient resources |
3574 | * EINVAL: an error occurred during VCPU initialization |
3575 | */ |
3576 | int |
3577 | vcpu_init_svm(struct vcpu *vcpu) |
3578 | { |
3579 | int ret = 0; |
3580 | |
3581 | /* Allocate VMCB VA */ |
3582 | vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero, |
3583 | &kd_waitok); |
3584 | |
3585 | if (!vcpu->vc_control_va) |
3586 | return (ENOMEM12); |
3587 | |
3588 | /* Compute VMCB PA */ |
3589 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va, |
3590 | (paddr_t *)&vcpu->vc_control_pa)) { |
3591 | ret = ENOMEM12; |
3592 | goto exit; |
3593 | } |
3594 | |
3595 | DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3596 | (uint64_t)vcpu->vc_control_va, |
3597 | (uint64_t)vcpu->vc_control_pa); |
3598 | |
3599 | |
3600 | /* Allocate MSR bitmap VA (2 pages) */ |
3601 | vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE(1 << 12), &kv_any, |
3602 | &vmm_kp_contig, &kd_waitok); |
3603 | |
3604 | if (!vcpu->vc_msr_bitmap_va) { |
3605 | ret = ENOMEM12; |
3606 | goto exit; |
3607 | } |
3608 | |
3609 | /* Compute MSR bitmap PA */ |
3610 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va, |
3611 | (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { |
3612 | ret = ENOMEM12; |
3613 | goto exit; |
3614 | } |
3615 | |
3616 | DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3617 | (uint64_t)vcpu->vc_msr_bitmap_va, |
3618 | (uint64_t)vcpu->vc_msr_bitmap_pa); |
3619 | |
3620 | /* Allocate host state area VA */ |
3621 | vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, |
3622 | &kp_zero, &kd_waitok); |
3623 | |
3624 | if (!vcpu->vc_svm_hsa_va) { |
3625 | ret = ENOMEM12; |
3626 | goto exit; |
3627 | } |
3628 | |
3629 | /* Compute host state area PA */ |
3630 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_hsa_va, |
3631 | &vcpu->vc_svm_hsa_pa)) { |
3632 | ret = ENOMEM12; |
3633 | goto exit; |
3634 | } |
3635 | |
3636 | DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3637 | (uint64_t)vcpu->vc_svm_hsa_va, |
3638 | (uint64_t)vcpu->vc_svm_hsa_pa); |
3639 | |
3640 | /* Allocate IOIO area VA (3 pages) */ |
3641 | vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE(1 << 12), &kv_any, |
3642 | &vmm_kp_contig, &kd_waitok); |
3643 | |
3644 | if (!vcpu->vc_svm_ioio_va) { |
3645 | ret = ENOMEM12; |
3646 | goto exit; |
3647 | } |
3648 | |
3649 | /* Compute IOIO area PA */ |
3650 | if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_ioio_va, |
3651 | &vcpu->vc_svm_ioio_pa)) { |
3652 | ret = ENOMEM12; |
3653 | goto exit; |
3654 | } |
3655 | |
3656 | DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__, |
3657 | (uint64_t)vcpu->vc_svm_ioio_va, |
3658 | (uint64_t)vcpu->vc_svm_ioio_pa); |
3659 | |
3660 | exit: |
3661 | if (ret) { |
3662 | if (vcpu->vc_control_va) |
3663 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3664 | &kv_page, &kp_zero); |
3665 | if (vcpu->vc_msr_bitmap_va) |
3666 | km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), |
3667 | &kv_any, &vmm_kp_contig); |
3668 | if (vcpu->vc_svm_hsa_va) |
3669 | km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), |
3670 | &kv_page, &kp_zero); |
3671 | if (vcpu->vc_svm_ioio_va) |
3672 | km_free((void *)vcpu->vc_svm_ioio_va, |
3673 | 3 * PAGE_SIZE(1 << 12), &kv_any, &vmm_kp_contig); |
3674 | } |
3675 | |
3676 | return (ret); |
3677 | } |
3678 | |
3679 | /* |
3680 | * vcpu_init |
3681 | * |
3682 | * Calls the architecture-specific VCPU init routine |
3683 | */ |
3684 | int |
3685 | vcpu_init(struct vcpu *vcpu) |
3686 | { |
3687 | int ret = 0; |
3688 | |
3689 | vcpu->vc_virt_mode = vmm_softc->mode; |
3690 | vcpu->vc_state = VCPU_STATE_STOPPED; |
3691 | vcpu->vc_vpid = 0; |
3692 | vcpu->vc_pvclock_system_gpa = 0; |
3693 | vcpu->vc_last_pcpu = NULL((void *)0); |
3694 | |
3695 | rw_init(&vcpu->vc_lock, "vcpu")_rw_init_flags(&vcpu->vc_lock, "vcpu", 0, ((void *)0)); |
3696 | |
3697 | /* Shadow PAT MSR, starting with host's value. */ |
3698 | vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277); |
3699 | |
3700 | if (vmm_softc->mode == VMM_MODE_VMX || |
3701 | vmm_softc->mode == VMM_MODE_EPT) |
3702 | ret = vcpu_init_vmx(vcpu); |
3703 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3704 | vmm_softc->mode == VMM_MODE_RVI) |
3705 | ret = vcpu_init_svm(vcpu); |
3706 | else |
3707 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3708 | |
3709 | return (ret); |
3710 | } |
3711 | |
3712 | /* |
3713 | * vcpu_deinit_vmx |
3714 | * |
3715 | * Deinitializes the vcpu described by 'vcpu' |
3716 | * |
3717 | * Parameters: |
3718 | * vcpu: the vcpu to be deinited |
3719 | */ |
3720 | void |
3721 | vcpu_deinit_vmx(struct vcpu *vcpu) |
3722 | { |
3723 | if (vcpu->vc_control_va) |
3724 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), |
3725 | &kv_page, &kp_zero); |
3726 | if (vcpu->vc_vmx_msr_exit_save_va) |
3727 | km_free((void *)vcpu->vc_vmx_msr_exit_save_va, |
3728 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3729 | if (vcpu->vc_vmx_msr_exit_load_va) |
3730 | km_free((void *)vcpu->vc_vmx_msr_exit_load_va, |
3731 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3732 | if (vcpu->vc_vmx_msr_entry_load_va) |
3733 | km_free((void *)vcpu->vc_vmx_msr_entry_load_va, |
3734 | PAGE_SIZE(1 << 12), &kv_page, &kp_zero); |
3735 | |
3736 | if (vcpu->vc_vmx_vpid_enabled) |
3737 | vmm_free_vpid(vcpu->vc_vpid); |
3738 | } |
3739 | |
3740 | /* |
3741 | * vcpu_deinit_svm |
3742 | * |
3743 | * Deinitializes the vcpu described by 'vcpu' |
3744 | * |
3745 | * Parameters: |
3746 | * vcpu: the vcpu to be deinited |
3747 | */ |
3748 | void |
3749 | vcpu_deinit_svm(struct vcpu *vcpu) |
3750 | { |
3751 | if (vcpu->vc_control_va) |
3752 | km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), &kv_page, |
3753 | &kp_zero); |
3754 | if (vcpu->vc_msr_bitmap_va) |
3755 | km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), &kv_any, |
3756 | &vmm_kp_contig); |
3757 | if (vcpu->vc_svm_hsa_va) |
3758 | km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), &kv_page, |
3759 | &kp_zero); |
3760 | if (vcpu->vc_svm_ioio_va) |
3761 | km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE(1 << 12), &kv_any, |
3762 | &vmm_kp_contig); |
3763 | |
3764 | vmm_free_vpid(vcpu->vc_vpid); |
3765 | } |
3766 | |
3767 | /* |
3768 | * vcpu_deinit |
3769 | * |
3770 | * Calls the architecture-specific VCPU deinit routine |
3771 | * |
3772 | * Parameters: |
3773 | * vcpu: the vcpu to be deinited |
3774 | */ |
3775 | void |
3776 | vcpu_deinit(struct vcpu *vcpu) |
3777 | { |
3778 | if (vmm_softc->mode == VMM_MODE_VMX || |
3779 | vmm_softc->mode == VMM_MODE_EPT) |
3780 | vcpu_deinit_vmx(vcpu); |
3781 | else if (vmm_softc->mode == VMM_MODE_SVM || |
3782 | vmm_softc->mode == VMM_MODE_RVI) |
3783 | vcpu_deinit_svm(vcpu); |
3784 | else |
3785 | panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); |
3786 | } |
3787 | |
3788 | /* |
3789 | * vm_teardown |
3790 | * |
3791 | * Tears down (destroys) the vm indicated by 'vm'. |
3792 | * |
3793 | * Parameters: |
3794 | * vm: vm to be torn down |
3795 | */ |
3796 | void |
3797 | vm_teardown(struct vm *vm) |
3798 | { |
3799 | struct vcpu *vcpu, *tmp; |
3800 | |
3801 | rw_assert_wrlock(&vmm_softc->vm_lock); |
3802 | KERNEL_LOCK()_kernel_lock(); |
3803 | |
3804 | /* Free VCPUs */ |
3805 | rw_enter_write(&vm->vm_vcpu_lock); |
3806 | SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp)for ((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) && ((tmp) = ((vcpu)->vc_vcpu_link.sle_next), 1); (vcpu) = (tmp)) { |
3807 | SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link)do { if ((&vm->vm_vcpu_list)->slh_first == (vcpu)) { do { ((&vm->vm_vcpu_list))->slh_first = ((&vm-> vm_vcpu_list))->slh_first->vc_vcpu_link.sle_next; } while (0); } else { struct vcpu *curelm = (&vm->vm_vcpu_list )->slh_first; while (curelm->vc_vcpu_link.sle_next != ( vcpu)) curelm = curelm->vc_vcpu_link.sle_next; curelm-> vc_vcpu_link.sle_next = curelm->vc_vcpu_link.sle_next-> vc_vcpu_link.sle_next; } ((vcpu)->vc_vcpu_link.sle_next) = ((void *)-1); } while (0); |
3808 | vcpu_deinit(vcpu); |
3809 | pool_put(&vcpu_pool, vcpu); |
3810 | vmm_softc->vcpu_ct--; |
3811 | } |
3812 | |
3813 | vm_impl_deinit(vm); |
3814 | |
3815 | /* teardown guest vmspace */ |
3816 | if (vm->vm_vmspace != NULL((void *)0)) { |
3817 | uvmspace_free(vm->vm_vmspace); |
3818 | vm->vm_vmspace = NULL((void *)0); |
3819 | } |
3820 | |
3821 | if (vm->vm_id > 0) { |
3822 | vmm_softc->vm_ct--; |
3823 | if (vmm_softc->vm_ct < 1) |
3824 | vmm_stop(); |
3825 | } |
3826 | pool_put(&vm_pool, vm); |
3827 | |
3828 | KERNEL_UNLOCK()_kernel_unlock(); |
3829 | rw_exit_write(&vm->vm_vcpu_lock); |
3830 | } |
3831 | |
3832 | /* |
3833 | * vcpu_vmx_check_cap |
3834 | * |
3835 | * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1 |
3836 | * or set = 0, respectively). |
3837 | * |
3838 | * When considering 'msr', we check to see if true controls are available, |
3839 | * and use those if so. |
3840 | * |
3841 | * Returns 1 if 'cap' can be set/cleared as requested, 0 otherwise. |
3842 | */ |
3843 | int |
3844 | vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set) |
3845 | { |
3846 | uint64_t ctl; |
3847 | |
3848 | if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) { |
3849 | switch (msr) { |
3850 | case IA32_VMX_PINBASED_CTLS0x481: |
3851 | ctl = vcpu->vc_vmx_true_pinbased_ctls; |
3852 | break; |
3853 | case IA32_VMX_PROCBASED_CTLS0x482: |
3854 | ctl = vcpu->vc_vmx_true_procbased_ctls; |
3855 | break; |
3856 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
3857 | ctl = vcpu->vc_vmx_procbased2_ctls; |
3858 | break; |
3859 | case IA32_VMX_ENTRY_CTLS0x484: |
3860 | ctl = vcpu->vc_vmx_true_entry_ctls; |
3861 | break; |
3862 | case IA32_VMX_EXIT_CTLS0x483: |
3863 | ctl = vcpu->vc_vmx_true_exit_ctls; |
3864 | break; |
3865 | default: |
3866 | return (0); |
3867 | } |
3868 | } else { |
3869 | switch (msr) { |
3870 | case IA32_VMX_PINBASED_CTLS0x481: |
3871 | ctl = vcpu->vc_vmx_pinbased_ctls; |
3872 | break; |
3873 | case IA32_VMX_PROCBASED_CTLS0x482: |
3874 | ctl = vcpu->vc_vmx_procbased_ctls; |
3875 | break; |
3876 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
3877 | ctl = vcpu->vc_vmx_procbased2_ctls; |
3878 | break; |
3879 | case IA32_VMX_ENTRY_CTLS0x484: |
3880 | ctl = vcpu->vc_vmx_entry_ctls; |
3881 | break; |
3882 | case IA32_VMX_EXIT_CTLS0x483: |
3883 | ctl = vcpu->vc_vmx_exit_ctls; |
3884 | break; |
3885 | default: |
3886 | return (0); |
3887 | } |
3888 | } |
3889 | |
3890 | if (set) { |
3891 | /* Check bit 'cap << 32', must be !0 */ |
3892 | return (ctl & ((uint64_t)cap << 32)) != 0; |
3893 | } else { |
3894 | /* Check bit 'cap', must be 0 */ |
3895 | return (ctl & cap) == 0; |
3896 | } |
3897 | } |
3898 | |
3899 | /* |
3900 | * vcpu_vmx_compute_ctrl |
3901 | * |
3902 | * Computes the appropriate control value, given the supplied parameters |
3903 | * and CPU capabilities. |
3904 | * |
3905 | * Intel has made somewhat of a mess of this computation - it is described |
3906 | * using no fewer than three different approaches, spread across many |
3907 | * pages of the SDM. Further compounding the problem is the fact that now |
3908 | * we have "true controls" for each type of "control", and each needs to |
3909 | * be examined to get the calculation right, but only if "true" controls |
3910 | * are present on the CPU we're on. |
3911 | * |
3912 | * Parameters: |
3913 | * ctrlval: the control value, as read from the CPU MSR |
3914 | * ctrl: which control is being set (e.g., pinbased, procbased, etc.) |
3915 | * want1: the set of desired 1 bits |
3916 | * want0: the set of desired 0 bits |
3917 | * out: (out) the correct value to write into the VMCS for this VCPU, |
3918 | * for the 'ctrl' desired. |
3919 | * |
3920 | * Returns 0 if successful, or EINVAL if the supplied parameters define |
3921 | * an unworkable control setup. |
3922 | */ |
3923 | int |
3924 | vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1, |
3925 | uint32_t want0, uint32_t *out) |
3926 | { |
3927 | int i, set, clear; |
3928 | |
3929 | *out = 0; |
3930 | |
3931 | /* |
3932 | * The Intel SDM gives three formulae for determining which bits to |
3933 | * set/clear for a given control and desired functionality. Formula |
3934 | * 1 is the simplest but disallows use of newer features that are |
3935 | * enabled by functionality in later CPUs. |
3936 | * |
3937 | * Formulas 2 and 3 allow such extra functionality. We use formula |
3938 | * 2 - this requires us to know the identity of controls in the |
3939 | * "default1" class for each control register, but allows us to not |
3940 | * have to pass along and/or query both sets of capability MSRs for |
3941 | * each control lookup. This makes the code slightly longer, |
3942 | * however. |
3943 | */ |
3944 | for (i = 0; i < 32; i++) { |
3945 | /* Figure out if we can set and / or clear this bit */ |
3946 | set = (ctrlval & (1ULL << (i + 32))) != 0; |
3947 | clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0; |
3948 | |
3949 | /* If the bit can't be set nor cleared, something's wrong */ |
3950 | if (!set && !clear) |
3951 | return (EINVAL22); |
3952 | |
3953 | /* |
3954 | * Formula 2.c.i - "If the relevant VMX capability MSR |
3955 | * reports that a control has a single setting, use that |
3956 | * setting." |
3957 | */ |
3958 | if (set && !clear) { |
3959 | if (want0 & (1ULL << i)) |
3960 | return (EINVAL22); |
3961 | else |
3962 | *out |= (1ULL << i); |
3963 | } else if (clear && !set) { |
3964 | if (want1 & (1ULL << i)) |
3965 | return (EINVAL22); |
3966 | else |
3967 | *out &= ~(1ULL << i); |
3968 | } else { |
3969 | /* |
3970 | * 2.c.ii - "If the relevant VMX capability MSR |
3971 | * reports that a control can be set to 0 or 1 |
3972 | * and that control's meaning is known to the VMM, |
3973 | * set the control based on the functionality desired." |
3974 | */ |
3975 | if (want1 & (1ULL << i)) |
3976 | *out |= (1ULL << i); |
3977 | else if (want0 & (1ULL << i)) |
3978 | *out &= ~(1ULL << i); |
3979 | else { |
3980 | /* |
3981 | * ... assuming the control's meaning is not |
3982 | * known to the VMM ... |
3983 | * |
3984 | * 2.c.iii - "If the relevant VMX capability |
3985 | * MSR reports that a control can be set to 0 |
3986 | * or 1 and the control is not in the default1 |
3987 | * class, set the control to 0." |
3988 | * |
3989 | * 2.c.iv - "If the relevant VMX capability |
3990 | * MSR reports that a control can be set to 0 |
3991 | * or 1 and the control is in the default1 |
3992 | * class, set the control to 1." |
3993 | */ |
3994 | switch (ctrl) { |
3995 | case IA32_VMX_PINBASED_CTLS0x481: |
3996 | case IA32_VMX_TRUE_PINBASED_CTLS0x48D: |
3997 | /* |
3998 | * A.3.1 - default1 class of pinbased |
3999 | * controls comprises bits 1,2,4 |
4000 | */ |
4001 | switch (i) { |
4002 | case 1: |
4003 | case 2: |
4004 | case 4: |
4005 | *out |= (1ULL << i); |
4006 | break; |
4007 | default: |
4008 | *out &= ~(1ULL << i); |
4009 | break; |
4010 | } |
4011 | break; |
4012 | case IA32_VMX_PROCBASED_CTLS0x482: |
4013 | case IA32_VMX_TRUE_PROCBASED_CTLS0x48E: |
4014 | /* |
4015 | * A.3.2 - default1 class of procbased |
4016 | * controls comprises bits 1, 4-6, 8, |
4017 | * 13-16, 26 |
4018 | */ |
4019 | switch (i) { |
4020 | case 1: |
4021 | case 4 ... 6: |
4022 | case 8: |
4023 | case 13 ... 16: |
4024 | case 26: |
4025 | *out |= (1ULL << i); |
4026 | break; |
4027 | default: |
4028 | *out &= ~(1ULL << i); |
4029 | break; |
4030 | } |
4031 | break; |
4032 | /* |
4033 | * Unknown secondary procbased controls |
4034 | * can always be set to 0 |
4035 | */ |
4036 | case IA32_VMX_PROCBASED2_CTLS0x48B: |
4037 | *out &= ~(1ULL << i); |
4038 | break; |
4039 | case IA32_VMX_EXIT_CTLS0x483: |
4040 | case IA32_VMX_TRUE_EXIT_CTLS0x48F: |
4041 | /* |
4042 | * A.4 - default1 class of exit |
4043 | * controls comprises bits 0-8, 10, |
4044 | * 11, 13, 14, 16, 17 |
4045 | */ |
4046 | switch (i) { |
4047 | case 0 ... 8: |
4048 | case 10 ... 11: |
4049 | case 13 ... 14: |
4050 | case 16 ... 17: |
4051 | *out |= (1ULL << i); |
4052 | break; |
4053 | default: |
4054 | *out &= ~(1ULL << i); |
4055 | break; |
4056 | } |
4057 | break; |
4058 | case IA32_VMX_ENTRY_CTLS0x484: |
4059 | case IA32_VMX_TRUE_ENTRY_CTLS0x490: |
4060 | /* |
4061 | * A.5 - default1 class of entry |
4062 | * controls comprises bits 0-8, 12 |
4063 | */ |
4064 | switch (i) { |
4065 | case 0 ... 8: |
4066 | case 12: |
4067 | *out |= (1ULL << i); |
4068 | break; |
4069 | default: |
4070 | *out &= ~(1ULL << i); |
4071 | break; |
4072 | } |
4073 | break; |
4074 | } |
4075 | } |
4076 | } |
4077 | } |
4078 | |
4079 | return (0); |
4080 | } |
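/*
 * A standalone worked example of the bit classification the loop above
 * performs, using a made-up capability MSR value: a bit is forced to 1 when
 * the allowed-0 half (low dword) has it set, forced to 0 when the allowed-1
 * half (high dword) has it clear, and flexible otherwise (flexible bits are
 * then steered by want1/want0 or the default1 rules).
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t cap = 0x0000003F00000016ULL;	/* hypothetical MSR value */
	int i;

	for (i = 0; i < 8; i++) {
		int can_set = (cap >> (i + 32)) & 1;
		int can_clear = !((cap >> i) & 1);

		if (!can_set && !can_clear)
			printf("bit %d: inconsistent capability\n", i);
		else if (can_set && can_clear)
			printf("bit %d: flexible\n", i);
		else
			printf("bit %d: forced to %d\n", i, can_set);
	}
	return (0);
}
#endif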
4081 | |
4082 | /* |
4083 | * vm_get_info |
4084 | * |
4085 | * Returns information about the VM indicated by 'vip'. The 'vip_size' field |
4086 | * in the 'vip' parameter is used to indicate the size of the caller's buffer. |
4087 | * If insufficient space exists in that buffer, the required size needed is |
4088 | * returned in vip_size and the number of VM information structures returned |
4089 | * in vip_info_count is set to 0. The caller should then try the ioctl again |
4090 | * after allocating a sufficiently large buffer. |
4091 | * |
4092 | * Parameters: |
4093 | * vip: information structure identifying the VM to query |
4094 | * |
4095 | * Return values: |
4096 | * 0: the operation succeeded |
4097 | * ENOMEM: memory allocation error during processing |
4098 | * EFAULT: error copying data to user process |
4099 | */ |
4100 | int |
4101 | vm_get_info(struct vm_info_params *vip) |
4102 | { |
4103 | struct vm_info_result *out; |
4104 | struct vm *vm; |
4105 | struct vcpu *vcpu; |
4106 | int i, j; |
4107 | size_t need; |
4108 | |
4109 | rw_enter_read(&vmm_softc->vm_lock); |
4110 | need = vmm_softc->vm_ct * sizeof(struct vm_info_result); |
4111 | if (vip->vip_size < need) { |
4112 | vip->vip_info_ct = 0; |
4113 | vip->vip_size = need; |
4114 | rw_exit_read(&vmm_softc->vm_lock); |
4115 | return (0); |
4116 | } |
4117 | |
4118 | out = malloc(need, M_DEVBUF2, M_NOWAIT0x0002|M_ZERO0x0008); |
4119 | if (out == NULL((void *)0)) { |
4120 | vip->vip_info_ct = 0; |
4121 | rw_exit_read(&vmm_softc->vm_lock); |
4122 | return (ENOMEM12); |
4123 | } |
4124 | |
4125 | i = 0; |
4126 | vip->vip_info_ct = vmm_softc->vm_ct; |
4127 | SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link)for((vm) = ((&vmm_softc->vm_list)->slh_first); (vm) != ((void *)0); (vm) = ((vm)->vm_link.sle_next)) { |
4128 | out[i].vir_memory_size = vm->vm_memory_size; |
4129 | out[i].vir_used_size = |
4130 | pmap_resident_count(vm->vm_map->pmap)((vm->vm_map->pmap)->pm_stats.resident_count) * PAGE_SIZE(1 << 12); |
4131 | out[i].vir_ncpus = vm->vm_vcpu_ct; |
4132 | out[i].vir_id = vm->vm_id; |
4133 | out[i].vir_creator_pid = vm->vm_creator_pid; |
4134 | strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN64); |
4135 | rw_enter_read(&vm->vm_vcpu_lock); |
4136 | for (j = 0; j < vm->vm_vcpu_ct; j++) { |
4137 | out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN; |
4138 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) |
4139 | vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4140 | if (vcpu->vc_id == j) |
4141 | out[i].vir_vcpu_state[j] = |
4142 | vcpu->vc_state; |
4143 | } |
4144 | } |
4145 | rw_exit_read(&vm->vm_vcpu_lock); |
4146 | i++; |
4147 | } |
4148 | rw_exit_read(&vmm_softc->vm_lock); |
4149 | if (copyout(out, vip->vip_info, need) == EFAULT14) { |
4150 | free(out, M_DEVBUF2, need); |
4151 | return (EFAULT14); |
4152 | } |
4153 | |
4154 | free(out, M_DEVBUF2, need); |
4155 | return (0); |
4156 | } |
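/*
 * Sketch of the two-pass calling pattern the size negotiation above implies
 * for userland.  The /dev/vmm file descriptor and the VMM_IOC_INFO ioctl
 * name are assumptions modelled on how vmctl(8) normally drives this
 * interface; error handling is abbreviated.
 */
#if 0	/* illustrative userland sketch, not part of this file */
#include <sys/ioctl.h>
#include <machine/vmmvar.h>
#include <stdlib.h>
#include <string.h>

int
query_vms(int vmm_fd, struct vm_info_result **out, size_t *nvm)
{
	struct vm_info_params vip;

	memset(&vip, 0, sizeof(vip));
	if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) == -1)	/* pass 1: size only */
		return (-1);
	if ((vip.vip_info = malloc(vip.vip_size)) == NULL)
		return (-1);
	if (ioctl(vmm_fd, VMM_IOC_INFO, &vip) == -1) {	/* pass 2: fetch */
		free(vip.vip_info);
		return (-1);
	}
	*out = vip.vip_info;
	*nvm = vip.vip_info_ct;
	return (0);
}
#endif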
4157 | |
4158 | /* |
4159 | * vm_terminate |
4160 | * |
4161 | * Terminates the VM indicated by 'vtp'. |
4162 | * |
4163 | * Parameters: |
4164 | * vtp: structure defining the VM to terminate |
4165 | * |
4166 | * Return values: |
4167 | * 0: the VM was terminated |
4168 | * !0: the VM could not be located |
4169 | */ |
4170 | int |
4171 | vm_terminate(struct vm_terminate_params *vtp) |
4172 | { |
4173 | struct vm *vm; |
4174 | struct vcpu *vcpu; |
4175 | u_int old, next; |
4176 | int error; |
4177 | |
4178 | /* |
4179 | * Find desired VM |
4180 | */ |
4181 | rw_enter_write(&vmm_softc->vm_lock); |
4182 | error = vm_find(vtp->vtp_vm_id, &vm); |
4183 | |
4184 | if (error == 0) { |
4185 | rw_enter_read(&vm->vm_vcpu_lock); |
4186 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4187 | do { |
4188 | old = vcpu->vc_state; |
4189 | if (old == VCPU_STATE_RUNNING) |
4190 | next = VCPU_STATE_REQTERM; |
4191 | else if (old == VCPU_STATE_STOPPED) |
4192 | next = VCPU_STATE_TERMINATED; |
4193 | else /* must be REQTERM or TERMINATED */ |
4194 | break; |
4195 | } while (old != atomic_cas_uint(&vcpu->vc_state,_atomic_cas_uint((&vcpu->vc_state), (old), (next)) |
4196 | old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next))); |
4197 | } |
4198 | rw_exit_read(&vm->vm_vcpu_lock); |
4199 | } else { |
4200 | rw_exit_write(&vmm_softc->vm_lock); |
4201 | return (error); |
4202 | } |
4203 | |
4204 | SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link)do { if ((&vmm_softc->vm_list)->slh_first == (vm)) { do { ((&vmm_softc->vm_list))->slh_first = ((&vmm_softc ->vm_list))->slh_first->vm_link.sle_next; } while (0 ); } else { struct vm *curelm = (&vmm_softc->vm_list)-> slh_first; while (curelm->vm_link.sle_next != (vm)) curelm = curelm->vm_link.sle_next; curelm->vm_link.sle_next = curelm->vm_link.sle_next->vm_link.sle_next; } ((vm)-> vm_link.sle_next) = ((void *)-1); } while (0); |
4205 | if (vm->vm_vcpus_running == 0) |
4206 | vm_teardown(vm); |
4207 | |
4208 | rw_exit_write(&vmm_softc->vm_lock); |
4209 | |
4210 | return (0); |
4211 | } |
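/*
 * The termination path above uses a classic compare-and-swap retry loop.  A
 * standalone restatement with C11 atomics (the kernel itself uses
 * atomic_cas_uint): re-read the state, pick the successor state, and retry
 * until no other thread changed it underneath us.
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdatomic.h>

enum { STOPPED, RUNNING, REQTERM, TERMINATED };

static void
request_terminate(_Atomic unsigned int *state)
{
	unsigned int old, next;

	do {
		old = atomic_load(state);
		if (old == RUNNING)
			next = REQTERM;		/* vcpu will notice and stop */
		else if (old == STOPPED)
			next = TERMINATED;
		else
			return;			/* already on its way out */
	} while (!atomic_compare_exchange_weak(state, &old, next));
}
#endif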
4212 | |
4213 | /* |
4214 | * vm_run |
4215 | * |
4216 | * Run the vm / vcpu specified by 'vrp' |
4217 | * |
4218 | * Parameters: |
4219 | * vrp: structure defining the VM to run |
4220 | * |
4221 | * Return value: |
4222 | * ENOENT: the VM defined in 'vrp' could not be located |
4223 | * EBUSY: the VM defined in 'vrp' is already running |
4224 | * EFAULT: error copying data from userspace (vmd) on return from previous |
4225 | * exit. |
4226 | * EAGAIN: help is needed from vmd(8) (device I/O or an exit that vmm(4) |
4227 | * cannot handle in-kernel) |
4228 | * 0: the run loop exited and no help is needed from vmd(8) |
4229 | */ |
4230 | int |
4231 | vm_run(struct vm_run_params *vrp) |
4232 | { |
4233 | struct vm *vm; |
4234 | struct vcpu *vcpu; |
4235 | int ret = 0, error; |
4236 | u_int old, next; |
4237 | |
4238 | /* |
4239 | * Find desired VM |
4240 | */ |
4241 | rw_enter_read(&vmm_softc->vm_lock); |
4242 | error = vm_find(vrp->vrp_vm_id, &vm); |
4243 | |
4244 | /* |
4245 | * Attempt to locate the requested VCPU. If found, attempt to |
4246 | * transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING. |
4247 | * Failure to make the transition indicates the VCPU is busy. |
4248 | */ |
4249 | if (error == 0) { |
4250 | rw_enter_read(&vm->vm_vcpu_lock); |
4251 | SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu ) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next) ) { |
4252 | if (vcpu->vc_id == vrp->vrp_vcpu_id) |
4253 | break; |
4254 | } |
4255 | |
4256 | if (vcpu != NULL((void *)0)) { |
4257 | old = VCPU_STATE_STOPPED; |
4258 | next = VCPU_STATE_RUNNING; |
4259 | |
4260 | if (atomic_cas_uint(&vcpu->vc_state, old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next)) != old) |
4261 | ret = EBUSY16; |
4262 | else { |
4263 | atomic_inc_int(&vm->vm_vcpus_running)_atomic_inc_int(&vm->vm_vcpus_running); |
4264 | rw_enter_write(&vcpu->vc_lock); |
4265 | } |
4266 | } else |
4267 | ret = ENOENT2; |
4268 | |
4269 | rw_exit_read(&vm->vm_vcpu_lock); |
4270 | } |
4271 | rw_exit_read(&vmm_softc->vm_lock); |
4272 | |
4273 | if (error != 0) |
4274 | ret = error; |
4275 | |
4276 | /* Bail if errors detected in the previous steps */ |
4277 | if (ret) |
4278 | return (ret); |
4279 | |
4280 | /* |
4281 | * We may be returning from userland (vmd) after it helped with the last exit. |
4282 | * If so (vrp_continue == 1), copy in the exit data from vmd. The |
4283 | * exit data will be consumed before the next entry (this typically |
4284 | * comprises VCPU register changes as the result of vmd(8)'s actions). |
4285 | */ |
4286 | if (vrp->vrp_continue) { |
4287 | if (copyin(vrp->vrp_exit, &vcpu->vc_exit, |
4288 | sizeof(struct vm_exit)) == EFAULT14) { |
4289 | rw_exit_write(&vcpu->vc_lock); |
4290 | return (EFAULT14); |
4291 | } |
4292 | } |
4293 | |
4294 | /* Run the VCPU specified in vrp */ |
4295 | if (vcpu->vc_virt_mode == VMM_MODE_VMX || |
4296 | vcpu->vc_virt_mode == VMM_MODE_EPT) { |
4297 | ret = vcpu_run_vmx(vcpu, vrp); |
4298 | } else if (vcpu->vc_virt_mode == VMM_MODE_SVM || |
4299 | vcpu->vc_virt_mode == VMM_MODE_RVI) { |
4300 | ret = vcpu_run_svm(vcpu, vrp); |
4301 | } |
4302 | |
4303 | /* |
4304 | * We can set the VCPU states here without CAS because once |
4305 | * a VCPU is in state RUNNING or REQTERM, only the VCPU itself |
4306 | * can switch the state. |
4307 | */ |
4308 | atomic_dec_int(&vm->vm_vcpus_running)_atomic_dec_int(&vm->vm_vcpus_running); |
4309 | if (vcpu->vc_state == VCPU_STATE_REQTERM) { |
4310 | vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE; |
4311 | vcpu->vc_state = VCPU_STATE_TERMINATED; |
4312 | if (vm->vm_vcpus_running == 0) { |
4313 | rw_enter_write(&vmm_softc->vm_lock); |
4314 | vm_teardown(vm); |
4315 | rw_exit_write(&vmm_softc->vm_lock); |
4316 | } |
4317 | ret = 0; |
4318 | } else if (ret == 0 || ret == EAGAIN35) { |
4319 | /* If we are exiting, populate exit data so vmd can help. */ |
4320 | vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE0xFFFF |
4321 | : vcpu->vc_gueststate.vg_exit_reason; |
4322 | vrp->vrp_irqready = vcpu->vc_irqready; |
4323 | vcpu->vc_state = VCPU_STATE_STOPPED; |
4324 | |
4325 | if (copyout(&vcpu->vc_exit, vrp->vrp_exit, |
4326 | sizeof(struct vm_exit)) == EFAULT14) { |
4327 | ret = EFAULT14; |
4328 | } else |
4329 | ret = 0; |
4330 | } else { |
4331 | vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE; |
4332 | vcpu->vc_state = VCPU_STATE_TERMINATED; |
4333 | } |
4334 | |
4335 | rw_exit_write(&vcpu->vc_lock); |
4336 | |
4337 | return (ret); |
4338 | } |
4339 | |
4340 | /* |
4341 | * vcpu_must_stop |
4342 | * |
4343 | * Check if we need to (temporarily) stop running the VCPU for some reason, |
4344 | * such as: |
4345 | * - the VM was requested to terminate |
4346 | * - the proc running this VCPU has pending signals |
4347 | * |
4348 | * Parameters: |
4349 | * vcpu: the VCPU to check |
4350 | * |
4351 | * Return values: |
4352 | * 1: the VM owning this VCPU should stop |
4353 | * 0: no stop is needed |
4354 | */ |
4355 | int |
4356 | vcpu_must_stop(struct vcpu *vcpu) |
4357 | { |
4358 | struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc; |
4359 | |
4360 | if (vcpu->vc_state == VCPU_STATE_REQTERM) |
4361 | return (1); |
4362 | if (SIGPENDING(p)(((p)->p_siglist | (p)->p_p->ps_siglist) & ~(p)-> p_sigmask) != 0) |
4363 | return (1); |
4364 | return (0); |
4365 | } |
4366 | |
4367 | /* |
4368 | * vmm_fpurestore |
4369 | * |
4370 | * Restore the guest's FPU state, saving the existing userland thread's |
4371 | * FPU context if necessary. Must be called with interrupts disabled. |
4372 | */ |
4373 | int |
4374 | vmm_fpurestore(struct vcpu *vcpu) |
4375 | { |
4376 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
4377 | |
4378 | /* save vmm's FPU state if we haven't already */ |
4379 | if (ci->ci_flags & CPUF_USERXSTATE0x0200) { |
4380 | ci->ci_flags &= ~CPUF_USERXSTATE0x0200; |
4381 | fpusavereset(&curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_addr->u_pcb.pcb_savefpu); |
4382 | } |
4383 | |
4384 | if (vcpu->vc_fpuinited) { |
4385 | if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) { |
4386 | DPRINTF("%s: guest attempted to set invalid %s\n", |
4387 | __func__, "xsave/xrstor state"); |
4388 | return EINVAL22; |
4389 | } |
4390 | } |
4391 | |
4392 | if (xsave_mask) { |
4393 | /* Restore guest %xcr0 */ |
4394 | if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) { |
4395 | DPRINTF("%s: guest attempted to set invalid bits in " |
4396 | "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n", |
4397 | __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask); |
4398 | return EINVAL22; |
4399 | } |
4400 | } |
4401 | |
4402 | return 0; |
4403 | } |
4404 | |
4405 | /* |
4406 | * vmm_fpusave |
4407 | * |
4408 | * Save the guest's FPU state. Must be called with interrupts disabled. |
4409 | */ |
4410 | void |
4411 | vmm_fpusave(struct vcpu *vcpu) |
4412 | { |
4413 | if (xsave_mask) { |
4414 | /* Save guest %xcr0 */ |
4415 | vcpu->vc_gueststate.vg_xcr0 = xgetbv(0); |
4416 | |
4417 | /* Restore host %xcr0 */ |
4418 | xsetbv(0, xsave_mask); |
4419 | } |
4420 | |
4421 | /* |
4422 | * Save full copy of FPU state - guest content is always |
4423 | * a subset of host's save area (see xsetbv exit handler) |
4424 | */ |
4425 | fpusavereset(&vcpu->vc_g_fpu); |
4426 | vcpu->vc_fpuinited = 1; |
4427 | } |
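/*
 * The comment above relies on the guest's %xcr0 always describing a subset
 * of the host's xsave_mask, so the guest image fits in the host-sized save
 * area.  A sketch of the acceptance test the xsetbv exit handler is expected
 * to apply (bit 0, x87 state, must stay set and no component outside the
 * host mask may be enabled):
 */
#if 0	/* illustrative sketch, not part of the build */
#include <stdint.h>

static inline int
guest_xcr0_acceptable(uint64_t guest_xcr0, uint64_t host_xsave_mask)
{
	if ((guest_xcr0 & 0x1ULL) == 0)		/* x87 state is mandatory */
		return (0);
	return ((guest_xcr0 & ~host_xsave_mask) == 0);
}
#endif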
4428 | |
4429 | /* |
4430 | * vmm_translate_gva |
4431 | * |
4432 | * Translates a guest virtual address to a guest physical address by walking |
4433 | * the currently active page table (if needed). |
4434 | * |
4435 | * Note - this function can possibly alter the supplied VCPU state. |
4436 | * Specifically, it may inject exceptions depending on the current VCPU |
4437 | * configuration, and may alter %cr2 on #PF. Consequently, this function |
4438 | * should only be used as part of instruction emulation. |
4439 | * |
4440 | * Parameters: |
4441 | * vcpu: The VCPU this translation should be performed for (guest MMU settings |
4442 | * are gathered from this VCPU) |
4443 | * va: virtual address to translate |
4444 | * pa: pointer to paddr_t variable that will receive the translated physical |
4445 | * address. 'pa' is unchanged on error. |
4446 | * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which |
4447 | * the address should be translated |
4448 | * |
4449 | * Return values: |
4450 | * 0: the address was successfully translated - 'pa' contains the physical |
4451 | * address currently mapped by 'va'. |
4452 | * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case |
4453 | * and %cr2 set in the vcpu structure. |
4454 | * EINVAL: an error occurred reading paging table structures |
4455 | */ |
4456 | int |
4457 | vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode) |
4458 | { |
4459 | int level, shift, pdidx; |
4460 | uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask; |
4461 | uint64_t shift_width, pte_size, *hva; |
4462 | paddr_t hpa; |
4463 | struct vcpu_reg_state vrs; |
4464 | |
4465 | level = 0; |
Value stored to 'level' is never read | |
4466 | |
4467 | if (vmm_softc->mode == VMM_MODE_EPT || |
4468 | vmm_softc->mode == VMM_MODE_VMX) { |
4469 | if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vrs)) |
4470 | return (EINVAL22); |
4471 | } else if (vmm_softc->mode == VMM_MODE_RVI || |
4472 | vmm_softc->mode == VMM_MODE_SVM) { |
4473 | if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vrs)) |
4474 | return (EINVAL22); |
4475 | } else { |
4476 | printf("%s: unknown vmm mode", __func__); |
4477 | return (EINVAL22); |
4478 | } |
4479 | |
4480 | DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__, |
4481 | vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]); |
4482 | |
4483 | if (!(vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PG0x80000000)) { |
4484 | DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__, |
4485 | va); |
4486 | *pa = va; |
4487 | return (0); |
4488 | } |
4489 | |
4490 | pt_paddr = vrs.vrs_crs[VCPU_REGS_CR32]; |
4491 | |
4492 | if (vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PE0x00000001) { |
4493 | if (vrs.vrs_crs[VCPU_REGS_CR43] & CR4_PAE0x00000020) { |
4494 | pte_size = sizeof(uint64_t); |
4495 | shift_width = 9; |
4496 | |
4497 | if (vrs.vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400) { |
4498 | level = 4; |
4499 | mask = L4_MASK0x0000ff8000000000UL; |
4500 | shift = L4_SHIFT39; |
4501 | } else { |
4502 | level = 3; |
4503 | mask = L3_MASK0x0000007fc0000000UL; |
4504 | shift = L3_SHIFT30; |
4505 | } |
4506 | } else { |
4507 | level = 2; |
4508 | shift_width = 10; |
4509 | mask = 0xFFC00000; |
4510 | shift = 22; |
4511 | pte_size = sizeof(uint32_t); |
4512 | } |
4513 | } else { |
4514 | return (EINVAL22); |
4515 | } |
4516 | |
4517 | DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, " |
4518 | "shift_width=%lld\n", __func__, pte_size, level, mask, shift, |
4519 | shift_width); |
4520 | |
4521 | /* XXX: Check for R bit in segment selector and set A bit */ |
4522 | |
4523 | for (;level > 0; level--) { |
4524 | pdidx = (va & mask) >> shift; |
4525 | pte_paddr = (pt_paddr) + (pdidx * pte_size); |
4526 | |
4527 | DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__, |
4528 | level, pte_paddr); |
4529 | if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr, |
4530 | &hpa)) { |
4531 | DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n", |
4532 | __func__, pte_paddr); |
4533 | return (EINVAL22); |
4534 | } |
4535 | |
4536 | hpa = hpa | (pte_paddr & 0xFFF); |
4537 | hva = (uint64_t *)PMAP_DIRECT_MAP(hpa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000 )) + (hpa)); |
4538 | DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n", |
4539 | __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva); |
4540 | if (pte_size == 8) |
4541 | pte = *hva; |
4542 | else |
4543 | pte = *(uint32_t *)hva; |
4544 | |
4545 | DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr, |
4546 | pte); |
4547 | |
4548 | /* XXX: Set CR2 */ |
4549 | if (!(pte & PG_V0x0000000000000001UL)) |
4550 | return (EFAULT14); |
4551 | |
4552 | /* XXX: Check for SMAP */ |
4553 | if ((mode == PROT_WRITE0x02) && !(pte & PG_RW0x0000000000000002UL)) |
4554 | return (EPERM1); |
4555 | |
4556 | if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u0x0000000000000004UL)) |
4557 | return (EPERM1); |
4558 | |
4559 | pte = pte | PG_U0x0000000000000020UL; |
4560 | if (mode == PROT_WRITE0x02) |
4561 | pte = pte | PG_M0x0000000000000040UL; |
4562 | *hva = pte; |
4563 | |
4564 | /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */ |
4565 | if (pte & PG_PS0x0000000000000080UL) |
4566 | break; |
4567 | |
4568 | if (level > 1) { |
4569 | pt_paddr = pte & PG_FRAME0x000ffffffffff000UL; |
4570 | shift -= shift_width; |
4571 | mask = mask >> shift_width; |
4572 | } |
4573 | } |
4574 | |
4575 | low_mask = ((uint64_t)1ULL << shift) - 1; |
4576 | high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask; |
4577 | *pa = (pte & high_mask) | (va & low_mask); |
4578 | |
4579 | DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, |
4580 | va, *pa); |
4581 | |
4582 | return (0); |
4583 | } |
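
The walk above picks a table index at each level from a (mask, shift) pair and narrows both by shift_width bits per level; the final physical address combines the PTE's frame bits above the final shift with the untranslated low bits of the VA. The following standalone sketch (userland C, not part of vmm.c) reproduces just that arithmetic for 4-level long-mode paging, reusing the L4 mask/shift constants from the code above and an invented example PTE:

#include <stdio.h>
#include <stdint.h>

/* Same 4-level long-mode constants used by vmm_translate_gva(). */
#define L4_MASK  0x0000ff8000000000ULL
#define L4_SHIFT 39

int
main(void)
{
        uint64_t va = 0x00007f1234567abcULL;      /* example guest VA */
        uint64_t pte = 0x800000012345a067ULL;     /* invented leaf PTE */
        uint64_t mask = L4_MASK, low_mask, high_mask;
        int shift = L4_SHIFT, shift_width = 9, level;

        for (level = 4; level > 0; level--) {
                printf("level %d: pdidx=%llu\n", level,
                    (unsigned long long)((va & mask) >> shift));
                if (level > 1) {
                        shift -= shift_width;
                        mask >>= shift_width;
                }
        }

        /* Final GPA: PTE frame bits above 'shift', VA offset below it. */
        low_mask = (1ULL << shift) - 1;
        high_mask = ((1ULL << 63) - 1) ^ low_mask;
        printf("gpa=0x%llx\n",
            (unsigned long long)((pte & high_mask) | (va & low_mask)));
        return 0;
}
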
4584 | |
4585 | |
4586 | /* |
4587 | * vcpu_run_vmx |
4588 | * |
4589 | * VMX main loop used to run a VCPU. |
4590 | * |
4591 | * Parameters: |
4592 | * vcpu: The VCPU to run |
4593 | * vrp: run parameters |
4594 | * |
4595 | * Return values: |
4596 | * 0: The run loop exited and no help is needed from vmd |
4597 | * EAGAIN: The run loop exited and help from vmd is needed |
4598 | * EINVAL: an error occurred |
4599 | */ |
4600 | int |
4601 | vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) |
4602 | { |
4603 | int ret = 0, exitinfo; |
4604 | struct region_descriptor gdt; |
4605 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
4606 | uint64_t exit_reason, cr3, insn_error; |
4607 | struct schedstate_percpu *spc; |
4608 | struct vmx_invvpid_descriptor vid; |
4609 | uint64_t eii, procbased, int_st; |
4610 | uint16_t irq, ldt_sel; |
4611 | u_long s; |
4612 | struct region_descriptor gdtr, idtr; |
4613 | |
4614 | rw_assert_wrlock(&vcpu->vc_lock); |
4615 | |
4616 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
4617 | printf("%s: failed (re)loading vmcs\n", __func__); |
4618 | return (EINVAL22); |
4619 | } |
4620 | |
4621 | /* |
4622 | * If we are returning from userspace (vmd) because we exited |
4623 | * last time, fix up any needed vcpu state first. Which state |
4624 | * needs to be fixed up depends on what vmd populated in the |
4625 | * exit data structure. |
4626 | */ |
4627 | irq = vrp->vrp_irq; |
4628 | |
4629 | if (vrp->vrp_continue) { |
4630 | switch (vcpu->vc_gueststate.vg_exit_reason) { |
4631 | case VMX_EXIT_IO30: |
4632 | if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) |
4633 | vcpu->vc_gueststate.vg_rax = |
4634 | vcpu->vc_exit.vei.vei_data; |
4635 | break; |
4636 | case VM_EXIT_NONE0xFFFF: |
4637 | case VMX_EXIT_HLT12: |
4638 | case VMX_EXIT_INT_WINDOW7: |
4639 | case VMX_EXIT_EXTINT1: |
4640 | case VMX_EXIT_EPT_VIOLATION48: |
4641 | case VMX_EXIT_CPUID10: |
4642 | case VMX_EXIT_XSETBV55: |
4643 | break; |
4644 | #ifdef VMM_DEBUG |
4645 | case VMX_EXIT_TRIPLE_FAULT2: |
4646 | DPRINTF("%s: vm %d vcpu %d triple fault\n", |
4647 | __func__, vcpu->vc_parent->vm_id, |
4648 | vcpu->vc_id); |
4649 | vmx_vcpu_dump_regs(vcpu); |
4650 | dump_vcpu(vcpu); |
4651 | vmx_dump_vmcs(vcpu); |
4652 | break; |
4653 | case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33: |
4654 | DPRINTF("%s: vm %d vcpu %d failed entry " |
4655 | "due to invalid guest state\n", |
4656 | __func__, vcpu->vc_parent->vm_id, |
4657 | vcpu->vc_id); |
4658 | vmx_vcpu_dump_regs(vcpu); |
4659 | dump_vcpu(vcpu); |
4660 | return (EINVAL22); |
4661 | default: |
4662 | DPRINTF("%s: unimplemented exit type %d (%s)\n", |
4663 | __func__, |
4664 | vcpu->vc_gueststate.vg_exit_reason, |
4665 | vmx_exit_reason_decode( |
4666 | vcpu->vc_gueststate.vg_exit_reason)); |
4667 | vmx_vcpu_dump_regs(vcpu); |
4668 | dump_vcpu(vcpu); |
4669 | break; |
4670 | #endif /* VMM_DEBUG */ |
4671 | } |
4672 | } |
4673 | |
4674 | setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1); |
4675 | if (gdt.rd_base == 0) { |
4676 | printf("%s: setregion\n", __func__); |
4677 | return (EINVAL22); |
4678 | } |
4679 | |
4680 | /* Host GDTR base */ |
4681 | if (vmwrite(VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base)) { |
4682 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4683 | VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base); |
4684 | return (EINVAL22); |
4685 | } |
4686 | |
4687 | /* Host TR base */ |
4688 | if (vmwrite(VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss)) { |
4689 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4690 | VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss); |
4691 | return (EINVAL22); |
4692 | } |
4693 | |
4694 | /* Host CR3 */ |
4695 | cr3 = rcr3(); |
4696 | if (vmwrite(VMCS_HOST_IA32_CR30x6C02, cr3)) { |
4697 | printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, |
4698 | VMCS_HOST_IA32_CR30x6C02, cr3); |
4699 | return (EINVAL22); |
4700 | } |
4701 | |
4702 | /* Handle vmd(8) injected interrupts */ |
4703 | /* Is there a pending interrupt to inject? */ |
4704 | if (irq != 0xFFFF) { |
4705 | if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, &int_st)) { |
4706 | printf("%s: can't get interruptibility state\n", |
4707 | __func__); |
4708 | return (EINVAL22); |
4709 | } |
4710 | |
4711 | /* Interruptibility state 0x3 covers blocking by STI and MOV SS */ |
4712 | if (!(int_st & 0x3) && vcpu->vc_irqready) { |
4713 | eii = (irq & 0xFF); |
4714 | eii |= (1ULL << 31); /* Valid */ |
4715 | eii |= (0ULL << 8); /* Hardware Interrupt */ |
4716 | if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) { |
4717 | printf("vcpu_run_vmx: can't vector " |
4718 | "interrupt to guest\n"); |
4719 | return (EINVAL22); |
4720 | } |
4721 | |
4722 | irq = 0xFFFF; |
4723 | } |
4724 | } else if (!vcpu->vc_intr) { |
4725 | /* |
4726 | * Disable interrupt window exiting |
4727 | */ |
4728 | if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) { |
4729 | printf("%s: can't read procbased ctls on exit\n", |
4730 | __func__); |
4731 | return (EINVAL22); |
4732 | } else { |
4733 | procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2); |
4734 | if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) { |
4735 | printf("%s: can't write procbased ctls " |
4736 | "on exit\n", __func__); |
4737 | return (EINVAL22); |
4738 | } |
4739 | } |
4740 | } |
4741 | |
4742 | while (ret == 0) { |
4743 | #ifdef VMM_DEBUG |
4744 | paddr_t pa = 0ULL; |
4745 | vmptrst(&pa); |
4746 | KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/vmm.c", 4746, "pa == vcpu->vc_control_pa" )); |
4747 | #endif /* VMM_DEBUG */ |
4748 | |
4749 | vmm_update_pvclock(vcpu); |
4750 | |
4751 | /* Inject event if present */ |
4752 | if (vcpu->vc_event != 0) { |
4753 | eii = (vcpu->vc_event & 0xFF); |
4754 | eii |= (1ULL << 31); /* Valid */ |
4755 | |
4756 | /* Set the "Send error code" flag for certain vectors */ |
4757 | switch (vcpu->vc_event & 0xFF) { |
4758 | case VMM_EX_DF8: |
4759 | case VMM_EX_TS10: |
4760 | case VMM_EX_NP11: |
4761 | case VMM_EX_SS12: |
4762 | case VMM_EX_GP13: |
4763 | case VMM_EX_PF14: |
4764 | case VMM_EX_AC17: |
4765 | eii |= (1ULL << 11); |
4766 | } |
4767 | |
4768 | eii |= (3ULL << 8); /* Hardware Exception */ |
4769 | if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) { |
4770 | printf("%s: can't vector event to guest\n", |
4771 | __func__); |
4772 | ret = EINVAL22; |
4773 | break; |
4774 | } |
4775 | |
4776 | if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018, 0)) { |
4777 | printf("%s: can't write error code to guest\n", |
4778 | __func__); |
4779 | ret = EINVAL22; |
4780 | break; |
4781 | } |
4782 | |
4783 | vcpu->vc_event = 0; |
4784 | } |
4785 | |
4786 | if (vcpu->vc_vmx_vpid_enabled) { |
4787 | /* Invalidate old TLB mappings */ |
4788 | vid.vid_vpid = vcpu->vc_parent->vm_id; |
4789 | vid.vid_addr = 0; |
4790 | invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid); |
4791 | } |
4792 | |
4793 | /* Start / resume the VCPU */ |
4794 | |
4795 | /* Disable interrupts and save the current host FPU state. */ |
4796 | s = intr_disable(); |
4797 | if ((ret = vmm_fpurestore(vcpu))) { |
4798 | intr_restore(s); |
4799 | break; |
4800 | } |
4801 | |
4802 | sgdt(&gdtr); |
4803 | sidt(&idtr); |
4804 | sldt(&ldt_sel); |
4805 | |
4806 | TRACEPOINT(vmm, guest_enter, vcpu, vrp)do { extern struct dt_probe (dt_static_vmm_guest_enter); struct dt_probe *dtp = &(dt_static_vmm_guest_enter); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp); } } while (0); |
4807 | |
4808 | ret = vmx_enter_guest(&vcpu->vc_control_pa, |
4809 | &vcpu->vc_gueststate, |
4810 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1), |
4811 | ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr); |
4812 | |
4813 | bare_lgdt(&gdtr); |
4814 | lidt(&idtr); |
4815 | lldt(ldt_sel); |
4816 | |
4817 | /* |
4818 | * On exit, interrupts are disabled, and we are running with |
4819 | * the guest FPU state still possibly on the CPU. Save the FPU |
4820 | * state before re-enabling interrupts. |
4821 | */ |
4822 | vmm_fpusave(vcpu); |
4823 | intr_restore(s); |
4824 | |
4825 | TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason)do { extern struct dt_probe (dt_static_vmm_guest_exit); struct dt_probe *dtp = &(dt_static_vmm_guest_exit); struct dt_provider *dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing) != 0), 0) && __builtin_expect(((dtp->dtp_recording ) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp, exit_reason ); } } while (0); |
4826 | |
4827 | atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (1)); |
4828 | exit_reason = VM_EXIT_NONE0xFFFF; |
4829 | |
4830 | /* If we exited successfully ... */ |
4831 | if (ret == 0) { |
4832 | /* |
4833 | * ret == 0 implies we entered the guest, and later |
4834 | * exited for some valid reason |
4835 | */ |
4836 | exitinfo = vmx_get_exit_info( |
4837 | &vcpu->vc_gueststate.vg_rip, &exit_reason); |
4838 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, |
4839 | &vcpu->vc_gueststate.vg_rflags)) { |
4840 | printf("%s: can't read guest rflags during " |
4841 | "exit\n", __func__); |
4842 | ret = EINVAL22; |
4843 | break; |
4844 | } |
4845 | |
4846 | /* Update our state */ |
4847 | if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP0x1)) { |
4848 | printf("%s: cannot read guest rip\n", __func__); |
4849 | ret = EINVAL22; |
4850 | break; |
4851 | } |
4852 | |
4853 | if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON0x2)) { |
4854 | printf("%s: can't read exit reason\n", __func__); |
4855 | ret = EINVAL22; |
4856 | break; |
4857 | } |
4858 | |
4859 | /* |
4860 | * Handle the exit. This will alter "ret" to EAGAIN if |
4861 | * the exit handler determines help from vmd is needed. |
4862 | */ |
4863 | vcpu->vc_gueststate.vg_exit_reason = exit_reason; |
4864 | ret = vmx_handle_exit(vcpu); |
4865 | |
4866 | if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200) |
4867 | vcpu->vc_irqready = 1; |
4868 | else |
4869 | vcpu->vc_irqready = 0; |
4870 | |
4871 | /* |
4872 | * If not ready for interrupts, but interrupts pending, |
4873 | * enable interrupt window exiting. |
4874 | */ |
4875 | if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { |
4876 | if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) { |
4877 | printf("%s: can't read procbased ctls " |
4878 | "on intwin exit\n", __func__); |
4879 | ret = EINVAL22; |
4880 | break; |
4881 | } |
4882 | |
4883 | procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2); |
4884 | if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) { |
4885 | printf("%s: can't write procbased ctls " |
4886 | "on intwin exit\n", __func__); |
4887 | ret = EINVAL22; |
4888 | break; |
4889 | } |
4890 | } |
4891 | |
4892 | /* |
4893 | * Exit to vmd if we are terminating, failed to enter, |
4894 | * or need help (device I/O) |
4895 | */ |
4896 | if (ret || vcpu_must_stop(vcpu)) |
4897 | break; |
4898 | |
4899 | if (vcpu->vc_intr && vcpu->vc_irqready) { |
4900 | ret = EAGAIN35; |
4901 | break; |
4902 | } |
4903 | |
4904 | /* Check if we should yield - don't hog the {p,v}pu */ |
4905 | spc = &ci->ci_schedstate; |
4906 | if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002) |
4907 | break; |
4908 | |
4909 | } else { |
4910 | /* |
4911 | * We failed vmresume or vmlaunch for some reason, |
4912 | * typically due to invalid vmcs state or other |
4913 | * reasons documented in SDM Vol 3C 30.4. |
4914 | */ |
4915 | switch (ret) { |
4916 | case VMX_FAIL_LAUNCH_INVALID_VMCS2: |
4917 | printf("%s: failed %s with invalid vmcs\n", |
4918 | __func__, |
4919 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4920 | ? "vmresume" : "vmlaunch")); |
4921 | break; |
4922 | case VMX_FAIL_LAUNCH_VALID_VMCS3: |
4923 | printf("%s: failed %s with valid vmcs\n", |
4924 | __func__, |
4925 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4926 | ? "vmresume" : "vmlaunch")); |
4927 | break; |
4928 | default: |
4929 | printf("%s: failed %s for unknown reason\n", |
4930 | __func__, |
4931 | (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1 |
4932 | ? "vmresume" : "vmlaunch")); |
4933 | } |
4934 | |
4935 | ret = EINVAL22; |
4936 | |
4937 | /* Try to translate a vmfail error code, if possible. */ |
4938 | if (vmread(VMCS_INSTRUCTION_ERROR0x4400, &insn_error)) { |
4939 | printf("%s: can't read insn error field\n", |
4940 | __func__); |
4941 | } else |
4942 | printf("%s: error code = %lld, %s\n", __func__, |
4943 | insn_error, |
4944 | vmx_instruction_error_decode(insn_error)); |
4945 | #ifdef VMM_DEBUG |
4946 | vmx_vcpu_dump_regs(vcpu); |
4947 | dump_vcpu(vcpu); |
4948 | #endif /* VMM_DEBUG */ |
4949 | } |
4950 | } |
4951 | |
4952 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
4953 | |
4954 | /* Copy the VCPU register state to the exit structure */ |
4955 | if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs)) |
4956 | ret = EINVAL22; |
4957 | vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu); |
4958 | |
4959 | return (ret); |
4960 | } |
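
Both injection paths in the loop above assemble the VM-entry interruption-information field by hand: vector in bits 7:0, event type in bits 10:8 (0 for an external interrupt, 3 for a hardware exception), bit 11 to request error-code delivery, bit 31 to mark the field valid. A small standalone sketch of that encoding (illustrative values only, not part of vmm.c):

#include <stdio.h>
#include <stdint.h>

/*
 * Assemble a VM-entry interruption-information field the way the run
 * loop does: vector in bits 7:0, type in bits 10:8, bit 11 requests
 * error-code delivery, bit 31 marks the field valid.
 */
static uint64_t
entry_intr_info(uint8_t vector, uint8_t type, int deliver_error_code)
{
        uint64_t eii = vector;

        eii |= (uint64_t)type << 8;
        if (deliver_error_code)
                eii |= 1ULL << 11;
        eii |= 1ULL << 31;              /* valid */
        return eii;
}

int
main(void)
{
        /* External interrupt vector 0x20 (type 0), as for IRQ injection. */
        printf("irq 0x20: eii=0x%llx\n",
            (unsigned long long)entry_intr_info(0x20, 0, 0));
        /* #GP (vector 13), hardware exception (type 3), with error code. */
        printf("#GP: eii=0x%llx\n",
            (unsigned long long)entry_intr_info(13, 3, 1));
        return 0;
}
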
4961 | |
4962 | /* |
4963 | * vmx_handle_intr |
4964 | * |
4965 | * Handle host (external) interrupts. We read which interrupt fired by |
4966 | * extracting the vector from the VMCS and dispatch the interrupt directly |
4967 | * to the host using vmm_dispatch_intr. |
4968 | */ |
4969 | void |
4970 | vmx_handle_intr(struct vcpu *vcpu) |
4971 | { |
4972 | uint8_t vec; |
4973 | uint64_t eii; |
4974 | struct gate_descriptor *idte; |
4975 | vaddr_t handler; |
4976 | |
4977 | if (vmread(VMCS_EXIT_INTERRUPTION_INFO0x4404, &eii)) { |
4978 | printf("%s: can't obtain intr info\n", __func__); |
4979 | return; |
4980 | } |
4981 | |
4982 | vec = eii & 0xFF; |
4983 | |
4984 | /* XXX check "error valid" code in eii, abort if 0 */ |
4985 | idte = &idt[vec]; |
4986 | handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16); |
4987 | vmm_dispatch_intr(handler); |
4988 | } |
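
The handler address above is rebuilt from the gate descriptor's split offset fields: the low 16 bits plus everything above bit 15 shifted back into place. A trivial standalone round-trip of that split, assuming the same 16/48 split that the reconstruction above implies:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t handler = 0xffffffff81234567ULL; /* example address */
        uint64_t lo = handler & 0xffff;           /* low offset field */
        uint64_t hi = handler >> 16;              /* high offset field */

        /* Same reassembly as vmx_handle_intr(): lo + (hi << 16). */
        printf("reassembled: 0x%llx\n",
            (unsigned long long)(lo + (hi << 16)));
        return 0;
}
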
4989 | |
4990 | /* |
4991 | * svm_handle_hlt |
4992 | * |
4993 | * Handle HLT exits |
4994 | * |
4995 | * Parameters |
4996 | * vcpu: The VCPU that executed the HLT instruction |
4997 | * |
4998 | * Return Values: |
4999 | * EIO: The guest halted with interrupts disabled |
5000 | * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU |
5001 | * until a virtual interrupt is ready to inject |
5002 | */ |
5003 | int |
5004 | svm_handle_hlt(struct vcpu *vcpu) |
5005 | { |
5006 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5007 | uint64_t rflags = vmcb->v_rflags; |
5008 | |
5009 | /* All HLT insns are 1 byte */ |
5010 | vcpu->vc_gueststate.vg_rip += 1; |
5011 | |
5012 | if (!(rflags & PSL_I0x00000200)) { |
5013 | DPRINTF("%s: guest halted with interrupts disabled\n", |
5014 | __func__); |
5015 | return (EIO5); |
5016 | } |
5017 | |
5018 | return (EAGAIN35); |
5019 | } |
5020 | |
5021 | /* |
5022 | * vmx_handle_hlt |
5023 | * |
5024 | * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate |
5025 | * the guest (no NMIs handled) by returning EIO to vmd. |
5026 | * |
5027 | * Parameters: |
5028 | * vcpu: The VCPU that executed the HLT instruction |
5029 | * |
5030 | * Return Values: |
5031 | * EINVAL: An error occurred extracting information from the VMCS, or an |
5032 | * invalid HLT instruction was encountered |
5033 | * EIO: The guest halted with interrupts disabled |
5034 | * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU |
5035 | * until a virtual interrupt is ready to inject |
5036 | * |
5037 | */ |
5038 | int |
5039 | vmx_handle_hlt(struct vcpu *vcpu) |
5040 | { |
5041 | uint64_t insn_length, rflags; |
5042 | |
5043 | if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) { |
5044 | printf("%s: can't obtain instruction length\n", __func__); |
5045 | return (EINVAL22); |
5046 | } |
5047 | |
5048 | if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &rflags)) { |
5049 | printf("%s: can't obtain guest rflags\n", __func__); |
5050 | return (EINVAL22); |
5051 | } |
5052 | |
5053 | if (insn_length != 1) { |
5054 | DPRINTF("%s: HLT with instruction length %lld not supported\n", |
5055 | __func__, insn_length); |
5056 | return (EINVAL22); |
5057 | } |
5058 | |
5059 | if (!(rflags & PSL_I0x00000200)) { |
5060 | DPRINTF("%s: guest halted with interrupts disabled\n", |
5061 | __func__); |
5062 | return (EIO5); |
5063 | } |
5064 | |
5065 | vcpu->vc_gueststate.vg_rip += insn_length; |
5066 | return (EAGAIN35); |
5067 | } |
5068 | |
5069 | /* |
5070 | * vmx_get_exit_info |
5071 | * |
5072 | * Returns exit information containing the current guest RIP and exit reason |
5073 | * in rip and exit_reason. The return value is a bitmask indicating whether |
5074 | * reading the RIP and exit reason was successful. |
5075 | */ |
5076 | int |
5077 | vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason) |
5078 | { |
5079 | int rv = 0; |
5080 | |
5081 | if (vmread(VMCS_GUEST_IA32_RIP0x681E, rip) == 0) { |
5082 | rv |= VMX_EXIT_INFO_HAVE_RIP0x1; |
5083 | if (vmread(VMCS_EXIT_REASON0x4402, exit_reason) == 0) |
5084 | rv |= VMX_EXIT_INFO_HAVE_REASON0x2; |
5085 | } |
5086 | return (rv); |
5087 | } |
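
Callers can test the two flag bits individually, as the run loop above does, or compare against the combined mask. A minimal standalone sketch using local copies of the two flag values (0x1 and 0x2, matching VMX_EXIT_INFO_HAVE_RIP and VMX_EXIT_INFO_HAVE_REASON):

#include <stdio.h>

#define HAVE_RIP        0x1     /* mirrors VMX_EXIT_INFO_HAVE_RIP */
#define HAVE_REASON     0x2     /* mirrors VMX_EXIT_INFO_HAVE_REASON */
#define COMPLETE        (HAVE_RIP | HAVE_REASON)

int
main(void)
{
        int rv = HAVE_RIP;      /* pretend the exit-reason read failed */

        if ((rv & COMPLETE) != COMPLETE)
                printf("exit info incomplete: rv=0x%x\n", rv);
        return 0;
}
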
5088 | |
5089 | /* |
5090 | * svm_handle_exit |
5091 | * |
5092 | * Handle exits from the VM by decoding the exit reason and calling various |
5093 | * subhandlers as needed. |
5094 | */ |
5095 | int |
5096 | svm_handle_exit(struct vcpu *vcpu) |
5097 | { |
5098 | uint64_t exit_reason, rflags; |
5099 | int update_rip, ret = 0; |
5100 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5101 | |
5102 | update_rip = 0; |
5103 | exit_reason = vcpu->vc_gueststate.vg_exit_reason; |
5104 | rflags = vcpu->vc_gueststate.vg_rflags; |
5105 | |
5106 | switch (exit_reason) { |
5107 | case SVM_VMEXIT_VINTR0x64: |
5108 | if (!(rflags & PSL_I0x00000200)) { |
5109 | DPRINTF("%s: impossible interrupt window exit " |
5110 | "config\n", __func__); |
5111 | ret = EINVAL22; |
5112 | break; |
5113 | } |
5114 | |
5115 | /* |
5116 | * Guest is now ready for interrupts, so disable interrupt |
5117 | * window exiting. |
5118 | */ |
5119 | vmcb->v_irq = 0; |
5120 | vmcb->v_intr_vector = 0; |
5121 | vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR(1UL << 4); |
5122 | svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) | SVM_CLEANBITS_I(1 << 0)); |
5123 | |
5124 | update_rip = 0; |
5125 | break; |
5126 | case SVM_VMEXIT_INTR0x60: |
5127 | update_rip = 0; |
5128 | break; |
5129 | case SVM_VMEXIT_SHUTDOWN0x7F: |
5130 | update_rip = 0; |
5131 | ret = EAGAIN35; |
5132 | break; |
5133 | case SVM_VMEXIT_NPF0x400: |
5134 | ret = svm_handle_np_fault(vcpu); |
5135 | break; |
5136 | case SVM_VMEXIT_CPUID0x72: |
5137 | ret = vmm_handle_cpuid(vcpu); |
5138 | update_rip = 1; |
5139 | break; |
5140 | case SVM_VMEXIT_MSR0x7C: |
5141 | ret = svm_handle_msr(vcpu); |
5142 | update_rip = 1; |
5143 | break; |
5144 | case SVM_VMEXIT_XSETBV0x8D: |
5145 | ret = svm_handle_xsetbv(vcpu); |
5146 | update_rip = 1; |
5147 | break; |
5148 | case SVM_VMEXIT_IOIO0x7B: |
5149 | ret = svm_handle_inout(vcpu); |
5150 | update_rip = 1; |
5151 | break; |
5152 | case SVM_VMEXIT_HLT0x78: |
5153 | ret = svm_handle_hlt(vcpu); |
5154 | update_rip = 1; |
5155 | break; |
5156 | case SVM_VMEXIT_MWAIT0x8B: |
5157 | case SVM_VMEXIT_MWAIT_CONDITIONAL0x8C: |
5158 | case SVM_VMEXIT_MONITOR0x8A: |
5159 | case SVM_VMEXIT_VMRUN0x80: |
5160 | case SVM_VMEXIT_VMMCALL0x81: |
5161 | case SVM_VMEXIT_VMLOAD0x82: |
5162 | case SVM_VMEXIT_VMSAVE0x83: |
5163 | case SVM_VMEXIT_STGI0x84: |
5164 | case SVM_VMEXIT_CLGI0x85: |
5165 | case SVM_VMEXIT_SKINIT0x86: |
5166 | case SVM_VMEXIT_RDTSCP0x87: |
5167 | case SVM_VMEXIT_ICEBP0x88: |
5168 | case SVM_VMEXIT_INVLPGA0x7A: |
5169 | ret = vmm_inject_ud(vcpu); |
5170 | update_rip = 0; |
5171 | break; |
5172 | default: |
5173 | DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__, |
5174 | exit_reason, (uint64_t)vcpu->vc_control_pa); |
5175 | return (EINVAL22); |
5176 | } |
5177 | |
5178 | if (update_rip) { |
5179 | vmcb->v_rip = vcpu->vc_gueststate.vg_rip; |
5180 | |
5181 | if (rflags & PSL_T0x00000100) { |
5182 | if (vmm_inject_db(vcpu)) { |
5183 | printf("%s: can't inject #DB exception to " |
5184 | "guest", __func__); |
5185 | return (EINVAL22); |
5186 | } |
5187 | } |
5188 | } |
5189 | |
5190 | /* Enable SVME in EFER (must always be set) */ |
5191 | vmcb->v_efer |= EFER_SVME0x00001000; |
5192 | svm_set_dirty(vcpu, SVM_CLEANBITS_CR(1 << 5)); |
5193 | |
5194 | return (ret); |
5195 | } |
5196 | |
5197 | /* |
5198 | * vmx_handle_exit |
5199 | * |
5200 | * Handle exits from the VM by decoding the exit reason and calling various |
5201 | * subhandlers as needed. |
5202 | */ |
5203 | int |
5204 | vmx_handle_exit(struct vcpu *vcpu) |
5205 | { |
5206 | uint64_t exit_reason, rflags, istate; |
5207 | int update_rip, ret = 0; |
5208 | |
5209 | update_rip = 0; |
5210 | exit_reason = vcpu->vc_gueststate.vg_exit_reason; |
5211 | rflags = vcpu->vc_gueststate.vg_rflags; |
5212 | |
5213 | switch (exit_reason) { |
5214 | case VMX_EXIT_INT_WINDOW7: |
5215 | if (!(rflags & PSL_I0x00000200)) { |
5216 | DPRINTF("%s: impossible interrupt window exit " |
5217 | "config\n", __func__); |
5218 | ret = EINVAL22; |
5219 | break; |
5220 | } |
5221 | |
5222 | ret = EAGAIN35; |
5223 | update_rip = 0; |
5224 | break; |
5225 | case VMX_EXIT_EPT_VIOLATION48: |
5226 | ret = vmx_handle_np_fault(vcpu); |
5227 | break; |
5228 | case VMX_EXIT_CPUID10: |
5229 | ret = vmm_handle_cpuid(vcpu); |
5230 | update_rip = 1; |
5231 | break; |
5232 | case VMX_EXIT_IO30: |
5233 | ret = vmx_handle_inout(vcpu); |
5234 | update_rip = 1; |
5235 | break; |
5236 | case VMX_EXIT_EXTINT1: |
5237 | vmx_handle_intr(vcpu); |
5238 | update_rip = 0; |
5239 | break; |
5240 | case VMX_EXIT_CR_ACCESS28: |
5241 | ret = vmx_handle_cr(vcpu); |
5242 | update_rip = 1; |
5243 | break; |
5244 | case VMX_EXIT_HLT12: |
5245 | ret = vmx_handle_hlt(vcpu); |
5246 | update_rip = 1; |
5247 | break; |
5248 | case VMX_EXIT_RDMSR31: |
5249 | ret = vmx_handle_rdmsr(vcpu); |
5250 | update_rip = 1; |
5251 | break; |
5252 | case VMX_EXIT_WRMSR32: |
5253 | ret = vmx_handle_wrmsr(vcpu); |
5254 | update_rip = 1; |
5255 | break; |
5256 | case VMX_EXIT_XSETBV55: |
5257 | ret = vmx_handle_xsetbv(vcpu); |
5258 | update_rip = 1; |
5259 | break; |
5260 | case VMX_EXIT_MWAIT36: |
5261 | case VMX_EXIT_MONITOR39: |
5262 | case VMX_EXIT_VMXON27: |
5263 | case VMX_EXIT_VMWRITE25: |
5264 | case VMX_EXIT_VMREAD23: |
5265 | case VMX_EXIT_VMLAUNCH20: |
5266 | case VMX_EXIT_VMRESUME24: |
5267 | case VMX_EXIT_VMPTRLD21: |
5268 | case VMX_EXIT_VMPTRST22: |
5269 | case VMX_EXIT_VMCLEAR19: |
5270 | case VMX_EXIT_VMCALL18: |
5271 | case VMX_EXIT_VMFUNC59: |
5272 | case VMX_EXIT_VMXOFF26: |
5273 | case VMX_EXIT_INVVPID53: |
5274 | case VMX_EXIT_INVEPT50: |
5275 | ret = vmm_inject_ud(vcpu); |
5276 | update_rip = 0; |
5277 | break; |
5278 | case VMX_EXIT_TRIPLE_FAULT2: |
5279 | #ifdef VMM_DEBUG |
5280 | DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__, |
5281 | vcpu->vc_parent->vm_id, vcpu->vc_id); |
5282 | vmx_vcpu_dump_regs(vcpu); |
5283 | dump_vcpu(vcpu); |
5284 | vmx_dump_vmcs(vcpu); |
5285 | #endif /* VMM_DEBUG */ |
5286 | ret = EAGAIN35; |
5287 | update_rip = 0; |
5288 | break; |
5289 | default: |
5290 | #ifdef VMM_DEBUG |
5291 | DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__, |
5292 | exit_reason, vmx_exit_reason_decode(exit_reason)); |
5293 | #endif /* VMM_DEBUG */ |
5294 | return (EINVAL22); |
5295 | } |
5296 | |
5297 | if (update_rip) { |
5298 | if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, |
5299 | vcpu->vc_gueststate.vg_rip)) { |
5300 | printf("%s: can't advance rip\n", __func__); |
5301 | return (EINVAL22); |
5302 | } |
5303 | |
5304 | if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, |
5305 | &istate)) { |
5306 | printf("%s: can't read interruptibility state\n", |
5307 | __func__); |
5308 | return (EINVAL22); |
5309 | } |
5310 | |
5311 | /* Interruptibility state 0x3 covers blocking by STI and MOV SS */ |
5312 | istate &= ~0x3; |
5313 | |
5314 | if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, |
5315 | istate)) { |
5316 | printf("%s: can't write interruptibility state\n", |
5317 | __func__); |
5318 | return (EINVAL22); |
5319 | } |
5320 | |
5321 | if (rflags & PSL_T0x00000100) { |
5322 | if (vmm_inject_db(vcpu)) { |
5323 | printf("%s: can't inject #DB exception to " |
5324 | "guest", __func__); |
5325 | return (EINVAL22); |
5326 | } |
5327 | } |
5328 | } |
5329 | |
5330 | return (ret); |
5331 | } |
5332 | |
5333 | /* |
5334 | * vmm_inject_gp |
5335 | * |
5336 | * Injects a #GP exception into the guest VCPU. |
5337 | * |
5338 | * Parameters: |
5339 | * vcpu: vcpu to inject into |
5340 | * |
5341 | * Return values: |
5342 | * Always 0 |
5343 | */ |
5344 | int |
5345 | vmm_inject_gp(struct vcpu *vcpu) |
5346 | { |
5347 | DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__, |
5348 | vcpu->vc_gueststate.vg_rip); |
5349 | vcpu->vc_event = VMM_EX_GP13; |
5350 | |
5351 | return (0); |
5352 | } |
5353 | |
5354 | /* |
5355 | * vmm_inject_ud |
5356 | * |
5357 | * Injects a #UD exception into the guest VCPU. |
5358 | * |
5359 | * Parameters: |
5360 | * vcpu: vcpu to inject into |
5361 | * |
5362 | * Return values: |
5363 | * Always 0 |
5364 | */ |
5365 | int |
5366 | vmm_inject_ud(struct vcpu *vcpu) |
5367 | { |
5368 | DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__, |
5369 | vcpu->vc_gueststate.vg_rip); |
5370 | vcpu->vc_event = VMM_EX_UD6; |
5371 | |
5372 | return (0); |
5373 | } |
5374 | |
5375 | /* |
5376 | * vmm_inject_db |
5377 | * |
5378 | * Injects a #DB exception into the guest VCPU. |
5379 | * |
5380 | * Parameters: |
5381 | * vcpu: vcpu to inject into |
5382 | * |
5383 | * Return values: |
5384 | * Always 0 |
5385 | */ |
5386 | int |
5387 | vmm_inject_db(struct vcpu *vcpu) |
5388 | { |
5389 | DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__, |
5390 | vcpu->vc_gueststate.vg_rip); |
5391 | vcpu->vc_event = VMM_EX_DB1; |
5392 | |
5393 | return (0); |
5394 | } |
5395 | |
5396 | /* |
5397 | * vmm_get_guest_memtype |
5398 | * |
5399 | * Returns the type of memory 'gpa' refers to in the context of vm 'vm' |
5400 | */ |
5401 | int |
5402 | vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) |
5403 | { |
5404 | int i; |
5405 | struct vm_mem_range *vmr; |
5406 | |
5407 | if (gpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && gpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) { |
5408 | DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); |
5409 | return (VMM_MEM_TYPE_REGULAR); |
5410 | } |
5411 | |
5412 | /* XXX Use binary search? */ |
5413 | for (i = 0; i < vm->vm_nmemranges; i++) { |
5414 | vmr = &vm->vm_memranges[i]; |
5415 | |
5416 | /* |
5417 | * vm_memranges is sorted in ascending order, so once gpa falls |
5418 | * below the start of this range it cannot be in any later range. |
5419 | */ |
5420 | if (gpa < vmr->vmr_gpa) |
5421 | break; |
5422 | |
5423 | if (gpa < vmr->vmr_gpa + vmr->vmr_size) |
5424 | return (VMM_MEM_TYPE_REGULAR); |
5425 | } |
5426 | |
5427 | DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); |
5428 | return (VMM_MEM_TYPE_UNKNOWN); |
5429 | } |
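
The XXX above suggests a binary search; because the memranges are kept in ascending, non-overlapping order, that is straightforward. A hedged standalone sketch follows, using a local struct with illustrative field names (range_gpa, range_size) rather than the kernel's struct vm_mem_range:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct range {                  /* stand-in for struct vm_mem_range */
        uint64_t range_gpa;     /* illustrative field names */
        uint64_t range_size;
};

/* Return 1 if gpa falls inside one of 'n' ascending, non-overlapping ranges. */
static int
range_contains(const struct range *r, size_t n, uint64_t gpa)
{
        size_t lo = 0, hi = n;

        while (lo < hi) {
                size_t mid = lo + (hi - lo) / 2;

                if (gpa < r[mid].range_gpa)
                        hi = mid;
                else if (gpa >= r[mid].range_gpa + r[mid].range_size)
                        lo = mid + 1;
                else
                        return 1;
        }
        return 0;
}

int
main(void)
{
        struct range rs[] = {
                { 0x0,      0x9f000 },
                { 0x100000, 0x3ff00000 },
        };

        printf("%d %d\n", range_contains(rs, 2, 0x1234),
            range_contains(rs, 2, 0xa0000));
        return 0;
}
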
5430 | |
5431 | /* |
5432 | * vmx_get_exit_qualification |
5433 | * |
5434 | * Return the current VMCS' exit qualification information |
5435 | */ |
5436 | int |
5437 | vmx_get_exit_qualification(uint64_t *exit_qualification) |
5438 | { |
5439 | if (vmread(VMCS_GUEST_EXIT_QUALIFICATION0x6400, exit_qualification)) { |
5440 | printf("%s: can't extract exit qual\n", __func__); |
5441 | return (EINVAL22); |
5442 | } |
5443 | |
5444 | return (0); |
5445 | } |
5446 | |
5447 | /* |
5448 | * vmx_get_guest_faulttype |
5449 | * |
5450 | * Determines the type (R/W/X) of the last fault taken by the VCPU that |
5451 | * last ran on this PCPU. |
5452 | */ |
5453 | int |
5454 | vmx_get_guest_faulttype(void) |
5455 | { |
5456 | uint64_t exit_qual; |
5457 | uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3) | |
5458 | IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4) | IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5); |
5459 | vm_prot_t prot, was_prot; |
5460 | |
5461 | if (vmx_get_exit_qualification(&exit_qual)) |
5462 | return (-1); |
5463 | |
5464 | if ((exit_qual & presentmask) == 0) |
5465 | return VM_FAULT_INVALID((vm_fault_t) 0x0); |
5466 | |
5467 | was_prot = 0; |
5468 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3)) |
5469 | was_prot |= PROT_READ0x01; |
5470 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4)) |
5471 | was_prot |= PROT_WRITE0x02; |
5472 | if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5)) |
5473 | was_prot |= PROT_EXEC0x04; |
5474 | |
5475 | prot = 0; |
5476 | if (exit_qual & IA32_VMX_EPT_FAULT_READ(1ULL << 0)) |
5477 | prot = PROT_READ0x01; |
5478 | else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE(1ULL << 1)) |
5479 | prot = PROT_WRITE0x02; |
5480 | else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC(1ULL << 2)) |
5481 | prot = PROT_EXEC0x04; |
5482 | |
5483 | if ((was_prot & prot) == 0) |
5484 | return VM_FAULT_PROTECT((vm_fault_t) 0x1); |
5485 | |
5486 | return (-1); |
5487 | } |
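
The decode above splits the exit qualification into the access that faulted (bits 0:2) and the permissions the page already had (bits 3:5), and reports a protection fault only when the attempted access is not among the existing permissions. A standalone sketch of that decision for one example value, using the same bit positions:

#include <stdio.h>
#include <stdint.h>

/* Exit-qualification bits used by vmx_get_guest_faulttype(). */
#define FAULT_READ              (1ULL << 0)
#define FAULT_WRITE             (1ULL << 1)
#define FAULT_EXEC              (1ULL << 2)
#define FAULT_WAS_READABLE      (1ULL << 3)
#define FAULT_WAS_WRITABLE      (1ULL << 4)
#define FAULT_WAS_EXECABLE      (1ULL << 5)

int
main(void)
{
        /* Write to a page that was only readable: a protection fault. */
        uint64_t exit_qual = FAULT_WRITE | FAULT_WAS_READABLE;
        int was_prot = 0, prot = 0;

        if (exit_qual & FAULT_WAS_READABLE)
                was_prot |= 0x01;               /* PROT_READ */
        if (exit_qual & FAULT_WAS_WRITABLE)
                was_prot |= 0x02;               /* PROT_WRITE */
        if (exit_qual & FAULT_WAS_EXECABLE)
                was_prot |= 0x04;               /* PROT_EXEC */

        if (exit_qual & FAULT_READ)
                prot = 0x01;
        else if (exit_qual & FAULT_WRITE)
                prot = 0x02;
        else if (exit_qual & FAULT_EXEC)
                prot = 0x04;

        printf("%s\n", (was_prot & prot) == 0 ?
            "protection fault" : "not a protection fault");
        return 0;
}
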
5488 | |
5489 | /* |
5490 | * svm_get_guest_faulttype |
5491 | * |
5492 | * Determines the type (R/W/X) of the last fault taken by the VCPU that |
5493 | * last ran on this PCPU. |
5494 | */ |
5495 | int |
5496 | svm_get_guest_faulttype(struct vmcb *vmcb) |
5497 | { |
5498 | if (!(vmcb->v_exitinfo1 & 0x1)) |
5499 | return VM_FAULT_INVALID((vm_fault_t) 0x0); |
5500 | return VM_FAULT_PROTECT((vm_fault_t) 0x1); |
5501 | } |
5502 | |
5503 | /* |
5504 | * svm_fault_page |
5505 | * |
5506 | * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' |
5507 | * at address 'gpa'. |
5508 | */ |
5509 | int |
5510 | svm_fault_page(struct vcpu *vcpu, paddr_t gpa) |
5511 | { |
5512 | int ret; |
5513 | |
5514 | ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2), |
5515 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04); |
5516 | if (ret) |
5517 | printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", |
5518 | __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); |
5519 | |
5520 | return (ret); |
5521 | } |
5522 | |
5523 | /* |
5524 | * svm_handle_np_fault |
5525 | * |
5526 | * High level nested paging handler for SVM. Verifies that a fault is for a |
5527 | * valid memory region, then faults a page, or aborts otherwise. |
5528 | */ |
5529 | int |
5530 | svm_handle_np_fault(struct vcpu *vcpu) |
5531 | { |
5532 | uint64_t gpa; |
5533 | int gpa_memtype, ret; |
5534 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5535 | |
5536 | ret = 0; |
5537 | |
5538 | gpa = vmcb->v_exitinfo2; |
5539 | |
5540 | gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); |
5541 | switch (gpa_memtype) { |
5542 | case VMM_MEM_TYPE_REGULAR: |
5543 | ret = svm_fault_page(vcpu, gpa); |
5544 | break; |
5545 | default: |
5546 | printf("unknown memory type %d for GPA 0x%llx\n", |
5547 | gpa_memtype, gpa); |
5548 | return (EINVAL22); |
5549 | } |
5550 | |
5551 | return (ret); |
5552 | } |
5553 | |
5554 | /* |
5555 | * vmx_fault_page |
5556 | * |
5557 | * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' |
5558 | * at address 'gpa'. |
5559 | * |
5560 | * Parameters: |
5561 | * vcpu: guest VCPU requiring the page to be faulted into the UVM map |
5562 | * gpa: guest physical address that triggered the fault |
5563 | * |
5564 | * Return Values: |
5565 | * 0: if successful |
5566 | * EINVAL: if fault type could not be determined or VMCS reload fails |
5567 | * EAGAIN: if a protection fault occurred, ie writing to a read-only page |
5568 | * errno: if uvm_fault(9) fails to wire in the page |
5569 | */ |
5570 | int |
5571 | vmx_fault_page(struct vcpu *vcpu, paddr_t gpa) |
5572 | { |
5573 | int fault_type, ret; |
5574 | |
5575 | fault_type = vmx_get_guest_faulttype(); |
5576 | if (fault_type == -1) { |
5577 | printf("%s: invalid fault type\n", __func__); |
5578 | return (EINVAL22); |
5579 | } |
5580 | |
5581 | if (fault_type == VM_FAULT_PROTECT((vm_fault_t) 0x1)) { |
5582 | vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT; |
5583 | return (EAGAIN35); |
5584 | } |
5585 | |
5586 | /* We may sleep during uvm_fault(9), so reload VMCS. */ |
5587 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
5588 | ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2), |
5589 | PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04); |
5590 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
5591 | printf("%s: failed to reload vmcs\n", __func__); |
5592 | return (EINVAL22); |
5593 | } |
5594 | |
5595 | if (ret) |
5596 | printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", |
5597 | __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); |
5598 | |
5599 | return (ret); |
5600 | } |
5601 | |
5602 | /* |
5603 | * vmx_handle_np_fault |
5604 | * |
5605 | * High level nested paging handler for VMX. Verifies that a fault is for a |
5606 | * valid memory region, then faults a page, or aborts otherwise. |
5607 | */ |
5608 | int |
5609 | vmx_handle_np_fault(struct vcpu *vcpu) |
5610 | { |
5611 | uint64_t gpa; |
5612 | int gpa_memtype, ret; |
5613 | |
5614 | ret = 0; |
5615 | if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS0x2400, &gpa)) { |
5616 | printf("%s: cannot extract faulting pa\n", __func__); |
5617 | return (EINVAL22); |
5618 | } |
5619 | |
5620 | gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); |
5621 | switch (gpa_memtype) { |
5622 | case VMM_MEM_TYPE_REGULAR: |
5623 | ret = vmx_fault_page(vcpu, gpa); |
5624 | break; |
5625 | default: |
5626 | printf("unknown memory type %d for GPA 0x%llx\n", |
5627 | gpa_memtype, gpa); |
5628 | return (EINVAL22); |
5629 | } |
5630 | |
5631 | return (ret); |
5632 | } |
5633 | |
5634 | /* |
5635 | * vmm_get_guest_cpu_cpl |
5636 | * |
5637 | * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the |
5638 | * VMCS field for the DPL of SS (this seems odd, but is documented that way |
5639 | * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl' |
5640 | * field, as per the APM. |
5641 | * |
5642 | * Parameters: |
5643 | * vcpu: guest VCPU for which CPL is to be checked |
5644 | * |
5645 | * Return Values: |
5646 | * -1: the CPL could not be determined |
5647 | * 0-3 indicating the current CPL. For real mode operation, 0 is returned. |
5648 | */ |
5649 | int |
5650 | vmm_get_guest_cpu_cpl(struct vcpu *vcpu) |
5651 | { |
5652 | int mode; |
5653 | struct vmcb *vmcb; |
5654 | uint64_t ss_ar; |
5655 | |
5656 | mode = vmm_get_guest_cpu_mode(vcpu); |
5657 | |
5658 | if (mode == VMM_CPU_MODE_UNKNOWN) |
5659 | return (-1); |
5660 | |
5661 | if (mode == VMM_CPU_MODE_REAL) |
5662 | return (0); |
5663 | |
5664 | if (vmm_softc->mode == VMM_MODE_SVM || |
5665 | vmm_softc->mode == VMM_MODE_RVI) { |
5666 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
5667 | return (vmcb->v_cpl); |
5668 | } else if (vmm_softc->mode == VMM_MODE_VMX || |
5669 | vmm_softc->mode == VMM_MODE_EPT) { |
5670 | if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &ss_ar)) |
5671 | return (-1); |
5672 | return ((ss_ar & 0x60) >> 5); |
5673 | } else |
5674 | return (-1); |
5675 | } |
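
On Intel the CPL is read from the DPL field of the SS access-rights word, bits 6:5, which is exactly what the (ss_ar & 0x60) >> 5 expression above extracts. A one-line standalone demonstration with an example access-rights value:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t ss_ar = 0xf3;  /* example: present, DPL 3, data RW */

        /* Same extraction as vmm_get_guest_cpu_cpl(): DPL is bits 6:5. */
        printf("cpl=%d\n", (int)((ss_ar & 0x60) >> 5));
        return 0;
}
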
5676 | |
5677 | /* |
5678 | * vmm_get_guest_cpu_mode |
5679 | * |
5680 | * Determines current CPU mode of 'vcpu'. |
5681 | * |
5682 | * Parameters: |
5683 | * vcpu: guest VCPU for which mode is to be checked |
5684 | * |
5685 | * Return Values: |
5686 | * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be |
5687 | * ascertained. |
5688 | */ |
5689 | int |
5690 | vmm_get_guest_cpu_mode(struct vcpu *vcpu) |
5691 | { |
5692 | uint64_t cr0, efer, cs_ar; |
5693 | uint8_t l, dib; |
5694 | struct vmcb *vmcb; |
5695 | struct vmx_msr_store *msr_store; |
5696 | |
5697 | if (vmm_softc->mode == VMM_MODE_SVM || |
5698 | vmm_softc->mode == VMM_MODE_RVI) { |
5699 | vmcb = (struct vmcb *)vcpu->vc_control_va; |
5700 | cr0 = vmcb->v_cr0; |
5701 | efer = vmcb->v_efer; |
5702 | cs_ar = vmcb->v_cs.vs_attr; |
5703 | cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000); |
5704 | } else if (vmm_softc->mode == VMM_MODE_VMX || |
5705 | vmm_softc->mode == VMM_MODE_EPT) { |
5706 | if (vmread(VMCS_GUEST_IA32_CR00x6800, &cr0)) |
5707 | return (VMM_CPU_MODE_UNKNOWN); |
5708 | if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &cs_ar)) |
5709 | return (VMM_CPU_MODE_UNKNOWN); |
5710 | msr_store = |
5711 | (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; |
5712 | efer = msr_store[VCPU_REGS_EFER0].vms_data; |
5713 | } else |
5714 | return (VMM_CPU_MODE_UNKNOWN); |
5715 | |
5716 | l = (cs_ar & 0x2000) >> 13; |
5717 | dib = (cs_ar & 0x4000) >> 14; |
5718 | |
5719 | /* Check CR0.PE */ |
5720 | if (!(cr0 & CR0_PE0x00000001)) |
5721 | return (VMM_CPU_MODE_REAL); |
5722 | |
5723 | /* Check EFER */ |
5724 | if (efer & EFER_LMA0x00000400) { |
5725 | /* Could be compat or long mode, check CS.L */ |
5726 | if (l) |
5727 | return (VMM_CPU_MODE_LONG); |
5728 | else |
5729 | return (VMM_CPU_MODE_COMPAT); |
5730 | } |
5731 | |
5732 | /* Check prot vs prot32 */ |
5733 | if (dib) |
5734 | return (VMM_CPU_MODE_PROT32); |
5735 | else |
5736 | return (VMM_CPU_MODE_PROT); |
5737 | } |
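
The mode test above is ordered: CR0.PE clear means real mode; EFER.LMA set means long or compatibility mode depending on CS.L; otherwise CS.D/B selects 32-bit versus 16-bit protected mode. A standalone sketch of the same decision, using the same access-rights bit positions (13 for CS.L, 14 for CS.D/B) as the code above:

#include <stdio.h>
#include <stdint.h>

#define CR0_PE          0x00000001ULL
#define EFER_LMA        0x00000400ULL
#define CS_AR_L         0x2000ULL       /* CS.L, bit 13 */
#define CS_AR_DB        0x4000ULL       /* CS.D/B, bit 14 */

static const char *
guest_cpu_mode(uint64_t cr0, uint64_t efer, uint64_t cs_ar)
{
        if (!(cr0 & CR0_PE))
                return "real";
        if (efer & EFER_LMA)
                return (cs_ar & CS_AR_L) ? "long" : "compat";
        return (cs_ar & CS_AR_DB) ? "prot32" : "prot";
}

int
main(void)
{
        /* 64-bit guest: CR0.PE|PG set, EFER.LMA set, CS.L set. */
        printf("%s\n", guest_cpu_mode(CR0_PE | 0x80000000ULL, EFER_LMA,
            CS_AR_L | 0x9b));
        return 0;
}
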
5738 | |
5739 | /* |
5740 | * svm_handle_inout |
5741 | * |
5742 | * Exit handler for IN/OUT instructions. |
5743 | * |
5744 | * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these |
5745 | * will be passed to vmd for completion. |
5746 | * |
5747 | * Parameters: |
5748 | * vcpu: The VCPU where the IN/OUT instruction occurred |
5749 | * |
5750 | * Return values: |
5751 | * 0: if successful |
5752 | * EINVAL: an invalid IN/OUT instruction was encountered |
5753 | * EAGAIN: return to vmd - more processing needed in userland |
5754 | */ |
5755 | int |
5756 | svm_handle_inout(struct vcpu *vcpu) |
5757 | { |
5758 | uint64_t insn_length, exit_qual; |
5759 | int ret; |
5760 | struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; |
5761 | |
5762 | insn_length = vmcb->v_exitinfo2 - vmcb->v_rip; |
5763 | if (insn_length != 1 && insn_length != 2) { |
5764 | DPRINTF("%s: IN/OUT instruction with length %lld not " |
5765 | "supported\n", __func__, insn_length); |
5766 | return (EINVAL22); |
5767 | } |
5768 | |
5769 | exit_qual = vmcb->v_exitinfo1; |
5770 | |
5771 | /* Bit 0 - direction */ |
5772 | vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x1); |
5773 | /* Bit 2 - string instruction? */ |
5774 | vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2; |
5775 | /* Bit 3 - REP prefix? */ |
5776 | vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3; |
5777 | |
5778 | /* Bits 4:6 - size of exit */ |
5779 | if (exit_qual & 0x10) |
5780 | vcpu->vc_exit.vei.vei_size = 1; |
5781 | else if (exit_qual & 0x20) |
5782 | vcpu->vc_exit.vei.vei_size = 2; |
5783 | else if (exit_qual & 0x40) |
5784 | vcpu->vc_exit.vei.vei_size = 4; |
5785 | |
5786 | /* Bit 16:31 - port */ |
5787 | vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; |
5788 | /* Data */ |
5789 | vcpu->vc_exit.vei.vei_data = vmcb->v_rax; |
5790 | |
5791 | vcpu->vc_gueststate.vg_rip += insn_length; |
5792 | |
5793 | /* |
5794 | * The following ports usually belong to devices owned by vmd. |
5795 | * Return EAGAIN to signal help needed from userspace (vmd). |
5796 | * Return 0 to indicate we don't care about this port. |
5797 | * |
5798 | * XXX something better than a hardcoded list here, maybe |
5799 | * have vmd configure this via the device list in the vm create params? |
5800 | */ |
5801 | switch (vcpu->vc_exit.vei.vei_port) { |
5802 | case IO_ICU10x020 ... IO_ICU10x020 + 1: |
5803 | case 0x40 ... 0x43: |
5804 | case PCKBC_AUX0x61: |
5805 | case IO_RTC0x070 ... IO_RTC0x070 + 1: |
5806 | case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1: |
5807 | case 0x3f8 ... 0x3ff: |
5808 | case ELCR00x4D0 ... ELCR10x4D1: |
5809 | case 0x500 ... 0x511: |
5810 | case 0x514: |
5811 | case 0x518: |
5812 | case 0xcf8: |
5813 | case 0xcfc ... 0xcff: |
5814 | case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF: |
5815 | ret = EAGAIN35; |
5816 | break; |
5817 | default: |
5818 | /* Reads from unsupported ports return FFs */ |
5819 | if (vcpu->vc_exit.vei.vei_dir == 1) { |
5820 | switch(vcpu->vc_exit.vei.vei_size) { |
5821 | case 1: |
5822 | vcpu->vc_gueststate.vg_rax |= 0xFF; |
5823 | vmcb->v_rax |= 0xFF; |
5824 | break; |
5825 | case 2: |
5826 | vcpu->vc_gueststate.vg_rax |= 0xFFFF; |
5827 | vmcb->v_rax |= 0xFFFF; |
5828 | break; |
5829 | case 4: |
5830 | vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF; |
5831 | vmcb->v_rax |= 0xFFFFFFFF; |
5832 | break; |
5833 | } |
5834 | } |
5835 | ret = 0; |
5836 | } |
5837 | |
5838 | return (ret); |
5839 | } |
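
For an IOIO intercept, EXITINFO1 is decoded above as: bit 0 the direction (1 = IN), bit 2 string, bit 3 REP, a one-hot operand size in bits 4/5/6 (1/2/4 bytes), and the port in bits 16:31. A standalone decode of one illustrative value (not a value captured from hardware):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Illustrative EXITINFO1 for a 1-byte IN from port 0x71. */
        uint64_t exitinfo1 = 0x00710011ULL;
        int size = 0;

        if (exitinfo1 & 0x10)
                size = 1;
        else if (exitinfo1 & 0x20)
                size = 2;
        else if (exitinfo1 & 0x40)
                size = 4;

        printf("dir=%s string=%llu rep=%llu size=%d port=0x%llx\n",
            (exitinfo1 & 0x1) ? "in" : "out",
            (unsigned long long)((exitinfo1 & 0x4) >> 2),
            (unsigned long long)((exitinfo1 & 0x8) >> 3),
            size,
            (unsigned long long)((exitinfo1 & 0xFFFF0000ULL) >> 16));
        return 0;
}
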
5840 | |
5841 | /* |
5842 | * vmx_handle_inout |
5843 | * |
5844 | * Exit handler for IN/OUT instructions. |
5845 | * |
5846 | * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these |
5847 | * will be passed to vmd for completion. |
5848 | */ |
5849 | int |
5850 | vmx_handle_inout(struct vcpu *vcpu) |
5851 | { |
5852 | uint64_t insn_length, exit_qual; |
5853 | int ret; |
5854 | |
5855 | if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) { |
5856 | printf("%s: can't obtain instruction length\n", __func__); |
5857 | return (EINVAL22); |
5858 | } |
5859 | |
5860 | if (insn_length != 1 && insn_length != 2) { |
5861 | DPRINTF("%s: IN/OUT instruction with length %lld not " |
5862 | "supported\n", __func__, insn_length); |
5863 | return (EINVAL22); |
5864 | } |
5865 | |
5866 | if (vmx_get_exit_qualification(&exit_qual)) { |
5867 | printf("%s: can't get exit qual\n", __func__); |
5868 | return (EINVAL22); |
5869 | } |
5870 | |
5871 | /* Bits 0:2 - size of exit */ |
5872 | vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1; |
5873 | /* Bit 3 - direction */ |
5874 | vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x8) >> 3; |
5875 | /* Bit 4 - string instruction? */ |
5876 | vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4; |
5877 | /* Bit 5 - REP prefix? */ |
5878 | vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5; |
5879 | /* Bit 6 - Operand encoding */ |
5880 | vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6; |
5881 | /* Bit 16:31 - port */ |
5882 | vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; |
5883 | /* Data */ |
5884 | vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax; |
5885 | |
5886 | vcpu->vc_gueststate.vg_rip += insn_length; |
5887 | |
5888 | /* |
5889 | * The following ports usually belong to devices owned by vmd. |
5890 | * Return EAGAIN to signal help needed from userspace (vmd). |
5891 | * Return 0 to indicate we don't care about this port. |
5892 | * |
5893 | * XXX something better than a hardcoded list here, maybe |
5894 | * have vmd configure this via the device list in the vm create params? |
5895 | */ |
5896 | switch (vcpu->vc_exit.vei.vei_port) { |
5897 | case IO_ICU10x020 ... IO_ICU10x020 + 1: |
5898 | case 0x40 ... 0x43: |
5899 | case PCKBC_AUX0x61: |
5900 | case IO_RTC0x070 ... IO_RTC0x070 + 1: |
5901 | case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1: |
5902 | case 0x3f8 ... 0x3ff: |
5903 | case ELCR00x4D0 ... ELCR10x4D1: |
5904 | case 0x500 ... 0x511: |
5905 | case 0x514: |
5906 | case 0x518: |
5907 | case 0xcf8: |
5908 | case 0xcfc ... 0xcff: |
5909 | case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF: |
5910 | ret = EAGAIN35; |
5911 | break; |
5912 | default: |
5913 | /* Reads from unsupported ports return FFs */ |
5914 | if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) { |
5915 | if (vcpu->vc_exit.vei.vei_size == 4) |
5916 | vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF; |
5917 | else if (vcpu->vc_exit.vei.vei_size == 2) |
5918 | vcpu->vc_gueststate.vg_rax |= 0xFFFF; |
5919 | else if (vcpu->vc_exit.vei.vei_size == 1) |
5920 | vcpu->vc_gueststate.vg_rax |= 0xFF; |
5921 | } |
5922 | ret = 0; |
5923 | } |
5924 | |
5925 | return (ret); |
5926 | } |
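
The VMX layout differs from the SVM one above: bits 0:2 hold the size minus one, bit 3 the direction (1 = IN), bit 4 string, bit 5 REP, bit 6 the operand encoding (1 = immediate port), and bits 16:31 the port. A standalone decode of one illustrative value:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Illustrative exit qualification: 1-byte IN (immediate) from 0x71. */
        uint64_t q = 0x00710048ULL;

        printf("size=%llu dir=%s string=%llu rep=%llu imm=%llu port=0x%llx\n",
            (unsigned long long)((q & 0x7) + 1),
            (q & 0x8) ? "in" : "out",
            (unsigned long long)((q & 0x10) >> 4),
            (unsigned long long)((q & 0x20) >> 5),
            (unsigned long long)((q & 0x40) >> 6),
            (unsigned long long)((q & 0xFFFF0000ULL) >> 16));
        return 0;
}
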
5927 | |
5928 | /* |
5929 | * vmx_load_pdptes |
5930 | * |
5931 | * Update the PDPTEs in the VMCS with the values currently indicated by the |
5932 | * guest CR3. This is used for 32-bit PAE guests when enabling paging. |
5933 | * |
5934 | * Parameters |
5935 | * vcpu: The vcpu whose PDPTEs should be loaded |
5936 | * |
5937 | * Return values: |
5938 | * 0: if successful |
5939 | * EINVAL: if the PDPTEs could not be loaded |
5940 | * ENOMEM: memory allocation failure |
5941 | */ |
5942 | int |
5943 | vmx_load_pdptes(struct vcpu *vcpu) |
5944 | { |
5945 | uint64_t cr3, cr3_host_phys; |
5946 | vaddr_t cr3_host_virt; |
5947 | pd_entry_t *pdptes; |
5948 | int ret; |
5949 | |
5950 | if (vmread(VMCS_GUEST_IA32_CR30x6802, &cr3)) { |
5951 | printf("%s: can't read guest cr3\n", __func__); |
5952 | return (EINVAL22); |
5953 | } |
5954 | |
5955 | if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3, |
5956 | (paddr_t *)&cr3_host_phys)) { |
5957 | DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n", |
5958 | __func__); |
5959 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, 0)) { |
5960 | printf("%s: can't write guest PDPTE0\n", __func__); |
5961 | return (EINVAL22); |
5962 | } |
5963 | |
5964 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, 0)) { |
5965 | printf("%s: can't write guest PDPTE1\n", __func__); |
5966 | return (EINVAL22); |
5967 | } |
5968 | |
5969 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, 0)) { |
5970 | printf("%s: can't write guest PDPTE2\n", __func__); |
5971 | return (EINVAL22); |
5972 | } |
5973 | |
5974 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, 0)) { |
5975 | printf("%s: can't write guest PDPTE3\n", __func__); |
5976 | return (EINVAL22); |
5977 | } |
5978 | return (0); |
5979 | } |
5980 | |
5981 | ret = 0; |
5982 | |
5983 | /* We may sleep during km_alloc(9), so reload VMCS. */ |
5984 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
5985 | cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none, |
5986 | &kd_waitok); |
5987 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
5988 | printf("%s: failed to reload vmcs\n", __func__); |
5989 | ret = EINVAL22; |
5990 | goto exit; |
5991 | } |
5992 | |
5993 | if (!cr3_host_virt) { |
5994 | printf("%s: can't allocate address for guest CR3 mapping\n", |
5995 | __func__); |
5996 | return (ENOMEM12); |
5997 | } |
5998 | |
5999 | pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ0x01); |
6000 | |
6001 | pdptes = (pd_entry_t *)cr3_host_virt; |
6002 | if (vmwrite(VMCS_GUEST_PDPTE00x280A, pdptes[0])) { |
6003 | printf("%s: can't write guest PDPTE0\n", __func__); |
6004 | ret = EINVAL22; |
6005 | goto exit; |
6006 | } |
6007 | |
6008 | if (vmwrite(VMCS_GUEST_PDPTE10x280C, pdptes[1])) { |
6009 | printf("%s: can't write guest PDPTE1\n", __func__); |
6010 | ret = EINVAL22; |
6011 | goto exit; |
6012 | } |
6013 | |
6014 | if (vmwrite(VMCS_GUEST_PDPTE20x280E, pdptes[2])) { |
6015 | printf("%s: can't write guest PDPTE2\n", __func__); |
6016 | ret = EINVAL22; |
6017 | goto exit; |
6018 | } |
6019 | |
6020 | if (vmwrite(VMCS_GUEST_PDPTE30x2810, pdptes[3])) { |
6021 | printf("%s: can't write guest PDPTE3\n", __func__); |
6022 | ret = EINVAL22; |
6023 | goto exit; |
6024 | } |
6025 | |
6026 | exit: |
6027 | pmap_kremove(cr3_host_virt, PAGE_SIZE(1 << 12)); |
6028 | |
6029 | /* km_free(9) might sleep, so we need to reload VMCS. */ |
6030 | vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); |
6031 | km_free((void *)cr3_host_virt, PAGE_SIZE(1 << 12), &kv_any, &kp_none); |
6032 | if (vcpu_reload_vmcs_vmx(vcpu)) { |
6033 | printf("%s: failed to reload vmcs after km_free\n", __func__); |
6034 | ret = EINVAL22; |
6035 | } |
6036 | |
6037 | return (ret); |
6038 | } |
6039 | |
6040 | /* |
6041 | * vmx_handle_cr0_write |
6042 | * |
6043 | * Write handler for CR0. This function ensures valid values are written into |
6044 | * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc). |
6045 | * |
6046 | * Parameters |
6047 | * vcpu: The vcpu taking the cr0 write exit |
6048 | * r: The guest's desired (incoming) cr0 value |
6049 | * |
6050 | * Return values: |
6051 | * 0: if successful |
6052 | * EINVAL: if an error occurred |
6053 | */ |
6054 | int |
6055 | vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r) |
6056 | { |
6057 | struct vmx_msr_store *msr_store; |
6058 | struct vmx_invvpid_descriptor vid; |
6059 | uint64_t ectls, oldcr0, cr4, mask; |
6060 | int ret; |
6061 | |
6062 | /* Check must-be-0 bits */ |
6063 | mask = vcpu->vc_vmx_cr0_fixed1; |
6064 | if (~r & mask) { |
6065 | /* Inject #GP, let the guest handle it */ |
6066 | DPRINTF("%s: guest set invalid bits in %%cr0. Zeros " |
6067 | "mask=0x%llx, data=0x%llx\n", __func__, |
6068 | vcpu->vc_vmx_cr0_fixed1, r); |
6069 | vmm_inject_gp(vcpu); |
6070 | return (0); |
6071 | } |
6072 | |
6073 | /* Check must-be-1 bits */ |
6074 | mask = vcpu->vc_vmx_cr0_fixed0; |
6075 | if ((r & mask) != mask) { |
6076 | /* Inject #GP, let the guest handle it */ |
6077 | DPRINTF("%s: guest set invalid bits in %%cr0. Ones " |
6078 | "mask=0x%llx, data=0x%llx\n", __func__, |
6079 | vcpu->vc_vmx_cr0_fixed0, r); |
6080 | vmm_inject_gp(vcpu); |
6081 | return (0); |
6082 | } |
6083 | |
6084 | if (r & 0xFFFFFFFF00000000ULL) { |
6085 | DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid," |
6086 | " inject #GP, cr0=0x%llx\n", __func__, r); |
6087 | vmm_inject_gp(vcpu); |
6088 | return (0); |
6089 | } |
6090 | |
6 |