File: | src/usr.sbin/vmd/vmd.c |
Warning: | line 530, column 18 Access to field 'vm_vmid' results in a dereference of a null pointer (loaded from variable 'vm') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: vmd.c,v 1.152 2023/09/26 01:53:54 dv Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include <sys/types.h> | |||
20 | #include <sys/queue.h> | |||
21 | #include <sys/wait.h> | |||
22 | #include <sys/stat.h> | |||
23 | #include <sys/sysctl.h> | |||
24 | #include <sys/tty.h> | |||
25 | #include <sys/ttycom.h> | |||
26 | #include <sys/ioctl.h> | |||
27 | ||||
28 | #include <stdio.h> | |||
29 | #include <stdlib.h> | |||
30 | #include <string.h> | |||
31 | #include <termios.h> | |||
32 | #include <errno(*__errno()).h> | |||
33 | #include <event.h> | |||
34 | #include <fcntl.h> | |||
35 | #include <pwd.h> | |||
36 | #include <signal.h> | |||
37 | #include <syslog.h> | |||
38 | #include <unistd.h> | |||
39 | #include <util.h> | |||
40 | #include <ctype.h> | |||
41 | #include <grp.h> | |||
42 | ||||
43 | #include <machine/specialreg.h> | |||
44 | #include <machine/vmmvar.h> | |||
45 | ||||
46 | #include "proc.h" | |||
47 | #include "atomicio.h" | |||
48 | #include "vmd.h" | |||
49 | ||||
50 | __dead__attribute__((__noreturn__)) void usage(void); | |||
51 | ||||
52 | int main(int, char **); | |||
53 | int vmd_configure(void); | |||
54 | void vmd_sighdlr(int sig, short event, void *arg); | |||
55 | void vmd_shutdown(void); | |||
56 | int vmd_control_run(void); | |||
57 | int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); | |||
58 | int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); | |||
59 | int vmd_dispatch_agentx(int, struct privsep_proc *, struct imsg *); | |||
60 | int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); | |||
61 | int vmd_check_vmh(struct vm_dump_header *); | |||
62 | ||||
63 | int vm_instance(struct privsep *, struct vmd_vm **, | |||
64 | struct vmop_create_params *, uid_t); | |||
65 | int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); | |||
66 | int vm_claimid(const char *, int, uint32_t *); | |||
67 | void start_vm_batch(int, short, void*); | |||
68 | ||||
69 | static inline void vm_terminate(struct vmd_vm *, const char *); | |||
70 | ||||
71 | struct vmd *env; | |||
72 | ||||
73 | static struct privsep_proc procs[] = { | |||
74 | /* Keep "priv" on top as procs[0] */ | |||
75 | { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, | |||
76 | { "control", PROC_CONTROL, vmd_dispatch_control, control }, | |||
77 | { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, | |||
78 | vmm_shutdown, "/" }, | |||
79 | { "agentx", PROC_AGENTX, vmd_dispatch_agentx, vm_agentx, | |||
80 | vm_agentx_shutdown, "/" } | |||
81 | }; | |||
82 | ||||
83 | enum privsep_procid privsep_process; | |||
84 | ||||
85 | struct event staggered_start_timer; | |||
86 | ||||
87 | /* For the privileged process */ | |||
88 | static struct privsep_proc *proc_priv = &procs[0]; | |||
89 | static struct passwd proc_privpw; | |||
90 | static const uint8_t zero_mac[ETHER_ADDR_LEN6]; | |||
91 | ||||
92 | const char default_conffile[] = VMD_CONF"/etc/vm.conf"; | |||
93 | const char *conffile = default_conffile; | |||
94 | ||||
95 | int | |||
96 | vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
97 | { | |||
98 | struct privsep *ps = p->p_ps; | |||
99 | int res = 0, ret = 0, cmd = 0, verbose; | |||
100 | unsigned int v = 0, flags; | |||
101 | struct vmop_create_params vmc; | |||
102 | struct vmop_id vid; | |||
103 | struct vmop_result vmr; | |||
104 | struct vm_dump_header vmh; | |||
105 | struct vmd_vm *vm = NULL((void *)0); | |||
106 | char *str = NULL((void *)0); | |||
107 | uint32_t id = 0; | |||
108 | struct control_sock *rcs; | |||
109 | ||||
110 | switch (imsg->hdr.type) { | |||
111 | case IMSG_VMDOP_START_VM_REQUEST: | |||
112 | IMSG_SIZE_CHECK(imsg, &vmc)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmc)) fatalx("bad length imsg received (%s)", "&vmc" ); } while (0); | |||
113 | memcpy(&vmc, imsg->data, sizeof(vmc)); | |||
114 | vmc.vmc_kernel = imsg->fd; | |||
115 | ||||
116 | /* Try registering our VM in our list of known VMs. */ | |||
117 | if (vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid)) { | |||
118 | res = errno(*__errno()); | |||
119 | ||||
120 | /* Did we have a failure during lookup of a parent? */ | |||
121 | if (vm == NULL((void *)0)) { | |||
122 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
123 | break; | |||
124 | } | |||
125 | ||||
126 | /* Does the VM already exist? */ | |||
127 | if (res == EALREADY37) { | |||
128 | /* Is it already running? */ | |||
129 | if (vm->vm_state & VM_STATE_RUNNING0x01) { | |||
130 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
131 | break; | |||
132 | } | |||
133 | ||||
134 | /* If not running, are our flags ok? */ | |||
135 | if (vmc.vmc_flags && | |||
136 | vmc.vmc_flags != VMOP_CREATE_KERNEL0x02) { | |||
137 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
138 | break; | |||
139 | } | |||
140 | } | |||
141 | res = 0; | |||
142 | } | |||
143 | ||||
144 | /* Try to start the launch of the VM. */ | |||
145 | res = config_setvm(ps, vm, imsg->hdr.peerid, | |||
146 | vm->vm_params.vmc_owner.uid); | |||
147 | if (res) | |||
148 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
149 | break; | |||
150 | case IMSG_VMDOP_WAIT_VM_REQUEST: | |||
151 | case IMSG_VMDOP_TERMINATE_VM_REQUEST: | |||
152 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
153 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
154 | flags = vid.vid_flags; | |||
155 | cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; | |||
156 | ||||
157 | if ((id = vid.vid_id) == 0) { | |||
158 | /* Lookup vm (id) by name */ | |||
159 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
160 | res = ENOENT2; | |||
161 | break; | |||
162 | } | |||
163 | id = vm->vm_vmid; | |||
164 | } else if ((vm = vm_getbyvmid(id)) == NULL((void *)0)) { | |||
165 | res = ENOENT2; | |||
166 | break; | |||
167 | } | |||
168 | ||||
169 | /* Validate curent state of vm */ | |||
170 | if ((vm->vm_state & VM_STATE_SHUTDOWN0x04) && | |||
171 | (flags & VMOP_FORCE0x01) == 0) { | |||
172 | res = EALREADY37; | |||
173 | break; | |||
174 | } else if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
175 | res = EINVAL22; | |||
176 | break; | |||
177 | } else if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { | |||
178 | res = EPERM1; | |||
179 | break; | |||
180 | } | |||
181 | ||||
182 | /* Only relay TERMINATION requests, not WAIT requests */ | |||
183 | if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { | |||
184 | memset(&vid, 0, sizeof(vid)); | |||
185 | vid.vid_id = id; | |||
186 | vid.vid_flags = flags; | |||
187 | ||||
188 | if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
189 | imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) | |||
190 | return (-1); | |||
191 | } | |||
192 | break; | |||
193 | case IMSG_VMDOP_GET_INFO_VM_REQUEST: | |||
194 | proc_forward_imsg(ps, imsg, PROC_VMM, -1); | |||
195 | break; | |||
196 | case IMSG_VMDOP_LOAD: | |||
197 | IMSG_SIZE_CHECK(imsg, str)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*str)) fatalx("bad length imsg received (%s)", "str"); } while (0); /* at least one byte for path */ | |||
198 | str = get_string((uint8_t *)imsg->data, | |||
199 | IMSG_DATA_SIZE(imsg)((imsg)->hdr.len - sizeof(struct imsg_hdr))); | |||
200 | case IMSG_VMDOP_RELOAD: | |||
201 | if (vmd_reload(0, str) == -1) | |||
202 | cmd = IMSG_CTL_FAIL; | |||
203 | else | |||
204 | cmd = IMSG_CTL_OK; | |||
205 | free(str); | |||
206 | break; | |||
207 | case IMSG_CTL_RESET: | |||
208 | IMSG_SIZE_CHECK(imsg, &v)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&v)) fatalx("bad length imsg received (%s)", "&v"); } while (0); | |||
209 | memcpy(&v, imsg->data, sizeof(v)); | |||
210 | if (vmd_reload(v, NULL((void *)0)) == -1) | |||
211 | cmd = IMSG_CTL_FAIL; | |||
212 | else | |||
213 | cmd = IMSG_CTL_OK; | |||
214 | break; | |||
215 | case IMSG_CTL_VERBOSE: | |||
216 | IMSG_SIZE_CHECK(imsg, &verbose)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&verbose)) fatalx("bad length imsg received (%s)", "&verbose" ); } while (0); | |||
217 | memcpy(&verbose, imsg->data, sizeof(verbose)); | |||
218 | log_setverbose(verbose); | |||
219 | ||||
220 | proc_forward_imsg(ps, imsg, PROC_VMM, -1); | |||
221 | proc_forward_imsg(ps, imsg, PROC_PRIV, -1); | |||
222 | cmd = IMSG_CTL_OK; | |||
223 | break; | |||
224 | case IMSG_VMDOP_PAUSE_VM: | |||
225 | case IMSG_VMDOP_UNPAUSE_VM: | |||
226 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
227 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
228 | if (vid.vid_id == 0) { | |||
229 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
230 | res = ENOENT2; | |||
231 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
232 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
233 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
234 | break; | |||
235 | } else { | |||
236 | vid.vid_id = vm->vm_vmid; | |||
237 | } | |||
238 | } else if ((vm = vm_getbyid(vid.vid_id)) == NULL((void *)0)) { | |||
239 | res = ENOENT2; | |||
240 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
241 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
242 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
243 | break; | |||
244 | } | |||
245 | if (vm_checkperm(vm, &vm->vm_params.vmc_owner, | |||
246 | vid.vid_uid) != 0) { | |||
247 | res = EPERM1; | |||
248 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
249 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
250 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
251 | break; | |||
252 | } | |||
253 | proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
254 | imsg->hdr.peerid, -1, &vid, sizeof(vid)); | |||
255 | break; | |||
256 | case IMSG_VMDOP_SEND_VM_REQUEST: | |||
257 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
258 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
259 | id = vid.vid_id; | |||
260 | if (vid.vid_id == 0) { | |||
261 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
262 | res = ENOENT2; | |||
263 | cmd = IMSG_VMDOP_SEND_VM_RESPONSE; | |||
264 | close(imsg->fd); | |||
265 | break; | |||
266 | } else { | |||
267 | vid.vid_id = vm->vm_vmid; | |||
268 | } | |||
269 | } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL((void *)0)) { | |||
270 | res = ENOENT2; | |||
271 | cmd = IMSG_VMDOP_SEND_VM_RESPONSE; | |||
272 | close(imsg->fd); | |||
273 | break; | |||
274 | } | |||
275 | vmr.vmr_id = vid.vid_id; | |||
276 | log_debug("%s: sending fd to vmm", __func__); | |||
277 | proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
278 | imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); | |||
279 | break; | |||
280 | case IMSG_VMDOP_RECEIVE_VM_REQUEST: | |||
281 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
282 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
283 | if (imsg->fd == -1) { | |||
284 | log_warnx("%s: invalid fd", __func__); | |||
285 | return (-1); | |||
286 | } | |||
287 | if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != | |||
288 | sizeof(vmh)) { | |||
289 | log_warnx("%s: error reading vmh from received vm", | |||
290 | __func__); | |||
291 | res = EIO5; | |||
292 | close(imsg->fd); | |||
293 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
294 | break; | |||
295 | } | |||
296 | ||||
297 | if (vmd_check_vmh(&vmh)) { | |||
298 | res = ENOENT2; | |||
299 | close(imsg->fd); | |||
300 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
301 | break; | |||
302 | } | |||
303 | if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != | |||
304 | sizeof(vmc)) { | |||
305 | log_warnx("%s: error reading vmc from received vm", | |||
306 | __func__); | |||
307 | res = EIO5; | |||
308 | close(imsg->fd); | |||
309 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
310 | break; | |||
311 | } | |||
312 | strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, | |||
313 | sizeof(vmc.vmc_params.vcp_name)); | |||
314 | vmc.vmc_params.vcp_id = 0; | |||
315 | ||||
316 | ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); | |||
317 | if (ret != 0) { | |||
318 | res = errno(*__errno()); | |||
319 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
320 | close(imsg->fd); | |||
321 | } else { | |||
322 | vm->vm_state |= VM_STATE_RECEIVED0x08; | |||
323 | config_setvm(ps, vm, imsg->hdr.peerid, | |||
324 | vmc.vmc_owner.uid); | |||
325 | log_debug("%s: sending fd to vmm", __func__); | |||
326 | proc_compose_imsg(ps, PROC_VMM, -1, | |||
327 | IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, | |||
328 | NULL((void *)0), 0); | |||
329 | } | |||
330 | break; | |||
331 | case IMSG_VMDOP_DONE: | |||
332 | control_reset(&ps->ps_csock); | |||
333 | TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)for((rcs) = ((&ps->ps_rcsocks)->tqh_first); (rcs) != ((void *)0); (rcs) = ((rcs)->cs_entry.tqe_next)) | |||
334 | control_reset(rcs); | |||
335 | cmd = 0; | |||
336 | break; | |||
337 | default: | |||
338 | return (-1); | |||
339 | } | |||
340 | ||||
341 | switch (cmd) { | |||
342 | case 0: | |||
343 | break; | |||
344 | case IMSG_VMDOP_START_VM_RESPONSE: | |||
345 | case IMSG_VMDOP_TERMINATE_VM_RESPONSE: | |||
346 | memset(&vmr, 0, sizeof(vmr)); | |||
347 | vmr.vmr_result = res; | |||
348 | vmr.vmr_id = id; | |||
349 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, | |||
350 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
351 | return (-1); | |||
352 | break; | |||
353 | default: | |||
354 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, | |||
355 | imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) | |||
356 | return (-1); | |||
357 | break; | |||
358 | } | |||
359 | ||||
360 | return (0); | |||
361 | } | |||
362 | ||||
363 | int | |||
364 | vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
365 | { | |||
366 | struct vmop_result vmr; | |||
367 | struct privsep *ps = p->p_ps; | |||
368 | int res = 0; | |||
369 | struct vmd_vm *vm; | |||
370 | struct vm_create_params *vcp; | |||
371 | struct vmop_info_result vir; | |||
372 | ||||
373 | switch (imsg->hdr.type) { | |||
| ||||
374 | case IMSG_VMDOP_PAUSE_VM_RESPONSE: | |||
375 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
376 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
377 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
378 | break; | |||
379 | proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
380 | imsg->hdr.type, imsg->hdr.peerid, -1, | |||
381 | imsg->data, sizeof(imsg->data)); | |||
382 | log_info("%s: paused vm %d successfully", | |||
383 | vm->vm_params.vmc_params.vcp_name, | |||
384 | vm->vm_vmid); | |||
385 | vm->vm_state |= VM_STATE_PAUSED0x10; | |||
386 | break; | |||
387 | case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: | |||
388 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
389 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
390 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
391 | break; | |||
392 | proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
393 | imsg->hdr.type, imsg->hdr.peerid, -1, | |||
394 | imsg->data, sizeof(imsg->data)); | |||
395 | log_info("%s: unpaused vm %d successfully.", | |||
396 | vm->vm_params.vmc_params.vcp_name, | |||
397 | vm->vm_vmid); | |||
398 | vm->vm_state &= ~VM_STATE_PAUSED0x10; | |||
399 | break; | |||
400 | case IMSG_VMDOP_START_VM_RESPONSE: | |||
401 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
402 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
403 | if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL((void *)0)) | |||
404 | break; | |||
405 | vm->vm_pid = vmr.vmr_pid; | |||
406 | vcp = &vm->vm_params.vmc_params; | |||
407 | vcp->vcp_id = vmr.vmr_id; | |||
408 | ||||
409 | /* | |||
410 | * If the peerid is not -1, forward the response back to the | |||
411 | * the control socket. If it is -1, the request originated | |||
412 | * from the parent, not the control socket. | |||
413 | */ | |||
414 | if (vm->vm_peerid != (uint32_t)-1) { | |||
415 | (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, | |||
416 | sizeof(vmr.vmr_ttyname)); | |||
417 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
418 | imsg->hdr.type, vm->vm_peerid, -1, | |||
419 | &vmr, sizeof(vmr)) == -1) { | |||
420 | errno(*__errno()) = vmr.vmr_result; | |||
421 | log_warn("%s: failed to forward vm result", | |||
422 | vcp->vcp_name); | |||
423 | vm_terminate(vm, __func__); | |||
424 | return (-1); | |||
425 | } | |||
426 | } | |||
427 | ||||
428 | if (vmr.vmr_result) { | |||
429 | log_warnx("%s: failed to start vm", vcp->vcp_name); | |||
430 | vm_terminate(vm, __func__); | |||
431 | errno(*__errno()) = vmr.vmr_result; | |||
432 | break; | |||
433 | } | |||
434 | ||||
435 | /* Now configure all the interfaces */ | |||
436 | if (vm_priv_ifconfig(ps, vm) == -1) { | |||
437 | log_warn("%s: failed to configure vm", vcp->vcp_name); | |||
438 | vm_terminate(vm, __func__); | |||
439 | break; | |||
440 | } | |||
441 | ||||
442 | log_info("started %s (vm %d) successfully, tty %s", | |||
443 | vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); | |||
444 | break; | |||
445 | case IMSG_VMDOP_TERMINATE_VM_RESPONSE: | |||
446 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
447 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
448 | ||||
449 | if (vmr.vmr_result) { | |||
450 | DPRINTF("%s: forwarding TERMINATE VM for vm id %d",do {} while(0) | |||
451 | __func__, vmr.vmr_id)do {} while(0); | |||
452 | proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); | |||
453 | } else { | |||
454 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
455 | break; | |||
456 | /* Mark VM as shutting down */ | |||
457 | vm->vm_state |= VM_STATE_SHUTDOWN0x04; | |||
458 | } | |||
459 | break; | |||
460 | case IMSG_VMDOP_SEND_VM_RESPONSE: | |||
461 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
462 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
463 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
464 | break; | |||
465 | if (!vmr.vmr_result) { | |||
466 | log_info("%s: sent vm %d successfully.", | |||
467 | vm->vm_params.vmc_params.vcp_name, | |||
468 | vm->vm_vmid); | |||
469 | vm_terminate(vm, __func__); | |||
470 | } | |||
471 | ||||
472 | /* Send a response if a control client is waiting for it */ | |||
473 | if (imsg->hdr.peerid != (uint32_t)-1) { | |||
474 | /* the error is meaningless for deferred responses */ | |||
475 | vmr.vmr_result = 0; | |||
476 | ||||
477 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
478 | IMSG_VMDOP_SEND_VM_RESPONSE, | |||
479 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
480 | return (-1); | |||
481 | } | |||
482 | break; | |||
483 | case IMSG_VMDOP_TERMINATE_VM_EVENT: | |||
484 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
485 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
486 | DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",do {} while(0) | |||
487 | __func__, vmr.vmr_id, vmr.vmr_result)do {} while(0); | |||
488 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) { | |||
489 | log_debug("%s: vm %d is no longer available", | |||
490 | __func__, vmr.vmr_id); | |||
491 | break; | |||
492 | } | |||
493 | if (vmr.vmr_result != EAGAIN35 || | |||
494 | vm->vm_params.vmc_bootdevice) { | |||
495 | vm_terminate(vm, __func__); | |||
496 | } else { | |||
497 | /* Stop VM instance but keep the tty open */ | |||
498 | vm_stop(vm, 1, __func__); | |||
499 | config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); | |||
500 | } | |||
501 | ||||
502 | /* The error is meaningless for deferred responses */ | |||
503 | vmr.vmr_result = 0; | |||
504 | ||||
505 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
506 | IMSG_VMDOP_TERMINATE_VM_EVENT, | |||
507 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
508 | return (-1); | |||
509 | break; | |||
510 | case IMSG_VMDOP_GET_INFO_VM_DATA: | |||
511 | IMSG_SIZE_CHECK(imsg, &vir)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vir)) fatalx("bad length imsg received (%s)", "&vir" ); } while (0); | |||
512 | memcpy(&vir, imsg->data, sizeof(vir)); | |||
513 | if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL((void *)0)) { | |||
514 | memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); | |||
515 | if (vm->vm_ttyname[0] != '\0') | |||
516 | strlcpy(vir.vir_ttyname, vm->vm_ttyname, | |||
517 | sizeof(vir.vir_ttyname)); | |||
518 | log_debug("%s: running vm: %d, vm_state: 0x%x", | |||
519 | __func__, vm->vm_vmid, vm->vm_state); | |||
520 | vir.vir_state = vm->vm_state; | |||
521 | /* get the user id who started the vm */ | |||
522 | vir.vir_uid = vm->vm_uid; | |||
523 | vir.vir_gid = vm->vm_params.vmc_owner.gid; | |||
524 | } | |||
525 | if (proc_compose_imsg(ps, | |||
526 | imsg->hdr.peerid == IMSG_AGENTX_PEERID(uint32_t)-2 ? | |||
527 | PROC_AGENTX : PROC_CONTROL, -1, imsg->hdr.type, | |||
528 | imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { | |||
529 | log_debug("%s: GET_INFO_VM failed for vm %d, removing", | |||
530 | __func__, vm->vm_vmid); | |||
| ||||
531 | vm_terminate(vm, __func__); | |||
532 | return (-1); | |||
533 | } | |||
534 | break; | |||
535 | case IMSG_VMDOP_GET_INFO_VM_END_DATA: | |||
536 | /* | |||
537 | * PROC_VMM has responded with the *running* VMs, now we | |||
538 | * append the others. These use the special value 0 for their | |||
539 | * kernel id to indicate that they are not running. | |||
540 | */ | |||
541 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
542 | if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
543 | memset(&vir, 0, sizeof(vir)); | |||
544 | vir.vir_info.vir_id = vm->vm_vmid; | |||
545 | strlcpy(vir.vir_info.vir_name, | |||
546 | vm->vm_params.vmc_params.vcp_name, | |||
547 | VMM_MAX_NAME_LEN64); | |||
548 | vir.vir_info.vir_memory_size = | |||
549 | vm->vm_params.vmc_params. | |||
550 | vcp_memranges[0].vmr_size; | |||
551 | vir.vir_info.vir_ncpus = | |||
552 | vm->vm_params.vmc_params.vcp_ncpus; | |||
553 | /* get the configured user id for this vm */ | |||
554 | vir.vir_uid = vm->vm_params.vmc_owner.uid; | |||
555 | vir.vir_gid = vm->vm_params.vmc_owner.gid; | |||
556 | log_debug("%s: vm: %d, vm_state: 0x%x", | |||
557 | __func__, vm->vm_vmid, vm->vm_state); | |||
558 | vir.vir_state = vm->vm_state; | |||
559 | if (proc_compose_imsg(ps, | |||
560 | imsg->hdr.peerid == IMSG_AGENTX_PEERID(uint32_t)-2 ? | |||
561 | PROC_AGENTX : PROC_CONTROL, -1, | |||
562 | IMSG_VMDOP_GET_INFO_VM_DATA, | |||
563 | imsg->hdr.peerid, -1, &vir, | |||
564 | sizeof(vir)) == -1) { | |||
565 | log_debug("%s: GET_INFO_VM_END failed", | |||
566 | __func__); | |||
567 | vm_terminate(vm, __func__); | |||
568 | return (-1); | |||
569 | } | |||
570 | } | |||
571 | } | |||
572 | IMSG_SIZE_CHECK(imsg, &res)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&res)) fatalx("bad length imsg received (%s)", "&res" ); } while (0); | |||
573 | proc_forward_imsg(ps, imsg, | |||
574 | imsg->hdr.peerid == IMSG_AGENTX_PEERID(uint32_t)-2 ? | |||
575 | PROC_AGENTX : PROC_CONTROL, -1); | |||
576 | break; | |||
577 | default: | |||
578 | return (-1); | |||
579 | } | |||
580 | ||||
581 | return (0); | |||
582 | } | |||
583 | ||||
584 | int | |||
585 | vmd_dispatch_agentx(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
586 | { | |||
587 | struct privsep *ps = p->p_ps; | |||
588 | ||||
589 | switch (imsg->hdr.type) { | |||
590 | case IMSG_VMDOP_GET_INFO_VM_REQUEST: | |||
591 | proc_forward_imsg(ps, imsg, PROC_VMM, -1); | |||
592 | return (0); | |||
593 | default: | |||
594 | break; | |||
595 | } | |||
596 | return (-1); | |||
597 | } | |||
598 | ||||
599 | int | |||
600 | vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
601 | { | |||
602 | struct vmop_addr_result var; | |||
603 | ||||
604 | switch (imsg->hdr.type) { | |||
605 | case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: | |||
606 | IMSG_SIZE_CHECK(imsg, &var)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&var)) fatalx("bad length imsg received (%s)", "&var" ); } while (0); | |||
607 | memcpy(&var, imsg->data, sizeof(var)); | |||
608 | proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); | |||
609 | break; | |||
610 | default: | |||
611 | return (-1); | |||
612 | } | |||
613 | ||||
614 | return (0); | |||
615 | } | |||
616 | ||||
617 | int | |||
618 | vmd_check_vmh(struct vm_dump_header *vmh) | |||
619 | { | |||
620 | int i; | |||
621 | unsigned int code, leaf; | |||
622 | unsigned int a, b, c, d; | |||
623 | ||||
624 | if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE"OpenBSDVMM58", strlen(VM_DUMP_SIGNATURE"OpenBSDVMM58")) != 0) { | |||
625 | log_warnx("%s: incompatible dump signature", __func__); | |||
626 | return (-1); | |||
627 | } | |||
628 | ||||
629 | if (vmh->vmh_version != VM_DUMP_VERSION7) { | |||
630 | log_warnx("%s: incompatible dump version", __func__); | |||
631 | return (-1); | |||
632 | } | |||
633 | ||||
634 | for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT5; i++) { | |||
635 | code = vmh->vmh_cpuids[i].code; | |||
636 | leaf = vmh->vmh_cpuids[i].leaf; | |||
637 | if (leaf != 0x00) { | |||
638 | log_debug("%s: invalid leaf 0x%x for code 0x%x", | |||
639 | __func__, leaf, code); | |||
640 | return (-1); | |||
641 | } | |||
642 | ||||
643 | switch (code) { | |||
644 | case 0x00: | |||
645 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
646 | if (vmh->vmh_cpuids[i].a > a) { | |||
647 | log_debug("%s: incompatible cpuid level", | |||
648 | __func__); | |||
649 | return (-1); | |||
650 | } | |||
651 | if (!(vmh->vmh_cpuids[i].b == b && | |||
652 | vmh->vmh_cpuids[i].c == c && | |||
653 | vmh->vmh_cpuids[i].d == d)) { | |||
654 | log_debug("%s: incompatible cpu brand", | |||
655 | __func__); | |||
656 | return (-1); | |||
657 | } | |||
658 | break; | |||
659 | ||||
660 | case 0x01: | |||
661 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
662 | if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020 | 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800 | 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000 )) != | |||
663 | (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020 | 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800 | 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000 ))) { | |||
664 | log_debug("%s: incompatible cpu features " | |||
665 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
666 | code, leaf); | |||
667 | return (-1); | |||
668 | } | |||
669 | if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200 | 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080 | 0x00004000)) != | |||
670 | (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200 | 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080 | 0x00004000))) { | |||
671 | log_debug("%s: incompatible cpu features " | |||
672 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
673 | code, leaf); | |||
674 | return (-1); | |||
675 | } | |||
676 | break; | |||
677 | ||||
678 | case 0x07: | |||
679 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
680 | if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800 | 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000 | 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000 | 0x40000000 | 0x80000000)) != | |||
681 | (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800 | 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000 | 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000 | 0x40000000 | 0x80000000))) { | |||
682 | log_debug("%s: incompatible cpu features " | |||
683 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
684 | code, leaf); | |||
685 | return (-1); | |||
686 | } | |||
687 | if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK(0x00000004)) != | |||
688 | (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK(0x00000004))) { | |||
689 | log_debug("%s: incompatible cpu features " | |||
690 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
691 | code, leaf); | |||
692 | return (-1); | |||
693 | } | |||
694 | break; | |||
695 | ||||
696 | case 0x0d: | |||
697 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
698 | if (vmh->vmh_cpuids[i].b > b) { | |||
699 | log_debug("%s: incompatible cpu: insufficient " | |||
700 | "max save area for enabled XCR0 features", | |||
701 | __func__); | |||
702 | return (-1); | |||
703 | } | |||
704 | if (vmh->vmh_cpuids[i].c > c) { | |||
705 | log_debug("%s: incompatible cpu: insufficient " | |||
706 | "max save area for supported XCR0 features", | |||
707 | __func__); | |||
708 | return (-1); | |||
709 | } | |||
710 | break; | |||
711 | ||||
712 | case 0x80000001: | |||
713 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
714 | if ((vmh->vmh_cpuids[i].a & a) != | |||
715 | vmh->vmh_cpuids[i].a) { | |||
716 | log_debug("%s: incompatible cpu features " | |||
717 | "code: 0x%x leaf: 0x%x reg: a", __func__, | |||
718 | code, leaf); | |||
719 | return (-1); | |||
720 | } | |||
721 | if ((vmh->vmh_cpuids[i].c & c) != | |||
722 | vmh->vmh_cpuids[i].c) { | |||
723 | log_debug("%s: incompatible cpu features " | |||
724 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
725 | code, leaf); | |||
726 | return (-1); | |||
727 | } | |||
728 | if ((vmh->vmh_cpuids[i].d & d) != | |||
729 | vmh->vmh_cpuids[i].d) { | |||
730 | log_debug("%s: incompatible cpu features " | |||
731 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
732 | code, leaf); | |||
733 | return (-1); | |||
734 | } | |||
735 | break; | |||
736 | ||||
737 | default: | |||
738 | log_debug("%s: unknown code 0x%x", __func__, code); | |||
739 | return (-1); | |||
740 | } | |||
741 | } | |||
742 | ||||
743 | return (0); | |||
744 | } | |||
745 | ||||
746 | void | |||
747 | vmd_sighdlr(int sig, short event, void *arg) | |||
748 | { | |||
749 | if (privsep_process != PROC_PARENT) | |||
750 | return; | |||
751 | log_debug("%s: handling signal", __func__); | |||
752 | ||||
753 | switch (sig) { | |||
754 | case SIGHUP1: | |||
755 | log_info("%s: reload requested with SIGHUP", __func__); | |||
756 | ||||
757 | /* | |||
758 | * This is safe because libevent uses async signal handlers | |||
759 | * that run in the event loop and not in signal context. | |||
760 | */ | |||
761 | (void)vmd_reload(0, NULL((void *)0)); | |||
762 | break; | |||
763 | case SIGPIPE13: | |||
764 | log_info("%s: ignoring SIGPIPE", __func__); | |||
765 | break; | |||
766 | case SIGUSR130: | |||
767 | log_info("%s: ignoring SIGUSR1", __func__); | |||
768 | break; | |||
769 | case SIGTERM15: | |||
770 | case SIGINT2: | |||
771 | vmd_shutdown(); | |||
772 | break; | |||
773 | default: | |||
774 | fatalx("unexpected signal"); | |||
775 | } | |||
776 | } | |||
777 | ||||
778 | __dead__attribute__((__noreturn__)) void | |||
779 | usage(void) | |||
780 | { | |||
781 | extern char *__progname; | |||
782 | fprintf(stderr(&__sF[2]), "usage: %s [-dnv] [-D macro=value] [-f file]\n", | |||
783 | __progname); | |||
784 | exit(1); | |||
785 | } | |||
786 | ||||
787 | int | |||
788 | main(int argc, char **argv) | |||
789 | { | |||
790 | struct privsep *ps; | |||
791 | int ch; | |||
792 | enum privsep_procid proc_id = PROC_PARENT; | |||
793 | int proc_instance = 0, vm_launch = 0; | |||
794 | int vmm_fd = -1, vm_fd = -1; | |||
795 | const char *errp, *title = NULL((void *)0); | |||
796 | int argc0 = argc; | |||
797 | char dev_type = '\0'; | |||
798 | ||||
799 | log_init(0, LOG_DAEMON(3<<3)); | |||
800 | ||||
801 | if ((env = calloc(1, sizeof(*env))) == NULL((void *)0)) | |||
802 | fatal("calloc: env"); | |||
803 | ||||
804 | while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { | |||
805 | switch (ch) { | |||
806 | case 'D': | |||
807 | if (cmdline_symset(optarg) < 0) | |||
808 | log_warnx("could not parse macro definition %s", | |||
809 | optarg); | |||
810 | break; | |||
811 | case 'd': | |||
812 | env->vmd_debug = 2; | |||
813 | break; | |||
814 | case 'f': | |||
815 | conffile = optarg; | |||
816 | break; | |||
817 | case 'v': | |||
818 | env->vmd_verbose++; | |||
819 | break; | |||
820 | /* vmd fork/exec */ | |||
821 | case 'n': | |||
822 | env->vmd_noaction = 1; | |||
823 | break; | |||
824 | case 'P': | |||
825 | title = optarg; | |||
826 | proc_id = proc_getid(procs, nitems(procs)(sizeof((procs)) / sizeof((procs)[0])), title); | |||
827 | if (proc_id == PROC_MAX) | |||
828 | fatalx("invalid process name"); | |||
829 | break; | |||
830 | case 'I': | |||
831 | proc_instance = strtonum(optarg, 0, | |||
832 | PROC_MAX_INSTANCES32, &errp); | |||
833 | if (errp) | |||
834 | fatalx("invalid process instance"); | |||
835 | break; | |||
836 | /* child vm and device fork/exec */ | |||
837 | case 'p': | |||
838 | title = optarg; | |||
839 | break; | |||
840 | case 'V': | |||
841 | vm_launch = VMD_LAUNCH_VM1; | |||
842 | vm_fd = strtonum(optarg, 0, 128, &errp); | |||
843 | if (errp) | |||
844 | fatalx("invalid vm fd"); | |||
845 | break; | |||
846 | case 'X': | |||
847 | vm_launch = VMD_LAUNCH_DEV2; | |||
848 | vm_fd = strtonum(optarg, 0, 128, &errp); | |||
849 | if (errp) | |||
850 | fatalx("invalid device fd"); | |||
851 | break; | |||
852 | case 't': | |||
853 | dev_type = *optarg; | |||
854 | switch (dev_type) { | |||
855 | case VMD_DEVTYPE_NET'n': | |||
856 | case VMD_DEVTYPE_DISK'd': | |||
857 | break; | |||
858 | default: fatalx("invalid device type"); | |||
859 | } | |||
860 | break; | |||
861 | case 'i': | |||
862 | vmm_fd = strtonum(optarg, 0, 128, &errp); | |||
863 | if (errp) | |||
864 | fatalx("invalid vmm fd"); | |||
865 | break; | |||
866 | default: | |||
867 | usage(); | |||
868 | } | |||
869 | } | |||
870 | ||||
871 | argc -= optind; | |||
872 | if (argc > 0) | |||
873 | usage(); | |||
874 | ||||
875 | if (env->vmd_noaction && !env->vmd_debug) | |||
876 | env->vmd_debug = 1; | |||
877 | ||||
878 | log_init(env->vmd_debug, LOG_DAEMON(3<<3)); | |||
879 | log_setverbose(env->vmd_verbose); | |||
880 | ||||
881 | /* Re-exec from the vmm child process requires an absolute path. */ | |||
882 | if (proc_id == PROC_PARENT && *argv[0] != '/' && !env->vmd_noaction) | |||
883 | fatalx("re-exec requires execution with an absolute path"); | |||
884 | env->argv0 = argv[0]; | |||
885 | ||||
886 | /* check for root privileges */ | |||
887 | if (env->vmd_noaction == 0 && !vm_launch) { | |||
888 | if (geteuid()) | |||
889 | fatalx("need root privileges"); | |||
890 | } | |||
891 | ||||
892 | ps = &env->vmd_ps; | |||
893 | ps->ps_env = env; | |||
894 | env->vmd_fd = vmm_fd; | |||
895 | ||||
896 | if (config_init(env) == -1) | |||
897 | fatal("failed to initialize configuration"); | |||
898 | ||||
899 | if ((ps->ps_pw = getpwnam(VMD_USER"_vmd")) == NULL((void *)0)) | |||
900 | fatal("unknown user %s", VMD_USER"_vmd"); | |||
901 | ||||
902 | /* First proc runs as root without pledge but in default chroot */ | |||
903 | proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ | |||
904 | proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ | |||
905 | ||||
906 | /* | |||
907 | * If we're launching a new vm or its device, we short out here. | |||
908 | */ | |||
909 | if (vm_launch == VMD_LAUNCH_VM1) { | |||
910 | vm_main(vm_fd, vmm_fd); | |||
911 | /* NOTREACHED */ | |||
912 | } else if (vm_launch == VMD_LAUNCH_DEV2) { | |||
913 | if (dev_type == VMD_DEVTYPE_NET'n') { | |||
914 | log_procinit("vm/%s/vionet", title); | |||
915 | vionet_main(vm_fd, vmm_fd); | |||
916 | /* NOTREACHED */ | |||
917 | } else if (dev_type == VMD_DEVTYPE_DISK'd') { | |||
918 | log_procinit("vm/%s/vioblk", title); | |||
919 | vioblk_main(vm_fd, vmm_fd); | |||
920 | /* NOTREACHED */ | |||
921 | } | |||
922 | fatalx("unsupported device type '%c'", dev_type); | |||
923 | } | |||
924 | ||||
925 | /* Open /dev/vmm early. */ | |||
926 | if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { | |||
927 | env->vmd_fd = open(VMM_NODE"/dev/vmm", O_RDWR0x0002); | |||
928 | if (env->vmd_fd == -1) | |||
929 | fatal("%s", VMM_NODE"/dev/vmm"); | |||
930 | } | |||
931 | ||||
932 | /* Configure the control socket */ | |||
933 | ps->ps_csock.cs_name = SOCKET_NAME"/var/run/vmd.sock"; | |||
934 | TAILQ_INIT(&ps->ps_rcsocks)do { (&ps->ps_rcsocks)->tqh_first = ((void *)0); (& ps->ps_rcsocks)->tqh_last = &(&ps->ps_rcsocks )->tqh_first; } while (0); | |||
935 | ||||
936 | /* Configuration will be parsed after forking the children */ | |||
937 | env->vmd_conffile = conffile; | |||
938 | ||||
939 | if (env->vmd_noaction) | |||
940 | ps->ps_noaction = 1; | |||
941 | ps->ps_instance = proc_instance; | |||
942 | if (title != NULL((void *)0)) | |||
943 | ps->ps_title[proc_id] = title; | |||
944 | ||||
945 | /* only the parent returns */ | |||
946 | proc_init(ps, procs, nitems(procs)(sizeof((procs)) / sizeof((procs)[0])), env->vmd_debug, argc0, argv, | |||
947 | proc_id); | |||
948 | ||||
949 | if (!env->vmd_debug && daemon(0, 0) == -1) | |||
950 | fatal("can't daemonize"); | |||
951 | ||||
952 | if (ps->ps_noaction == 0) | |||
953 | log_info("startup"); | |||
954 | ||||
955 | event_init(); | |||
956 | ||||
957 | signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps)event_set(&ps->ps_evsigint, 2, 0x08|0x10, vmd_sighdlr, ps); | |||
958 | signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps)event_set(&ps->ps_evsigterm, 15, 0x08|0x10, vmd_sighdlr , ps); | |||
959 | signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps)event_set(&ps->ps_evsighup, 1, 0x08|0x10, vmd_sighdlr, ps); | |||
960 | signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps)event_set(&ps->ps_evsigpipe, 13, 0x08|0x10, vmd_sighdlr , ps); | |||
961 | signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps)event_set(&ps->ps_evsigusr1, 30, 0x08|0x10, vmd_sighdlr , ps); | |||
962 | ||||
963 | signal_add(&ps->ps_evsigint, NULL)event_add(&ps->ps_evsigint, ((void *)0)); | |||
964 | signal_add(&ps->ps_evsigterm, NULL)event_add(&ps->ps_evsigterm, ((void *)0)); | |||
965 | signal_add(&ps->ps_evsighup, NULL)event_add(&ps->ps_evsighup, ((void *)0)); | |||
966 | signal_add(&ps->ps_evsigpipe, NULL)event_add(&ps->ps_evsigpipe, ((void *)0)); | |||
967 | signal_add(&ps->ps_evsigusr1, NULL)event_add(&ps->ps_evsigusr1, ((void *)0)); | |||
968 | ||||
969 | if (!env->vmd_noaction) | |||
970 | proc_connect(ps); | |||
971 | ||||
972 | if (vmd_configure() == -1) | |||
973 | fatalx("configuration failed"); | |||
974 | ||||
975 | event_dispatch(); | |||
976 | ||||
977 | log_debug("exiting"); | |||
978 | ||||
979 | return (0); | |||
980 | } | |||
981 | ||||
982 | void | |||
983 | start_vm_batch(int fd, short type, void *args) | |||
984 | { | |||
985 | int i = 0; | |||
986 | struct vmd_vm *vm; | |||
987 | ||||
988 | log_debug("%s: starting batch of %d vms", __func__, | |||
989 | env->vmd_cfg.parallelism); | |||
990 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
991 | if (!(vm->vm_state & VM_STATE_WAITING0x20)) { | |||
992 | log_debug("%s: not starting vm %s (disabled)", | |||
993 | __func__, | |||
994 | vm->vm_params.vmc_params.vcp_name); | |||
995 | continue; | |||
996 | } | |||
997 | i++; | |||
998 | if (i > env->vmd_cfg.parallelism) { | |||
999 | evtimer_add(&staggered_start_timer,event_add(&staggered_start_timer, &env->vmd_cfg.delay ) | |||
1000 | &env->vmd_cfg.delay)event_add(&staggered_start_timer, &env->vmd_cfg.delay ); | |||
1001 | break; | |||
1002 | } | |||
1003 | vm->vm_state &= ~VM_STATE_WAITING0x20; | |||
1004 | config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); | |||
1005 | } | |||
1006 | log_debug("%s: done starting vms", __func__); | |||
1007 | } | |||
1008 | ||||
1009 | int | |||
1010 | vmd_configure(void) | |||
1011 | { | |||
1012 | int ncpus; | |||
1013 | struct vmd_switch *vsw; | |||
1014 | int ncpu_mib[] = {CTL_HW6, HW_NCPUONLINE25}; | |||
1015 | size_t ncpus_sz = sizeof(ncpus); | |||
1016 | ||||
1017 | if ((env->vmd_ptmfd = open(PATH_PTMDEV"/dev/ptm", O_RDWR0x0002|O_CLOEXEC0x10000)) == -1) | |||
1018 | fatal("open %s", PATH_PTMDEV"/dev/ptm"); | |||
1019 | ||||
1020 | /* | |||
1021 | * pledge in the parent process: | |||
1022 | * stdio - for malloc and basic I/O including events. | |||
1023 | * rpath - for reload to open and read the configuration files. | |||
1024 | * wpath - for opening disk images and tap devices. | |||
1025 | * tty - for openpty and TIOCUCNTL. | |||
1026 | * proc - run kill to terminate its children safely. | |||
1027 | * sendfd - for disks, interfaces and other fds. | |||
1028 | * recvfd - for send and receive. | |||
1029 | * getpw - lookup user or group id by name. | |||
1030 | * chown, fattr - change tty ownership | |||
1031 | * flock - locking disk files | |||
1032 | */ | |||
1033 | if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" | |||
1034 | " chown fattr flock", NULL((void *)0)) == -1) | |||
1035 | fatal("pledge"); | |||
1036 | ||||
1037 | if (parse_config(env->vmd_conffile) == -1) { | |||
1038 | proc_kill(&env->vmd_ps); | |||
1039 | exit(1); | |||
1040 | } | |||
1041 | ||||
1042 | if (env->vmd_noaction) { | |||
1043 | fprintf(stderr(&__sF[2]), "configuration OK\n"); | |||
1044 | proc_kill(&env->vmd_ps); | |||
1045 | exit(0); | |||
1046 | } | |||
1047 | ||||
1048 | /* Send VMM device fd to vmm proc. */ | |||
1049 | proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, | |||
1050 | IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL((void *)0), 0); | |||
1051 | ||||
1052 | /* Send shared global configuration to all children */ | |||
1053 | if (config_setconfig(env) == -1) | |||
1054 | return (-1); | |||
1055 | ||||
1056 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
1057 | if (vsw->sw_running) | |||
1058 | continue; | |||
1059 | if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { | |||
1060 | log_warn("%s: failed to create switch %s", | |||
1061 | __func__, vsw->sw_name); | |||
1062 | switch_remove(vsw); | |||
1063 | return (-1); | |||
1064 | } | |||
1065 | } | |||
1066 | ||||
1067 | if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START0x04)) { | |||
1068 | env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY30; | |||
1069 | if (sysctl(ncpu_mib, nitems(ncpu_mib)(sizeof((ncpu_mib)) / sizeof((ncpu_mib)[0])), &ncpus, &ncpus_sz, NULL((void *)0), 0) == -1) | |||
1070 | ncpus = 1; | |||
1071 | env->vmd_cfg.parallelism = ncpus; | |||
1072 | log_debug("%s: setting staggered start configuration to " | |||
1073 | "parallelism: %d and delay: %lld", | |||
1074 | __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); | |||
1075 | } | |||
1076 | ||||
1077 | log_debug("%s: starting vms in staggered fashion", __func__); | |||
1078 | evtimer_set(&staggered_start_timer, start_vm_batch, NULL)event_set(&staggered_start_timer, -1, 0, start_vm_batch, ( (void *)0)); | |||
1079 | /* start first batch */ | |||
1080 | start_vm_batch(0, 0, NULL((void *)0)); | |||
1081 | ||||
1082 | return (0); | |||
1083 | } | |||
1084 | ||||
1085 | int | |||
1086 | vmd_reload(unsigned int reset, const char *filename) | |||
1087 | { | |||
1088 | struct vmd_vm *vm, *next_vm; | |||
1089 | struct vmd_switch *vsw; | |||
1090 | int reload = 0; | |||
1091 | ||||
1092 | /* Switch back to the default config file */ | |||
1093 | if (filename == NULL((void *)0) || *filename == '\0') { | |||
1094 | filename = env->vmd_conffile; | |||
1095 | reload = 1; | |||
1096 | } | |||
1097 | ||||
1098 | log_debug("%s: level %d config file %s", __func__, reset, filename); | |||
1099 | ||||
1100 | if (reset) { | |||
1101 | /* Purge the configuration */ | |||
1102 | config_purge(env, reset); | |||
1103 | config_setreset(env, reset); | |||
1104 | } else { | |||
1105 | /* | |||
1106 | * Load or reload the configuration. | |||
1107 | * | |||
1108 | * Reloading removes all non-running VMs before processing the | |||
1109 | * config file, whereas loading only adds to the existing list | |||
1110 | * of VMs. | |||
1111 | */ | |||
1112 | ||||
1113 | if (reload) { | |||
1114 | TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((next_vm) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (next_vm)) | |||
1115 | next_vm)for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((next_vm) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (next_vm)) { | |||
1116 | if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
1117 | DPRINTF("%s: calling vm_remove",do {} while(0) | |||
1118 | __func__)do {} while(0); | |||
1119 | vm_remove(vm, __func__); | |||
1120 | } | |||
1121 | } | |||
1122 | } | |||
1123 | ||||
1124 | if (parse_config(filename) == -1) { | |||
1125 | log_debug("%s: failed to load config file %s", | |||
1126 | __func__, filename); | |||
1127 | return (-1); | |||
1128 | } | |||
1129 | ||||
1130 | if (reload) { | |||
1131 | /* Update shared global configuration in all children */ | |||
1132 | if (config_setconfig(env) == -1) | |||
1133 | return (-1); | |||
1134 | } | |||
1135 | ||||
1136 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
1137 | if (vsw->sw_running) | |||
1138 | continue; | |||
1139 | if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { | |||
1140 | log_warn("%s: failed to create switch %s", | |||
1141 | __func__, vsw->sw_name); | |||
1142 | switch_remove(vsw); | |||
1143 | return (-1); | |||
1144 | } | |||
1145 | } | |||
1146 | ||||
1147 | log_debug("%s: starting vms in staggered fashion", __func__); | |||
1148 | evtimer_set(&staggered_start_timer, start_vm_batch, NULL)event_set(&staggered_start_timer, -1, 0, start_vm_batch, ( (void *)0)); | |||
1149 | /* start first batch */ | |||
1150 | start_vm_batch(0, 0, NULL((void *)0)); | |||
1151 | ||||
1152 | } | |||
1153 | ||||
1154 | return (0); | |||
1155 | } | |||
1156 | ||||
1157 | void | |||
1158 | vmd_shutdown(void) | |||
1159 | { | |||
1160 | struct vmd_vm *vm, *vm_next; | |||
1161 | ||||
1162 | log_debug("%s: performing shutdown", __func__); | |||
1163 | ||||
1164 | TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next)for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((vm_next) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (vm_next)) { | |||
1165 | vm_remove(vm, __func__); | |||
1166 | } | |||
1167 | ||||
1168 | proc_kill(&env->vmd_ps); | |||
1169 | free(env); | |||
1170 | ||||
1171 | log_warnx("terminating"); | |||
1172 | exit(0); | |||
1173 | } | |||
1174 | ||||
1175 | struct vmd_vm * | |||
1176 | vm_getbyvmid(uint32_t vmid) | |||
1177 | { | |||
1178 | struct vmd_vm *vm; | |||
1179 | ||||
1180 | if (vmid == 0) | |||
1181 | return (NULL((void *)0)); | |||
1182 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1183 | if (vm->vm_vmid == vmid) | |||
1184 | return (vm); | |||
1185 | } | |||
1186 | ||||
1187 | return (NULL((void *)0)); | |||
1188 | } | |||
1189 | ||||
1190 | struct vmd_vm * | |||
1191 | vm_getbyid(uint32_t id) | |||
1192 | { | |||
1193 | struct vmd_vm *vm; | |||
1194 | ||||
1195 | if (id == 0) | |||
1196 | return (NULL((void *)0)); | |||
1197 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1198 | if (vm->vm_params.vmc_params.vcp_id == id) | |||
1199 | return (vm); | |||
1200 | } | |||
1201 | ||||
1202 | return (NULL((void *)0)); | |||
1203 | } | |||
1204 | ||||
1205 | uint32_t | |||
1206 | vm_id2vmid(uint32_t id, struct vmd_vm *vm) | |||
1207 | { | |||
1208 | if (vm == NULL((void *)0) && (vm = vm_getbyid(id)) == NULL((void *)0)) | |||
1209 | return (0); | |||
1210 | DPRINTF("%s: vmm id %u is vmid %u", __func__,do {} while(0) | |||
1211 | id, vm->vm_vmid)do {} while(0); | |||
1212 | return (vm->vm_vmid); | |||
1213 | } | |||
1214 | ||||
1215 | uint32_t | |||
1216 | vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) | |||
1217 | { | |||
1218 | if (vm == NULL((void *)0) && (vm = vm_getbyvmid(vmid)) == NULL((void *)0)) | |||
1219 | return (0); | |||
1220 | DPRINTF("%s: vmid %u is vmm id %u", __func__,do {} while(0) | |||
1221 | vmid, vm->vm_params.vmc_params.vcp_id)do {} while(0); | |||
1222 | return (vm->vm_params.vmc_params.vcp_id); | |||
1223 | } | |||
1224 | ||||
1225 | struct vmd_vm * | |||
1226 | vm_getbyname(const char *name) | |||
1227 | { | |||
1228 | struct vmd_vm *vm; | |||
1229 | ||||
1230 | if (name == NULL((void *)0)) | |||
1231 | return (NULL((void *)0)); | |||
1232 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1233 | if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) | |||
1234 | return (vm); | |||
1235 | } | |||
1236 | ||||
1237 | return (NULL((void *)0)); | |||
1238 | } | |||
1239 | ||||
1240 | struct vmd_vm * | |||
1241 | vm_getbypid(pid_t pid) | |||
1242 | { | |||
1243 | struct vmd_vm *vm; | |||
1244 | ||||
1245 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1246 | if (vm->vm_pid == pid) | |||
1247 | return (vm); | |||
1248 | } | |||
1249 | ||||
1250 | return (NULL((void *)0)); | |||
1251 | } | |||
1252 | ||||
1253 | void | |||
1254 | vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) | |||
1255 | { | |||
1256 | struct privsep *ps = &env->vmd_ps; | |||
1257 | unsigned int i, j; | |||
1258 | ||||
1259 | if (vm == NULL((void *)0)) | |||
1260 | return; | |||
1261 | ||||
1262 | log_debug("%s: %s %s stopping vm %d%s", | |||
1263 | __func__, ps->ps_title[privsep_process], caller, | |||
1264 | vm->vm_vmid, keeptty ? ", keeping tty open" : ""); | |||
1265 | ||||
1266 | vm->vm_state &= ~(VM_STATE_RECEIVED0x08 | VM_STATE_RUNNING0x01 | |||
1267 | | VM_STATE_SHUTDOWN0x04); | |||
1268 | ||||
1269 | if (vm->vm_iev.ibuf.fd != -1) { | |||
1270 | event_del(&vm->vm_iev.ev); | |||
1271 | close(vm->vm_iev.ibuf.fd); | |||
1272 | } | |||
1273 | for (i = 0; i < VM_MAX_DISKS_PER_VM4; i++) { | |||
1274 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) { | |||
1275 | if (vm->vm_disks[i][j] != -1) { | |||
1276 | close(vm->vm_disks[i][j]); | |||
1277 | vm->vm_disks[i][j] = -1; | |||
1278 | } | |||
1279 | } | |||
1280 | } | |||
1281 | for (i = 0; i < VM_MAX_NICS_PER_VM4; i++) { | |||
1282 | if (vm->vm_ifs[i].vif_fd != -1) { | |||
1283 | close(vm->vm_ifs[i].vif_fd); | |||
1284 | vm->vm_ifs[i].vif_fd = -1; | |||
1285 | } | |||
1286 | free(vm->vm_ifs[i].vif_name); | |||
1287 | free(vm->vm_ifs[i].vif_switch); | |||
1288 | free(vm->vm_ifs[i].vif_group); | |||
1289 | vm->vm_ifs[i].vif_name = NULL((void *)0); | |||
1290 | vm->vm_ifs[i].vif_switch = NULL((void *)0); | |||
1291 | vm->vm_ifs[i].vif_group = NULL((void *)0); | |||
1292 | } | |||
1293 | if (vm->vm_kernel != -1) { | |||
1294 | close(vm->vm_kernel); | |||
1295 | vm->vm_kernel = -1; | |||
1296 | } | |||
1297 | if (vm->vm_cdrom != -1) { | |||
1298 | close(vm->vm_cdrom); | |||
1299 | vm->vm_cdrom = -1; | |||
1300 | } | |||
1301 | if (!keeptty) { | |||
1302 | vm_closetty(vm); | |||
1303 | vm->vm_uid = 0; | |||
1304 | } | |||
1305 | } | |||
1306 | ||||
1307 | void | |||
1308 | vm_remove(struct vmd_vm *vm, const char *caller) | |||
1309 | { | |||
1310 | struct privsep *ps = &env->vmd_ps; | |||
1311 | ||||
1312 | if (vm == NULL((void *)0)) | |||
1313 | return; | |||
1314 | ||||
1315 | log_debug("%s: %s %s removing vm %d from running config", | |||
1316 | __func__, ps->ps_title[privsep_process], caller, | |||
1317 | vm->vm_vmid); | |||
1318 | ||||
1319 | TAILQ_REMOVE(env->vmd_vms, vm, vm_entry)do { if (((vm)->vm_entry.tqe_next) != ((void *)0)) (vm)-> vm_entry.tqe_next->vm_entry.tqe_prev = (vm)->vm_entry.tqe_prev ; else (env->vmd_vms)->tqh_last = (vm)->vm_entry.tqe_prev ; *(vm)->vm_entry.tqe_prev = (vm)->vm_entry.tqe_next; ; ; } while (0); | |||
1320 | ||||
1321 | vm_stop(vm, 0, caller); | |||
1322 | if (vm->vm_kernel_path != NULL((void *)0) && !vm->vm_from_config) | |||
1323 | free(vm->vm_kernel_path); | |||
1324 | free(vm); | |||
1325 | } | |||
1326 | ||||
1327 | int | |||
1328 | vm_claimid(const char *name, int uid, uint32_t *id) | |||
1329 | { | |||
1330 | struct name2id *n2i = NULL((void *)0); | |||
1331 | ||||
1332 | TAILQ_FOREACH(n2i, env->vmd_known, entry)for((n2i) = ((env->vmd_known)->tqh_first); (n2i) != ((void *)0); (n2i) = ((n2i)->entry.tqe_next)) | |||
1333 | if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) | |||
1334 | goto out; | |||
1335 | ||||
1336 | if (++env->vmd_nvm == 0) { | |||
1337 | log_warnx("too many vms"); | |||
1338 | return (-1); | |||
1339 | } | |||
1340 | if ((n2i = calloc(1, sizeof(struct name2id))) == NULL((void *)0)) { | |||
1341 | log_warnx("could not alloc vm name"); | |||
1342 | return (-1); | |||
1343 | } | |||
1344 | n2i->id = env->vmd_nvm; | |||
1345 | n2i->uid = uid; | |||
1346 | if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { | |||
1347 | log_warnx("vm name too long"); | |||
1348 | free(n2i); | |||
1349 | return (-1); | |||
1350 | } | |||
1351 | TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry)do { (n2i)->entry.tqe_next = ((void *)0); (n2i)->entry. tqe_prev = (env->vmd_known)->tqh_last; *(env->vmd_known )->tqh_last = (n2i); (env->vmd_known)->tqh_last = & (n2i)->entry.tqe_next; } while (0); | |||
1352 | ||||
1353 | out: | |||
1354 | *id = n2i->id; | |||
1355 | return (0); | |||
1356 | } | |||
1357 | ||||
1358 | int | |||
1359 | vm_register(struct privsep *ps, struct vmop_create_params *vmc, | |||
1360 | struct vmd_vm **ret_vm, uint32_t id, uid_t uid) | |||
1361 | { | |||
1362 | struct vmd_vm *vm = NULL((void *)0), *vm_parent = NULL((void *)0); | |||
1363 | struct vm_create_params *vcp = &vmc->vmc_params; | |||
1364 | struct vmop_owner *vmo = NULL((void *)0); | |||
1365 | uint32_t nid, rng; | |||
1366 | unsigned int i, j; | |||
1367 | struct vmd_switch *sw; | |||
1368 | char *s; | |||
1369 | int ret = 0; | |||
1370 | ||||
1371 | /* Check if this is an instance of another VM */ | |||
1372 | if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { | |||
1373 | errno(*__errno()) = ret; /* XXX might set invalid errno */ | |||
1374 | return (-1); | |||
1375 | } | |||
1376 | ||||
1377 | errno(*__errno()) = 0; | |||
1378 | *ret_vm = NULL((void *)0); | |||
1379 | ||||
1380 | if ((vm = vm_getbyname(vcp->vcp_name)) != NULL((void *)0) || | |||
1381 | (vm = vm_getbyvmid(vcp->vcp_id)) != NULL((void *)0)) { | |||
1382 | if (vm_checkperm(vm, &vm->vm_params.vmc_owner, | |||
1383 | uid) != 0) { | |||
1384 | errno(*__errno()) = EPERM1; | |||
1385 | goto fail; | |||
1386 | } | |||
1387 | vm->vm_kernel = vmc->vmc_kernel; | |||
1388 | *ret_vm = vm; | |||
1389 | errno(*__errno()) = EALREADY37; | |||
1390 | goto fail; | |||
1391 | } | |||
1392 | ||||
1393 | if (vm_parent != NULL((void *)0)) | |||
1394 | vmo = &vm_parent->vm_params.vmc_insowner; | |||
1395 | ||||
1396 | /* non-root users can only start existing VMs or instances */ | |||
1397 | if (vm_checkperm(NULL((void *)0), vmo, uid) != 0) { | |||
1398 | log_warnx("permission denied"); | |||
1399 | errno(*__errno()) = EPERM1; | |||
1400 | goto fail; | |||
1401 | } | |||
1402 | if (vmc->vmc_flags == 0) { | |||
1403 | log_warnx("invalid configuration, no devices"); | |||
1404 | errno(*__errno()) = VMD_DISK_MISSING1002; | |||
1405 | goto fail; | |||
1406 | } | |||
1407 | if (vcp->vcp_ncpus == 0) | |||
1408 | vcp->vcp_ncpus = 1; | |||
1409 | if (vcp->vcp_memranges[0].vmr_size == 0) | |||
1410 | vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY512 * 1024 * 1024; | |||
1411 | if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM64) { | |||
1412 | log_warnx("invalid number of CPUs"); | |||
1413 | goto fail; | |||
1414 | } else if (vmc->vmc_ndisks > VM_MAX_DISKS_PER_VM4) { | |||
1415 | log_warnx("invalid number of disks"); | |||
1416 | goto fail; | |||
1417 | } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM4) { | |||
1418 | log_warnx("invalid number of interfaces"); | |||
1419 | goto fail; | |||
1420 | } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0 | |||
1421 | && strlen(vmc->vmc_cdrom) == 0) { | |||
1422 | log_warnx("no kernel or disk/cdrom specified"); | |||
1423 | goto fail; | |||
1424 | } else if (strlen(vcp->vcp_name) == 0) { | |||
1425 | log_warnx("invalid VM name"); | |||
1426 | goto fail; | |||
1427 | } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || | |||
1428 | *vcp->vcp_name == '_') { | |||
1429 | log_warnx("invalid VM name"); | |||
1430 | goto fail; | |||
1431 | } else { | |||
1432 | for (s = vcp->vcp_name; *s != '\0'; ++s) { | |||
1433 | if (!(isalnum((unsigned char)*s) || *s == '.' || \ | |||
1434 | *s == '-' || *s == '_')) { | |||
1435 | log_warnx("invalid VM name"); | |||
1436 | goto fail; | |||
1437 | } | |||
1438 | } | |||
1439 | } | |||
1440 | ||||
1441 | if ((vm = calloc(1, sizeof(*vm))) == NULL((void *)0)) | |||
1442 | goto fail; | |||
1443 | ||||
1444 | memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); | |||
1445 | vmc = &vm->vm_params; | |||
1446 | vcp = &vmc->vmc_params; | |||
1447 | vm->vm_pid = -1; | |||
1448 | vm->vm_tty = -1; | |||
1449 | vm->vm_receive_fd = -1; | |||
1450 | vm->vm_kernel = -1; | |||
1451 | vm->vm_state &= ~VM_STATE_PAUSED0x10; | |||
1452 | ||||
1453 | if (vmc->vmc_kernel > -1) | |||
1454 | vm->vm_kernel = vmc->vmc_kernel; | |||
1455 | ||||
1456 | for (i = 0; i < VM_MAX_DISKS_PER_VM4; i++) | |||
1457 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) | |||
1458 | vm->vm_disks[i][j] = -1; | |||
1459 | for (i = 0; i < VM_MAX_NICS_PER_VM4; i++) | |||
1460 | vm->vm_ifs[i].vif_fd = -1; | |||
1461 | for (i = 0; i < vmc->vmc_nnics; i++) { | |||
1462 | if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL((void *)0)) { | |||
1463 | /* inherit per-interface flags from the switch */ | |||
1464 | vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK(0x02|0x04|0x08)); | |||
1465 | } | |||
1466 | ||||
1467 | /* | |||
1468 | * If the MAC address is zero, always randomize it in vmd(8) | |||
1469 | * because we cannot rely on the guest OS to do the right | |||
1470 | * thing like OpenBSD does. Based on ether_fakeaddr() | |||
1471 | * from the kernel, incremented by one to differentiate | |||
1472 | * the source. | |||
1473 | */ | |||
1474 | if (memcmp(zero_mac, &vmc->vmc_macs[i], ETHER_ADDR_LEN6) == 0) { | |||
1475 | rng = arc4random(); | |||
1476 | vmc->vmc_macs[i][0] = 0xfe; | |||
1477 | vmc->vmc_macs[i][1] = 0xe1; | |||
1478 | vmc->vmc_macs[i][2] = 0xba + 1; | |||
1479 | vmc->vmc_macs[i][3] = 0xd0 | ((i + 1) & 0xf); | |||
1480 | vmc->vmc_macs[i][4] = rng; | |||
1481 | vmc->vmc_macs[i][5] = rng >> 8; | |||
1482 | } | |||
1483 | } | |||
1484 | vm->vm_cdrom = -1; | |||
1485 | vm->vm_iev.ibuf.fd = -1; | |||
1486 | ||||
1487 | /* | |||
1488 | * Assign a new internal Id if not specified and we succeed in | |||
1489 | * claiming a new Id. | |||
1490 | */ | |||
1491 | if (id != 0) | |||
1492 | vm->vm_vmid = id; | |||
1493 | else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) | |||
1494 | goto fail; | |||
1495 | else | |||
1496 | vm->vm_vmid = nid; | |||
1497 | ||||
1498 | log_debug("%s: registering vm %d", __func__, vm->vm_vmid); | |||
1499 | TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry)do { (vm)->vm_entry.tqe_next = ((void *)0); (vm)->vm_entry .tqe_prev = (env->vmd_vms)->tqh_last; *(env->vmd_vms )->tqh_last = (vm); (env->vmd_vms)->tqh_last = & (vm)->vm_entry.tqe_next; } while (0); | |||
1500 | ||||
1501 | *ret_vm = vm; | |||
1502 | return (0); | |||
1503 | fail: | |||
1504 | if (errno(*__errno()) == 0) | |||
1505 | errno(*__errno()) = EINVAL22; | |||
1506 | return (-1); | |||
1507 | } | |||
1508 | ||||
1509 | int | |||
1510 | vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, | |||
1511 | struct vmop_create_params *vmc, uid_t uid) | |||
1512 | { | |||
1513 | char *name; | |||
1514 | struct vm_create_params *vcp = &vmc->vmc_params; | |||
1515 | struct vmop_create_params *vmcp; | |||
1516 | struct vm_create_params *vcpp; | |||
1517 | unsigned int i, j; | |||
1518 | ||||
1519 | /* return without error if the parent is NULL (nothing to inherit) */ | |||
1520 | if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE0x40) == 0 || | |||
1521 | vmc->vmc_instance[0] == '\0') | |||
1522 | return (0); | |||
1523 | ||||
1524 | if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL((void *)0)) { | |||
1525 | return (VMD_PARENT_INVALID1007); | |||
1526 | } | |||
1527 | ||||
1528 | vmcp = &(*vm_parent)->vm_params; | |||
1529 | vcpp = &vmcp->vmc_params; | |||
1530 | ||||
1531 | /* Are we allowed to create an instance from this VM? */ | |||
1532 | if (vm_checkperm(NULL((void *)0), &vmcp->vmc_insowner, uid) != 0) { | |||
1533 | log_warnx("vm \"%s\" no permission to create vm instance", | |||
1534 | vcpp->vcp_name); | |||
1535 | return (ENAMETOOLONG63); | |||
1536 | } | |||
1537 | ||||
1538 | name = vcp->vcp_name; | |||
1539 | ||||
1540 | if (vm_getbyname(vcp->vcp_name) != NULL((void *)0) || | |||
1541 | vm_getbyvmid(vcp->vcp_id) != NULL((void *)0)) { | |||
1542 | return (EPROCLIM67); | |||
1543 | } | |||
1544 | ||||
1545 | /* CPU */ | |||
1546 | if (vcp->vcp_ncpus == 0) | |||
1547 | vcp->vcp_ncpus = vcpp->vcp_ncpus; | |||
1548 | if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU0x01, uid) != 0 && | |||
1549 | vcp->vcp_ncpus != vcpp->vcp_ncpus) { | |||
1550 | log_warnx("vm \"%s\" no permission to set cpus", name); | |||
1551 | return (EPERM1); | |||
1552 | } | |||
1553 | ||||
1554 | /* memory */ | |||
1555 | if (vcp->vcp_memranges[0].vmr_size == 0) | |||
1556 | vcp->vcp_memranges[0].vmr_size = | |||
1557 | vcpp->vcp_memranges[0].vmr_size; | |||
1558 | if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY0x04, uid) != 0 && | |||
1559 | vcp->vcp_memranges[0].vmr_size != | |||
1560 | vcpp->vcp_memranges[0].vmr_size) { | |||
1561 | log_warnx("vm \"%s\" no permission to set memory", name); | |||
1562 | return (EPERM1); | |||
1563 | } | |||
1564 | ||||
1565 | /* disks cannot be inherited */ | |||
1566 | if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK0x10, uid) != 0 && | |||
1567 | vmc->vmc_ndisks) { | |||
1568 | log_warnx("vm \"%s\" no permission to set disks", name); | |||
1569 | return (EPERM1); | |||
1570 | } | |||
1571 | for (i = 0; i < vmc->vmc_ndisks; i++) { | |||
1572 | /* Check if this disk is already used in the parent */ | |||
1573 | for (j = 0; j < vmcp->vmc_ndisks; j++) { | |||
1574 | if (strcmp(vmc->vmc_disks[i], | |||
1575 | vmcp->vmc_disks[j]) == 0) { | |||
1576 | log_warnx("vm \"%s\" disk %s cannot be reused", | |||
1577 | name, vmc->vmc_disks[i]); | |||
1578 | return (EBUSY16); | |||
1579 | } | |||
1580 | } | |||
1581 | vmc->vmc_checkaccess |= VMOP_CREATE_DISK0x10; | |||
1582 | } | |||
1583 | ||||
1584 | /* interfaces */ | |||
1585 | if (vmc->vmc_nnics > 0 && | |||
1586 | vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK0x08, uid) != 0 && | |||
1587 | vmc->vmc_nnics != vmcp->vmc_nnics) { | |||
1588 | log_warnx("vm \"%s\" no permission to set interfaces", name); | |||
1589 | return (EPERM1); | |||
1590 | } | |||
1591 | for (i = 0; i < vmcp->vmc_nnics; i++) { | |||
1592 | /* Interface got overwritten */ | |||
1593 | if (i < vmc->vmc_nnics) | |||
1594 | continue; | |||
1595 | ||||
1596 | /* Copy interface from parent */ | |||
1597 | vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; | |||
1598 | (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], | |||
1599 | sizeof(vmc->vmc_ifnames[i])); | |||
1600 | (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], | |||
1601 | sizeof(vmc->vmc_ifswitch[i])); | |||
1602 | (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], | |||
1603 | sizeof(vmc->vmc_ifgroup[i])); | |||
1604 | memcpy(vmc->vmc_macs[i], vmcp->vmc_macs[i], | |||
1605 | sizeof(vmc->vmc_macs[i])); | |||
1606 | vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; | |||
1607 | vmc->vmc_nnics++; | |||
1608 | } | |||
1609 | for (i = 0; i < vmc->vmc_nnics; i++) { | |||
1610 | for (j = 0; j < vmcp->vmc_nnics; j++) { | |||
1611 | if (memcmp(zero_mac, vmc->vmc_macs[i], | |||
1612 | sizeof(vmc->vmc_macs[i])) != 0 && | |||
1613 | memcmp(vmcp->vmc_macs[i], vmc->vmc_macs[i], | |||
1614 | sizeof(vmc->vmc_macs[i])) != 0) { | |||
1615 | log_warnx("vm \"%s\" lladdr cannot be reused", | |||
1616 | name); | |||
1617 | return (EBUSY16); | |||
1618 | } | |||
1619 | if (strlen(vmc->vmc_ifnames[i]) && | |||
1620 | strcmp(vmc->vmc_ifnames[i], | |||
1621 | vmcp->vmc_ifnames[j]) == 0) { | |||
1622 | log_warnx("vm \"%s\" %s cannot be reused", | |||
1623 | vmc->vmc_ifnames[i], name); | |||
1624 | return (EBUSY16); | |||
1625 | } | |||
1626 | } | |||
1627 | } | |||
1628 | ||||
1629 | /* kernel */ | |||
1630 | if (vmc->vmc_kernel > -1 || ((*vm_parent)->vm_kernel_path != NULL((void *)0) && | |||
1631 | strnlen((*vm_parent)->vm_kernel_path, PATH_MAX1024) < PATH_MAX1024)) { | |||
1632 | if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL0x02, uid) != 0) { | |||
1633 | log_warnx("vm \"%s\" no permission to set boot image", | |||
1634 | name); | |||
1635 | return (EPERM1); | |||
1636 | } | |||
1637 | vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL0x02; | |||
1638 | } | |||
1639 | ||||
1640 | /* cdrom */ | |||
1641 | if (strlen(vmc->vmc_cdrom) > 0) { | |||
1642 | if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM0x20, uid) != 0) { | |||
1643 | log_warnx("vm \"%s\" no permission to set cdrom", name); | |||
1644 | return (EPERM1); | |||
1645 | } | |||
1646 | vmc->vmc_checkaccess |= VMOP_CREATE_CDROM0x20; | |||
1647 | } else if (strlcpy(vmc->vmc_cdrom, vmcp->vmc_cdrom, | |||
1648 | sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) { | |||
1649 | log_warnx("vm \"%s\" cdrom name too long", name); | |||
1650 | return (EINVAL22); | |||
1651 | } | |||
1652 | ||||
1653 | /* user */ | |||
1654 | if (vmc->vmc_owner.uid == 0) | |||
1655 | vmc->vmc_owner.uid = vmcp->vmc_owner.uid; | |||
1656 | else if (vmc->vmc_owner.uid != uid && | |||
1657 | vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { | |||
1658 | log_warnx("vm \"%s\" user mismatch", name); | |||
1659 | return (EPERM1); | |||
1660 | } | |||
1661 | ||||
1662 | /* group */ | |||
1663 | if (vmc->vmc_owner.gid == 0) | |||
1664 | vmc->vmc_owner.gid = vmcp->vmc_owner.gid; | |||
1665 | else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { | |||
1666 | log_warnx("vm \"%s\" group mismatch", name); | |||
1667 | return (EPERM1); | |||
1668 | } | |||
1669 | ||||
1670 | /* child instances */ | |||
1671 | if (vmc->vmc_insflags) { | |||
1672 | log_warnx("vm \"%s\" cannot change instance permissions", name); | |||
1673 | return (EPERM1); | |||
1674 | } | |||
1675 | if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE0x40) { | |||
1676 | vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; | |||
1677 | vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; | |||
1678 | vmc->vmc_insflags = vmcp->vmc_insflags; | |||
1679 | } else { | |||
1680 | vmc->vmc_insowner.gid = 0; | |||
1681 | vmc->vmc_insowner.uid = 0; | |||
1682 | vmc->vmc_insflags = 0; | |||
1683 | } | |||
1684 | ||||
1685 | /* finished, remove instance flags */ | |||
1686 | vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE0x40; | |||
1687 | ||||
1688 | return (0); | |||
1689 | } | |||
1690 | ||||
1691 | /* | |||
1692 | * vm_checkperm | |||
1693 | * | |||
1694 | * Checks if the user represented by the 'uid' parameter is allowed to | |||
1695 | * manipulate the VM described by the 'vm' parameter (or connect to said VM's | |||
1696 | * console.) | |||
1697 | * | |||
1698 | * Parameters: | |||
1699 | * vm: the VM whose permission is to be checked | |||
1700 | * vmo: the required uid/gid to be checked | |||
1701 | * uid: the user ID of the user making the request | |||
1702 | * | |||
1703 | * Return values: | |||
1704 | * 0: the permission should be granted | |||
1705 | * -1: the permission check failed (also returned if vm == null) | |||
1706 | */ | |||
1707 | int | |||
1708 | vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) | |||
1709 | { | |||
1710 | struct group *gr; | |||
1711 | struct passwd *pw; | |||
1712 | char **grmem; | |||
1713 | ||||
1714 | /* root has no restrictions */ | |||
1715 | if (uid == 0) | |||
1716 | return (0); | |||
1717 | ||||
1718 | if (vmo == NULL((void *)0)) | |||
1719 | return (-1); | |||
1720 | ||||
1721 | /* check user */ | |||
1722 | if (vm == NULL((void *)0)) { | |||
1723 | if (vmo->uid == uid) | |||
1724 | return (0); | |||
1725 | } else { | |||
1726 | /* | |||
1727 | * check user of running vm (the owner of a running vm can | |||
1728 | * be different to (or more specific than) the configured owner. | |||
1729 | */ | |||
1730 | if (((vm->vm_state & VM_STATE_RUNNING0x01) && vm->vm_uid == uid) || | |||
1731 | (!(vm->vm_state & VM_STATE_RUNNING0x01) && vmo->uid == uid)) | |||
1732 | return (0); | |||
1733 | } | |||
1734 | ||||
1735 | /* check groups */ | |||
1736 | if (vmo->gid != -1) { | |||
1737 | if ((pw = getpwuid(uid)) == NULL((void *)0)) | |||
1738 | return (-1); | |||
1739 | if (pw->pw_gid == vmo->gid) | |||
1740 | return (0); | |||
1741 | if ((gr = getgrgid(vmo->gid)) != NULL((void *)0)) { | |||
1742 | for (grmem = gr->gr_mem; *grmem; grmem++) | |||
1743 | if (strcmp(*grmem, pw->pw_name) == 0) | |||
1744 | return (0); | |||
1745 | } | |||
1746 | } | |||
1747 | ||||
1748 | return (-1); | |||
1749 | } | |||
1750 | ||||
1751 | /* | |||
1752 | * vm_checkinsflag | |||
1753 | * | |||
1754 | * Checks whether the non-root user is allowed to set an instance option. | |||
1755 | * | |||
1756 | * Parameters: | |||
1757 | * vmc: the VM create parameters | |||
1758 | * flag: the flag to be checked | |||
1759 | * uid: the user ID of the user making the request | |||
1760 | * | |||
1761 | * Return values: | |||
1762 | * 0: the permission should be granted | |||
1763 | * -1: the permission check failed (also returned if vm == null) | |||
1764 | */ | |||
1765 | int | |||
1766 | vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) | |||
1767 | { | |||
1768 | /* root has no restrictions */ | |||
1769 | if (uid == 0) | |||
1770 | return (0); | |||
1771 | ||||
1772 | if ((vmc->vmc_insflags & flag) == 0) | |||
1773 | return (-1); | |||
1774 | ||||
1775 | return (0); | |||
1776 | } | |||
1777 | ||||
/*
 * vm_checkaccess
 *
 * Checks if the user represented by the 'uid' parameter is allowed to
 * access the already opened file referred to by the 'fd' parameter.
 *
 * Parameters:
 *  fd: the file descriptor of the opened file
 *  uflag: if zero, the per-user permission check is skipped
 *  uid: the user ID of the user making the request
 *  amode: the access flags of R_OK and W_OK
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed
 */
int
vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
{
	struct stat	  sb;
	struct passwd	 *pwd;
	struct group	 *grp;
	char		**member;
	mode_t		  want;
	const int	  wr = (amode & W_OK) != 0;
	const int	  rd = (amode & R_OK) != 0;

	if (fd == -1)
		return (-1);

	/* File has to be accessible and a regular file */
	if (fstat(fd, &sb) == -1)
		return (-1);
	if (!S_ISREG(sb.st_mode))
		return (-1);

	/* root has no restrictions */
	if (uid == 0 || uflag == 0)
		return (0);

	/* check other */
	want = (wr ? S_IWOTH : 0) | (rd ? S_IROTH : 0);
	if ((sb.st_mode & want) == want)
		return (0);

	/* check user */
	want = (wr ? S_IWUSR : 0) | (rd ? S_IRUSR : 0);
	if (uid == sb.st_uid && (sb.st_mode & want) == want)
		return (0);

	/* check groups: the group bits must allow it at all ... */
	want = (wr ? S_IWGRP : 0) | (rd ? S_IRGRP : 0);
	if ((sb.st_mode & want) != want)
		return (-1);

	/* ... and the user must belong to the file's group */
	pwd = getpwuid(uid);
	if (pwd == NULL)
		return (-1);
	if (pwd->pw_gid == sb.st_gid)
		return (0);

	grp = getgrgid(sb.st_gid);
	if (grp == NULL)
		return (-1);
	for (member = grp->gr_mem; *member != NULL; member++) {
		if (strcmp(*member, pwd->pw_name) == 0)
			return (0);
	}

	return (-1);
}
1845 | ||||
1846 | int | |||
1847 | vm_opentty(struct vmd_vm *vm) | |||
1848 | { | |||
1849 | struct ptmget ptm; | |||
1850 | struct stat st; | |||
1851 | struct group *gr; | |||
1852 | uid_t uid; | |||
1853 | gid_t gid; | |||
1854 | mode_t mode; | |||
1855 | int on; | |||
1856 | ||||
1857 | /* | |||
1858 | * Open tty with pre-opened PTM fd | |||
1859 | */ | |||
1860 | if ((ioctl(env->vmd_ptmfd, PTMGET((unsigned long)0x40000000 | ((sizeof(struct ptmget) & 0x1fff ) << 16) | ((('t')) << 8) | ((1))), &ptm) == -1)) | |||
1861 | return (-1); | |||
1862 | ||||
1863 | /* | |||
1864 | * We use user ioctl(2) mode to pass break commands. | |||
1865 | */ | |||
1866 | on = 1; | |||
1867 | if (ioctl(ptm.cfd, TIOCUCNTL((unsigned long)0x80000000 | ((sizeof(int) & 0x1fff) << 16) | ((('t')) << 8) | ((102))), &on) == -1) | |||
1868 | fatal("could not enable user ioctl mode"); | |||
1869 | ||||
1870 | vm->vm_tty = ptm.cfd; | |||
1871 | close(ptm.sfd); | |||
1872 | if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname)) | |||
1873 | >= sizeof(vm->vm_ttyname)) { | |||
1874 | log_warnx("%s: truncated ttyname", __func__); | |||
1875 | goto fail; | |||
1876 | } | |||
1877 | ||||
1878 | uid = vm->vm_uid; | |||
1879 | gid = vm->vm_params.vmc_owner.gid; | |||
1880 | ||||
1881 | if (vm->vm_params.vmc_owner.gid != -1) { | |||
1882 | mode = 0660; | |||
1883 | } else if ((gr = getgrnam("tty")) != NULL((void *)0)) { | |||
1884 | gid = gr->gr_gid; | |||
1885 | mode = 0620; | |||
1886 | } else { | |||
1887 | mode = 0600; | |||
1888 | gid = 0; | |||
1889 | } | |||
1890 | ||||
1891 | log_debug("%s: vm %s tty %s uid %d gid %d mode %o", | |||
1892 | __func__, vm->vm_params.vmc_params.vcp_name, | |||
1893 | vm->vm_ttyname, uid, gid, mode); | |||
1894 | ||||
1895 | /* | |||
1896 | * Change ownership and mode of the tty as required. | |||
1897 | * Loosely based on the implementation of sshpty.c | |||
1898 | */ | |||
1899 | if (stat(vm->vm_ttyname, &st) == -1) | |||
1900 | goto fail; | |||
1901 | ||||
1902 | if (st.st_uid != uid || st.st_gid != gid) { | |||
1903 | if (chown(vm->vm_ttyname, uid, gid) == -1) { | |||
1904 | log_warn("chown %s %d %d failed, uid %d", | |||
1905 | vm->vm_ttyname, uid, gid, getuid()); | |||
1906 | ||||
1907 | /* Ignore failure on read-only filesystems */ | |||
1908 | if (!((errno(*__errno()) == EROFS30) && | |||
1909 | (st.st_uid == uid || st.st_uid == 0))) | |||
1910 | goto fail; | |||
1911 | } | |||
1912 | } | |||
1913 | ||||
1914 | if ((st.st_mode & (S_IRWXU0000700|S_IRWXG0000070|S_IRWXO0000007)) != mode) { | |||
1915 | if (chmod(vm->vm_ttyname, mode) == -1) { | |||
1916 | log_warn("chmod %s %o failed, uid %d", | |||
1917 | vm->vm_ttyname, mode, getuid()); | |||
1918 | ||||
1919 | /* Ignore failure on read-only filesystems */ | |||
1920 | if (!((errno(*__errno()) == EROFS30) && | |||
1921 | (st.st_uid == uid || st.st_uid == 0))) | |||
1922 | goto fail; | |||
1923 | } | |||
1924 | } | |||
1925 | ||||
1926 | return (0); | |||
1927 | fail: | |||
1928 | vm_closetty(vm); | |||
1929 | return (-1); | |||
1930 | } | |||
1931 | ||||
1932 | void | |||
1933 | vm_closetty(struct vmd_vm *vm) | |||
1934 | { | |||
1935 | if (vm->vm_tty != -1) { | |||
1936 | /* Release and close the tty */ | |||
1937 | if (fchown(vm->vm_tty, 0, 0) == -1) | |||
1938 | log_warn("chown %s 0 0 failed", vm->vm_ttyname); | |||
1939 | if (fchmod(vm->vm_tty, 0666) == -1) | |||
1940 | log_warn("chmod %s 0666 failed", vm->vm_ttyname); | |||
1941 | close(vm->vm_tty); | |||
1942 | vm->vm_tty = -1; | |||
1943 | } | |||
1944 | memset(&vm->vm_ttyname, 0, sizeof(vm->vm_ttyname)); | |||
1945 | } | |||
1946 | ||||
1947 | void | |||
1948 | switch_remove(struct vmd_switch *vsw) | |||
1949 | { | |||
1950 | if (vsw == NULL((void *)0)) | |||
1951 | return; | |||
1952 | ||||
1953 | TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry)do { if (((vsw)->sw_entry.tqe_next) != ((void *)0)) (vsw)-> sw_entry.tqe_next->sw_entry.tqe_prev = (vsw)->sw_entry. tqe_prev; else (env->vmd_switches)->tqh_last = (vsw)-> sw_entry.tqe_prev; *(vsw)->sw_entry.tqe_prev = (vsw)->sw_entry .tqe_next; ; ; } while (0); | |||
1954 | ||||
1955 | free(vsw->sw_group); | |||
1956 | free(vsw->sw_name); | |||
1957 | free(vsw); | |||
1958 | } | |||
1959 | ||||
1960 | struct vmd_switch * | |||
1961 | switch_getbyname(const char *name) | |||
1962 | { | |||
1963 | struct vmd_switch *vsw; | |||
1964 | ||||
1965 | if (name == NULL((void *)0)) | |||
1966 | return (NULL((void *)0)); | |||
1967 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
1968 | if (strcmp(vsw->sw_name, name) == 0) | |||
1969 | return (vsw); | |||
1970 | } | |||
1971 | ||||
1972 | return (NULL((void *)0)); | |||
1973 | } | |||
1974 | ||||
1975 | char * | |||
1976 | get_string(uint8_t *ptr, size_t len) | |||
1977 | { | |||
1978 | size_t i; | |||
1979 | ||||
1980 | for (i = 0; i < len; i++) | |||
1981 | if (!isprint((unsigned char)ptr[i])) | |||
1982 | break; | |||
1983 | ||||
1984 | return strndup(ptr, i); | |||
1985 | } | |||
1986 | ||||
/*
 * prefixlen2mask
 *
 * Converts an IPv4 prefix length into a netmask in network byte order.
 * Lengths above 32 are treated as 32; a length of 0 yields 0.
 */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	uint32_t	hostorder;

	if (prefixlen == 0)
		return (0);

	if (prefixlen > 32)
		prefixlen = 32;

	/* shift count is 0..31 here, so the shift is well defined */
	hostorder = 0xffffffff << (32 - prefixlen);

	return (htonl(hostorder));
}
1998 | ||||
/*
 * prefixlen2mask6
 *
 * Converts an IPv6 prefix length into a netmask and stores it in 'mask'.
 * Lengths above 128 are treated as 128.
 */
void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 tmp;
	int		 nbytes, nbits;

	if (prefixlen > 128)
		prefixlen = 128;

	nbytes = prefixlen / 8;		/* bytes that are fully set */
	nbits = prefixlen % 8;		/* leading bits of the next byte */

	memset(&tmp, 0, sizeof(tmp));
	memset(tmp.s6_addr, 0xff, nbytes);
	if (nbits != 0)
		tmp.s6_addr[nbytes] = 0xff00 >> nbits;

	memcpy(mask, &tmp, sizeof(tmp));
}
2017 | ||||
2018 | void | |||
2019 | getmonotime(struct timeval *tv) | |||
2020 | { | |||
2021 | struct timespec ts; | |||
2022 | ||||
2023 | if (clock_gettime(CLOCK_MONOTONIC3, &ts)) | |||
2024 | fatal("clock_gettime"); | |||
2025 | ||||
2026 | TIMESPEC_TO_TIMEVAL(tv, &ts)do { (tv)->tv_sec = (&ts)->tv_sec; (tv)->tv_usec = (&ts)->tv_nsec / 1000; } while (0); | |||
2027 | } | |||
2028 | ||||
2029 | static inline void | |||
2030 | vm_terminate(struct vmd_vm *vm, const char *caller) | |||
2031 | { | |||
2032 | if (vm->vm_from_config) | |||
2033 | vm_stop(vm, 0, caller); | |||
2034 | else { | |||
2035 | /* vm_remove calls vm_stop */ | |||
2036 | vm_remove(vm, caller); | |||
2037 | } | |||
2038 | } | |||
2039 | ||||
/*
 * Utility function for closing vm file descriptors. An fd of -1 is taken
 * to be already closed or never opened and is reported as success.
 *
 * Returns 0 on success, otherwise -1 on failure. A failure with EIO is
 * additionally logged.
 */
int
close_fd(int fd)
{
	int	rc;

	if (fd == -1)
		return (0);

#ifdef POSIX_CLOSE_RESTART
	/* retry for as long as close(2) is interrupted by a signal */
	for (;;) {
		rc = close(fd);
		if (rc != -1 || errno != EINTR)
			break;
	}
#else
	rc = close(fd);
#endif /* POSIX_CLOSE_RESTART */

	if (rc == -1 && errno == EIO)
		log_warn("%s(%d)", __func__, fd);

	return (rc);
}