File: | src/usr.sbin/vmd/vmm.c |
Warning: | line 822, column 7 Access to field 'vm_from_config' results in a dereference of a null pointer (loaded from variable 'vm') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: vmm.c,v 1.116 2024/01/03 22:34:39 dv Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include <sys/types.h> | |||
20 | #include <sys/ioctl.h> | |||
21 | #include <sys/queue.h> | |||
22 | #include <sys/wait.h> | |||
23 | #include <sys/uio.h> | |||
24 | #include <sys/socket.h> | |||
25 | #include <sys/time.h> | |||
26 | #include <sys/mman.h> | |||
27 | ||||
28 | #include <dev/ic/i8253reg.h> | |||
29 | #include <dev/isa/isareg.h> | |||
30 | #include <dev/pci/pcireg.h> | |||
31 | ||||
32 | #include <machine/psl.h> | |||
33 | #include <machine/specialreg.h> | |||
34 | #include <machine/vmmvar.h> | |||
35 | ||||
36 | #include <net/if.h> | |||
37 | ||||
38 | #include <errno(*__errno()).h> | |||
39 | #include <event.h> | |||
40 | #include <fcntl.h> | |||
41 | #include <imsg.h> | |||
42 | #include <limits.h> | |||
43 | #include <poll.h> | |||
44 | #include <pthread.h> | |||
45 | #include <stddef.h> | |||
46 | #include <stdio.h> | |||
47 | #include <stdlib.h> | |||
48 | #include <string.h> | |||
49 | #include <unistd.h> | |||
50 | #include <util.h> | |||
51 | ||||
52 | #include "vmd.h" | |||
53 | #include "vmm.h" | |||
54 | #include "atomicio.h" | |||
55 | ||||
56 | void vmm_sighdlr(int, short, void *); | |||
57 | int vmm_start_vm(struct imsg *, uint32_t *, pid_t *); | |||
58 | int vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *); | |||
59 | void vmm_run(struct privsep *, struct privsep_proc *, void *); | |||
60 | void vmm_dispatch_vm(int, short, void *); | |||
61 | int terminate_vm(struct vm_terminate_params *); | |||
62 | int get_info_vm(struct privsep *, struct imsg *, int); | |||
63 | int opentap(char *); | |||
64 | ||||
65 | extern struct vmd *env; | |||
66 | ||||
67 | static struct privsep_proc procs[] = { | |||
68 | { "parent", PROC_PARENT, vmm_dispatch_parent }, | |||
69 | }; | |||
70 | ||||
71 | void | |||
72 | vmm(struct privsep *ps, struct privsep_proc *p) | |||
73 | { | |||
74 | proc_run(ps, p, procs, nitems(procs)(sizeof((procs)) / sizeof((procs)[0])), vmm_run, NULL((void *)0)); | |||
75 | } | |||
76 | ||||
77 | void | |||
78 | vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg) | |||
79 | { | |||
80 | if (config_init(ps->ps_env) == -1) | |||
81 | fatal("failed to initialize configuration"); | |||
82 | ||||
83 | /* | |||
84 | * We aren't root, so we can't chroot(2). Use unveil(2) instead. | |||
85 | */ | |||
86 | if (unveil(env->argv0, "x") == -1) | |||
87 | fatal("unveil %s", env->argv0); | |||
88 | if (unveil(NULL((void *)0), NULL((void *)0)) == -1) | |||
89 | fatal("unveil lock"); | |||
90 | ||||
91 | /* | |||
92 | * pledge in the vmm process: | |||
93 | * stdio - for malloc and basic I/O including events. | |||
94 | * vmm - for the vmm ioctls and operations. | |||
95 | * proc, exec - for forking and execing new vm's. | |||
96 | * sendfd - for sending send/recv fds to vm proc. | |||
97 | * recvfd - for disks, interfaces and other fds. | |||
98 | */ | |||
99 | if (pledge("stdio vmm sendfd recvfd proc exec", NULL((void *)0)) == -1) | |||
100 | fatal("pledge"); | |||
101 | ||||
102 | signal_del(&ps->ps_evsigchld)event_del(&ps->ps_evsigchld); | |||
103 | signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps)event_set(&ps->ps_evsigchld, 20, 0x08|0x10, vmm_sighdlr , ps); | |||
104 | signal_add(&ps->ps_evsigchld, NULL)event_add(&ps->ps_evsigchld, ((void *)0)); | |||
105 | } | |||
106 | ||||
107 | int | |||
108 | vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
109 | { | |||
110 | struct privsep *ps = p->p_ps; | |||
111 | int res = 0, cmd = 0, verbose; | |||
112 | struct vmd_vm *vm = NULL((void *)0); | |||
113 | struct vm_terminate_params vtp; | |||
114 | struct vmop_id vid; | |||
115 | struct vmop_result vmr; | |||
116 | struct vmop_create_params vmc; | |||
117 | struct vmop_addr_result var; | |||
118 | uint32_t id = 0, peerid = imsg->hdr.peerid; | |||
119 | pid_t pid = 0; | |||
120 | unsigned int mode, flags; | |||
121 | ||||
122 | switch (imsg->hdr.type) { | |||
| ||||
123 | case IMSG_VMDOP_START_VM_REQUEST: | |||
124 | res = config_getvm(ps, imsg); | |||
125 | if (res == -1) { | |||
126 | res = errno(*__errno()); | |||
127 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
128 | } | |||
129 | break; | |||
130 | case IMSG_VMDOP_START_VM_CDROM: | |||
131 | res = config_getcdrom(ps, imsg); | |||
132 | if (res == -1) { | |||
133 | res = errno(*__errno()); | |||
134 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
135 | } | |||
136 | break; | |||
137 | case IMSG_VMDOP_START_VM_DISK: | |||
138 | res = config_getdisk(ps, imsg); | |||
139 | if (res == -1) { | |||
140 | res = errno(*__errno()); | |||
141 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
142 | } | |||
143 | break; | |||
144 | case IMSG_VMDOP_START_VM_IF: | |||
145 | res = config_getif(ps, imsg); | |||
146 | if (res == -1) { | |||
147 | res = errno(*__errno()); | |||
148 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
149 | } | |||
150 | break; | |||
151 | case IMSG_VMDOP_START_VM_END: | |||
152 | res = vmm_start_vm(imsg, &id, &pid); | |||
153 | /* Check if the ID can be mapped correctly */ | |||
154 | if (res == 0 && (id = vm_id2vmid(id, NULL((void *)0))) == 0) | |||
155 | res = ENOENT2; | |||
156 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
157 | break; | |||
158 | case IMSG_VMDOP_TERMINATE_VM_REQUEST: | |||
159 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
160 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
161 | id = vid.vid_id; | |||
162 | flags = vid.vid_flags; | |||
163 | ||||
164 | DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id)do {} while(0); | |||
165 | ||||
166 | cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; | |||
167 | ||||
168 | if (id == 0) { | |||
169 | res = ENOENT2; | |||
170 | } else if ((vm = vm_getbyvmid(id)) != NULL((void *)0)) { | |||
171 | if (flags & VMOP_FORCE0x01) { | |||
172 | vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); | |||
173 | vm->vm_state |= VM_STATE_SHUTDOWN0x04; | |||
174 | (void)terminate_vm(&vtp); | |||
175 | res = 0; | |||
176 | } else if (!(vm->vm_state & VM_STATE_SHUTDOWN0x04)) { | |||
177 | log_debug("%s: sending shutdown request" | |||
178 | " to vm %d", __func__, id); | |||
179 | ||||
180 | /* | |||
181 | * Request reboot but mark the VM as shutting | |||
182 | * down. This way we can terminate the VM after | |||
183 | * the triple fault instead of reboot and | |||
184 | * avoid being stuck in the ACPI-less powerdown | |||
185 | * ("press any key to reboot") of the VM. | |||
186 | */ | |||
187 | vm->vm_state |= VM_STATE_SHUTDOWN0x04; | |||
188 | if (imsg_compose_event(&vm->vm_iev, | |||
189 | IMSG_VMDOP_VM_REBOOT, | |||
190 | 0, 0, -1, NULL((void *)0), 0) == -1) | |||
191 | res = errno(*__errno()); | |||
192 | else | |||
193 | res = 0; | |||
194 | } else { | |||
195 | /* | |||
196 | * VM is currently being shutdown. | |||
197 | * Check to see if the VM process is still | |||
198 | * active. If not, return VMD_VM_STOP_INVALID. | |||
199 | */ | |||
200 | if (vm_vmid2id(vm->vm_vmid, vm) == 0) { | |||
201 | log_debug("%s: no vm running anymore", | |||
202 | __func__); | |||
203 | res = VMD_VM_STOP_INVALID1004; | |||
204 | } | |||
205 | } | |||
206 | } else { | |||
207 | /* VM doesn't exist, cannot stop vm */ | |||
208 | log_debug("%s: cannot stop vm that is not running", | |||
209 | __func__); | |||
210 | res = VMD_VM_STOP_INVALID1004; | |||
211 | } | |||
212 | break; | |||
213 | case IMSG_VMDOP_GET_INFO_VM_REQUEST: | |||
214 | res = get_info_vm(ps, imsg, 0); | |||
215 | cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA; | |||
216 | break; | |||
217 | case IMSG_VMDOP_CONFIG: | |||
218 | config_getconfig(env, imsg); | |||
219 | break; | |||
220 | case IMSG_CTL_RESET: | |||
221 | IMSG_SIZE_CHECK(imsg, &mode)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&mode)) fatalx("bad length imsg received (%s)", "&mode" ); } while (0); | |||
222 | memcpy(&mode, imsg->data, sizeof(mode)); | |||
223 | ||||
224 | if (mode & CONFIG_VMS0x01) { | |||
225 | /* Terminate and remove all VMs */ | |||
226 | vmm_shutdown(); | |||
227 | mode &= ~CONFIG_VMS0x01; | |||
228 | } | |||
229 | ||||
230 | config_getreset(env, imsg); | |||
231 | break; | |||
232 | case IMSG_CTL_VERBOSE: | |||
233 | IMSG_SIZE_CHECK(imsg, &verbose)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&verbose)) fatalx("bad length imsg received (%s)", "&verbose" ); } while (0); | |||
234 | memcpy(&verbose, imsg->data, sizeof(verbose)); | |||
235 | log_setverbose(verbose); | |||
236 | env->vmd_verbose = verbose; | |||
237 | /* Forward message to each VM process */ | |||
238 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
239 | imsg_compose_event(&vm->vm_iev, | |||
240 | imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, | |||
241 | -1, &verbose, sizeof(verbose)); | |||
242 | } | |||
243 | break; | |||
244 | case IMSG_VMDOP_PAUSE_VM: | |||
245 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
246 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
247 | id = vid.vid_id; | |||
248 | if ((vm = vm_getbyvmid(id)) == NULL((void *)0)) { | |||
249 | res = ENOENT2; | |||
250 | cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; | |||
251 | break; | |||
252 | } | |||
253 | imsg_compose_event(&vm->vm_iev, | |||
254 | imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, | |||
255 | imsg->fd, &vid, sizeof(vid)); | |||
256 | break; | |||
257 | case IMSG_VMDOP_UNPAUSE_VM: | |||
258 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
259 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
260 | id = vid.vid_id; | |||
261 | if ((vm = vm_getbyvmid(id)) == NULL((void *)0)) { | |||
262 | res = ENOENT2; | |||
263 | cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
264 | break; | |||
265 | } | |||
266 | imsg_compose_event(&vm->vm_iev, | |||
267 | imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, | |||
268 | imsg->fd, &vid, sizeof(vid)); | |||
269 | break; | |||
270 | case IMSG_VMDOP_SEND_VM_REQUEST: | |||
271 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
272 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
273 | id = vid.vid_id; | |||
274 | if ((vm = vm_getbyvmid(id)) == NULL((void *)0)) { | |||
275 | res = ENOENT2; | |||
276 | close(imsg->fd); | |||
277 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
278 | break; | |||
279 | } | |||
280 | imsg_compose_event(&vm->vm_iev, | |||
281 | imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, | |||
282 | imsg->fd, &vid, sizeof(vid)); | |||
283 | break; | |||
284 | case IMSG_VMDOP_RECEIVE_VM_REQUEST: | |||
285 | IMSG_SIZE_CHECK(imsg, &vmc)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmc)) fatalx("bad length imsg received (%s)", "&vmc" ); } while (0); | |||
286 | memcpy(&vmc, imsg->data, sizeof(vmc)); | |||
287 | if (vm_register(ps, &vmc, &vm, | |||
288 | imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) { | |||
289 | res = errno(*__errno()); | |||
290 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
291 | break; | |||
292 | } | |||
293 | vm->vm_tty = imsg->fd; | |||
294 | vm->vm_state |= VM_STATE_RECEIVED0x08; | |||
295 | vm->vm_state |= VM_STATE_PAUSED0x10; | |||
296 | break; | |||
297 | case IMSG_VMDOP_RECEIVE_VM_END: | |||
298 | if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL((void *)0)) { | |||
299 | res = ENOENT2; | |||
300 | close(imsg->fd); | |||
301 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
302 | break; | |||
303 | } | |||
304 | vm->vm_receive_fd = imsg->fd; | |||
305 | res = vmm_start_vm(imsg, &id, &pid); | |||
306 | /* Check if the ID can be mapped correctly */ | |||
307 | if ((id = vm_id2vmid(id, NULL((void *)0))) == 0) | |||
308 | res = ENOENT2; | |||
309 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
310 | break; | |||
311 | case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: | |||
312 | IMSG_SIZE_CHECK(imsg, &var)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&var)) fatalx("bad length imsg received (%s)", "&var" ); } while (0); | |||
313 | memcpy(&var, imsg->data, sizeof(var)); | |||
314 | if ((vm = vm_getbyvmid(var.var_vmid)) == NULL((void *)0)) { | |||
315 | res = ENOENT2; | |||
316 | break; | |||
317 | } | |||
318 | /* Forward hardware address details to the guest vm */ | |||
319 | imsg_compose_event(&vm->vm_iev, | |||
320 | imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, | |||
321 | imsg->fd, &var, sizeof(var)); | |||
322 | break; | |||
323 | case IMSG_VMDOP_RECEIVE_VMM_FD: | |||
324 | if (env->vmd_fd > -1) | |||
325 | fatalx("already received vmm fd"); | |||
326 | env->vmd_fd = imsg->fd; | |||
327 | ||||
328 | /* Get and terminate all running VMs */ | |||
329 | get_info_vm(ps, NULL((void *)0), 1); | |||
330 | break; | |||
331 | default: | |||
332 | return (-1); | |||
333 | } | |||
334 | ||||
335 | switch (cmd) { | |||
336 | case 0: | |||
337 | break; | |||
338 | case IMSG_VMDOP_START_VM_RESPONSE: | |||
339 | if (res != 0) { | |||
340 | /* Remove local reference if it exists */ | |||
341 | if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL((void *)0)) { | |||
342 | log_debug("%s: removing vm, START_VM_RESPONSE", | |||
343 | __func__); | |||
344 | vm_remove(vm, __func__); | |||
345 | } | |||
346 | } | |||
347 | if (id == 0) | |||
348 | id = imsg->hdr.peerid; | |||
349 | /* FALLTHROUGH */ | |||
350 | case IMSG_VMDOP_PAUSE_VM_RESPONSE: | |||
351 | case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: | |||
352 | case IMSG_VMDOP_TERMINATE_VM_RESPONSE: | |||
353 | memset(&vmr, 0, sizeof(vmr)); | |||
354 | vmr.vmr_result = res; | |||
355 | vmr.vmr_id = id; | |||
356 | vmr.vmr_pid = pid; | |||
357 | if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, | |||
358 | peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
359 | return (-1); | |||
360 | break; | |||
361 | default: | |||
362 | if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, | |||
363 | peerid, -1, &res, sizeof(res)) == -1) | |||
364 | return (-1); | |||
365 | break; | |||
366 | } | |||
367 | ||||
368 | return (0); | |||
369 | } | |||
370 | ||||
371 | void | |||
372 | vmm_sighdlr(int sig, short event, void *arg) | |||
373 | { | |||
374 | struct privsep *ps = arg; | |||
375 | int status, ret = 0; | |||
376 | uint32_t vmid; | |||
377 | pid_t pid; | |||
378 | struct vmop_result vmr; | |||
379 | struct vmd_vm *vm; | |||
380 | struct vm_terminate_params vtp; | |||
381 | ||||
382 | log_debug("%s: handling signal %d", __func__, sig); | |||
383 | switch (sig) { | |||
384 | case SIGCHLD20: | |||
385 | do { | |||
386 | pid = waitpid(-1, &status, WNOHANG0x01); | |||
387 | if (pid <= 0) | |||
388 | continue; | |||
389 | ||||
390 | if (WIFEXITED(status)(((status) & 0177) == 0) || WIFSIGNALED(status)(((status) & 0177) != 0177 && ((status) & 0177 ) != 0)) { | |||
391 | vm = vm_getbypid(pid); | |||
392 | if (vm == NULL((void *)0)) { | |||
393 | /* | |||
394 | * If the VM is gone already, it | |||
395 | * got terminated via a | |||
396 | * IMSG_VMDOP_TERMINATE_VM_REQUEST. | |||
397 | */ | |||
398 | continue; | |||
399 | } | |||
400 | ||||
401 | if (WIFEXITED(status)(((status) & 0177) == 0)) | |||
402 | ret = WEXITSTATUS(status)(int)(((unsigned)(status) >> 8) & 0xff); | |||
403 | ||||
404 | /* Don't reboot on pending shutdown */ | |||
405 | if (ret == EAGAIN35 && | |||
406 | (vm->vm_state & VM_STATE_SHUTDOWN0x04)) | |||
407 | ret = 0; | |||
408 | ||||
409 | vmid = vm->vm_params.vmc_params.vcp_id; | |||
410 | vtp.vtp_vm_id = vmid; | |||
411 | ||||
412 | if (terminate_vm(&vtp) == 0) | |||
413 | log_debug("%s: terminated vm %s" | |||
414 | " (id %d)", __func__, | |||
415 | vm->vm_params.vmc_params.vcp_name, | |||
416 | vm->vm_vmid); | |||
417 | ||||
418 | memset(&vmr, 0, sizeof(vmr)); | |||
419 | vmr.vmr_result = ret; | |||
420 | vmr.vmr_id = vm_id2vmid(vmid, vm); | |||
421 | if (proc_compose_imsg(ps, PROC_PARENT, | |||
422 | -1, IMSG_VMDOP_TERMINATE_VM_EVENT, | |||
423 | vm->vm_peerid, -1, | |||
424 | &vmr, sizeof(vmr)) == -1) | |||
425 | log_warnx("could not signal " | |||
426 | "termination of VM %u to " | |||
427 | "parent", vm->vm_vmid); | |||
428 | ||||
429 | vm_remove(vm, __func__); | |||
430 | } else | |||
431 | fatalx("unexpected cause of SIGCHLD"); | |||
432 | } while (pid > 0 || (pid == -1 && errno(*__errno()) == EINTR4)); | |||
433 | break; | |||
434 | default: | |||
435 | fatalx("unexpected signal"); | |||
436 | } | |||
437 | } | |||
438 | ||||
439 | /* | |||
440 | * vmm_shutdown | |||
441 | * | |||
442 | * Terminate VMs on shutdown to avoid "zombie VM" processes. | |||
443 | */ | |||
444 | void | |||
445 | vmm_shutdown(void) | |||
446 | { | |||
447 | struct vm_terminate_params vtp; | |||
448 | struct vmd_vm *vm, *vm_next; | |||
449 | ||||
450 | TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next)for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((vm_next) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (vm_next)) { | |||
451 | vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); | |||
452 | ||||
453 | /* XXX suspend or request graceful shutdown */ | |||
454 | (void)terminate_vm(&vtp); | |||
455 | vm_remove(vm, __func__); | |||
456 | } | |||
457 | } | |||
458 | ||||
459 | /* | |||
460 | * vmm_pipe | |||
461 | * | |||
462 | * Create a new imsg control channel between vmm parent and a VM | |||
463 | * (can be called on both sides). | |||
464 | */ | |||
465 | int | |||
466 | vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *)) | |||
467 | { | |||
468 | struct imsgev *iev = &vm->vm_iev; | |||
469 | ||||
470 | if (fcntl(fd, F_SETFL4, O_NONBLOCK0x0004) == -1) { | |||
471 | log_warn("failed to set nonblocking mode on vm pipe"); | |||
472 | return (-1); | |||
473 | } | |||
474 | ||||
475 | imsg_init(&iev->ibuf, fd); | |||
476 | iev->handler = cb; | |||
477 | iev->data = vm; | |||
478 | imsg_event_add(iev); | |||
479 | ||||
480 | return (0); | |||
481 | } | |||
482 | ||||
483 | /* | |||
484 | * vmm_dispatch_vm | |||
485 | * | |||
486 | * imsg callback for messages that are received from a VM child process. | |||
487 | */ | |||
488 | void | |||
489 | vmm_dispatch_vm(int fd, short event, void *arg) | |||
490 | { | |||
491 | struct vmd_vm *vm = arg; | |||
492 | struct vmop_result vmr; | |||
493 | struct imsgev *iev = &vm->vm_iev; | |||
494 | struct imsgbuf *ibuf = &iev->ibuf; | |||
495 | struct imsg imsg; | |||
496 | ssize_t n; | |||
497 | unsigned int i; | |||
498 | ||||
499 | if (event & EV_READ0x02) { | |||
500 | if ((n = imsg_read(ibuf)) == -1 && errno(*__errno()) != EAGAIN35) | |||
501 | fatal("%s: imsg_read", __func__); | |||
502 | if (n == 0) { | |||
503 | /* This pipe is dead, so remove the event handler */ | |||
504 | event_del(&iev->ev); | |||
505 | return; | |||
506 | } | |||
507 | } | |||
508 | ||||
509 | if (event & EV_WRITE0x04) { | |||
510 | if ((n = msgbuf_write(&ibuf->w)) == -1 && errno(*__errno()) != EAGAIN35) | |||
511 | fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd); | |||
512 | if (n == 0) { | |||
513 | /* This pipe is dead, so remove the event handler */ | |||
514 | event_del(&iev->ev); | |||
515 | return; | |||
516 | } | |||
517 | } | |||
518 | ||||
519 | for (;;) { | |||
520 | if ((n = imsg_get(ibuf, &imsg)) == -1) | |||
521 | fatal("%s: imsg_get", __func__); | |||
522 | if (n == 0) | |||
523 | break; | |||
524 | ||||
525 | DPRINTF("%s: got imsg %d from %s",do {} while(0) | |||
526 | __func__, imsg.hdr.type,do {} while(0) | |||
527 | vm->vm_params.vmc_params.vcp_name)do {} while(0); | |||
528 | ||||
529 | switch (imsg.hdr.type) { | |||
530 | case IMSG_VMDOP_VM_SHUTDOWN: | |||
531 | vm->vm_state |= VM_STATE_SHUTDOWN0x04; | |||
532 | break; | |||
533 | case IMSG_VMDOP_VM_REBOOT: | |||
534 | vm->vm_state &= ~VM_STATE_SHUTDOWN0x04; | |||
535 | break; | |||
536 | case IMSG_VMDOP_SEND_VM_RESPONSE: | |||
537 | IMSG_SIZE_CHECK(&imsg, &vmr)do { if (((&imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof(*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
538 | case IMSG_VMDOP_PAUSE_VM_RESPONSE: | |||
539 | case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: | |||
540 | for (i = 0; i < nitems(procs)(sizeof((procs)) / sizeof((procs)[0])); i++) { | |||
541 | if (procs[i].p_id == PROC_PARENT) { | |||
542 | proc_forward_imsg(procs[i].p_ps, | |||
543 | &imsg, PROC_PARENT, -1); | |||
544 | break; | |||
545 | } | |||
546 | } | |||
547 | break; | |||
548 | ||||
549 | default: | |||
550 | fatalx("%s: got invalid imsg %d from %s", | |||
551 | __func__, imsg.hdr.type, | |||
552 | vm->vm_params.vmc_params.vcp_name); | |||
553 | } | |||
554 | imsg_free(&imsg); | |||
555 | } | |||
556 | imsg_event_add(iev); | |||
557 | } | |||
558 | ||||
559 | /* | |||
560 | * terminate_vm | |||
561 | * | |||
562 | * Requests vmm(4) to terminate the VM whose ID is provided in the | |||
563 | * supplied vm_terminate_params structure (vtp->vtp_vm_id) | |||
564 | * | |||
565 | * Parameters | |||
566 | * vtp: vm_terminate_params struct containing the ID of the VM to terminate | |||
567 | * | |||
568 | * Return values: | |||
569 | * 0: success | |||
570 | * !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid) | |||
571 | */ | |||
572 | int | |||
573 | terminate_vm(struct vm_terminate_params *vtp) | |||
574 | { | |||
575 | if (ioctl(env->vmd_fd, VMM_IOC_TERM((unsigned long)0x80000000 | ((sizeof(struct vm_terminate_params ) & 0x1fff) << 16) | ((('V')) << 8) | ((4))), vtp) == -1) | |||
576 | return (errno(*__errno())); | |||
577 | ||||
578 | return (0); | |||
579 | } | |||
580 | ||||
581 | /* | |||
582 | * opentap | |||
583 | * | |||
584 | * Opens the next available tap device, up to MAX_TAP. | |||
585 | * | |||
586 | * Parameters | |||
587 | * ifname: a buffer of at least IF_NAMESIZE bytes. | |||
588 | * | |||
589 | * Returns a file descriptor to the tap node opened or -1 if no tap devices were | |||
590 | * available, setting errno to the open(2) error. | |||
591 | */ | |||
592 | int | |||
593 | opentap(char *ifname) | |||
594 | { | |||
595 | int err = 0, i, fd; | |||
596 | char path[PATH_MAX1024]; | |||
597 | ||||
598 | for (i = 0; i < MAX_TAP256; i++) { | |||
599 | snprintf(path, PATH_MAX1024, "/dev/tap%d", i); | |||
600 | ||||
601 | errno(*__errno()) = 0; | |||
602 | fd = open(path, O_RDWR0x0002 | O_NONBLOCK0x0004); | |||
603 | if (fd != -1) | |||
604 | break; | |||
605 | err = errno(*__errno()); | |||
606 | if (err == EBUSY16) { | |||
607 | /* Busy...try next tap. */ | |||
608 | continue; | |||
609 | } else if (err == ENOENT2) { | |||
610 | /* Ran out of /dev/tap* special files. */ | |||
611 | break; | |||
612 | } else { | |||
613 | log_warn("%s: unexpected error", __func__); | |||
614 | break; | |||
615 | } | |||
616 | } | |||
617 | ||||
618 | /* Record the last opened tap device. */ | |||
619 | snprintf(ifname, IF_NAMESIZE16, "tap%d", i); | |||
620 | ||||
621 | if (err) | |||
622 | errno(*__errno()) = err; | |||
623 | return (fd); | |||
624 | } | |||
625 | ||||
626 | /* | |||
627 | * vmm_start_vm | |||
628 | * | |||
629 | * Prepares and fork+execs a new VM process. | |||
630 | * | |||
631 | * Parameters: | |||
632 | * imsg: The VM data structure that is including the VM create parameters. | |||
633 | * id: Returns the VM id as reported by the kernel and obtained from the VM. | |||
634 | * pid: Returns the VM pid to the parent. | |||
635 | * | |||
636 | * Return values: | |||
637 | * 0: success | |||
638 | * !0: failure - typically an errno indicating the source of the failure | |||
639 | */ | |||
640 | int | |||
641 | vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) | |||
642 | { | |||
643 | struct vm_create_params *vcp; | |||
644 | struct vmd_vm *vm; | |||
645 | char *nargv[8], num[32], vmm_fd[32]; | |||
646 | int fd, ret = EINVAL22; | |||
647 | int fds[2]; | |||
648 | pid_t vm_pid; | |||
649 | size_t i, j, sz; | |||
650 | ||||
651 | if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL((void *)0)) { | |||
652 | log_warnx("%s: can't find vm", __func__); | |||
653 | ret = ENOENT2; | |||
654 | goto err; | |||
655 | } | |||
656 | vcp = &vm->vm_params.vmc_params; | |||
657 | ||||
658 | if (!(vm->vm_state & VM_STATE_RECEIVED0x08)) { | |||
659 | if ((vm->vm_tty = imsg->fd) == -1) { | |||
660 | log_warnx("%s: can't get tty", __func__); | |||
661 | goto err; | |||
662 | } | |||
663 | } | |||
664 | ||||
665 | if (socketpair(AF_UNIX1, SOCK_STREAM1, PF_UNSPEC0, fds) == -1) | |||
666 | fatal("socketpair"); | |||
667 | ||||
668 | /* Keep our channel open after exec. */ | |||
669 | if (fcntl(fds[1], F_SETFD2, 0)) { | |||
670 | ret = errno(*__errno()); | |||
671 | log_warn("%s: fcntl", __func__); | |||
672 | goto err; | |||
673 | } | |||
674 | ||||
675 | /* Start child vmd for this VM (fork, chroot, drop privs) */ | |||
676 | vm_pid = fork(); | |||
677 | if (vm_pid == -1) { | |||
678 | log_warn("%s: start child failed", __func__); | |||
679 | ret = EIO5; | |||
680 | goto err; | |||
681 | } | |||
682 | ||||
683 | if (vm_pid > 0) { | |||
684 | /* Parent */ | |||
685 | vm->vm_pid = vm_pid; | |||
686 | close_fd(fds[1]); | |||
687 | ||||
688 | /* Send the details over the pipe to the child. */ | |||
689 | sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fds[0], vm, sizeof(*vm)); | |||
690 | if (sz != sizeof(*vm)) { | |||
691 | log_warnx("%s: failed to send config for vm '%s'", | |||
692 | __func__, vcp->vcp_name); | |||
693 | ret = EIO5; | |||
694 | /* Defer error handling until after fd closing. */ | |||
695 | } | |||
696 | ||||
697 | /* As the parent/vmm process, we no longer need these fds. */ | |||
698 | for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) { | |||
699 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) { | |||
700 | if (close_fd(vm->vm_disks[i][j]) == 0) | |||
701 | vm->vm_disks[i][j] = -1; | |||
702 | } | |||
703 | } | |||
704 | for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) { | |||
705 | if (close_fd(vm->vm_ifs[i].vif_fd) == 0) | |||
706 | vm->vm_ifs[i].vif_fd = -1; | |||
707 | } | |||
708 | if (close_fd(vm->vm_kernel) == 0) | |||
709 | vm->vm_kernel = -1; | |||
710 | if (close_fd(vm->vm_cdrom) == 0) | |||
711 | vm->vm_cdrom = -1; | |||
712 | if (close_fd(vm->vm_tty) == 0) | |||
713 | vm->vm_tty = -1; | |||
714 | ||||
715 | /* Deferred error handling from sending the vm struct. */ | |||
716 | if (ret == EIO5) | |||
717 | goto err; | |||
718 | ||||
719 | /* Send the current local prefix configuration. */ | |||
720 | sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fds[0], &env->vmd_cfg.cfg_localprefix, | |||
721 | sizeof(env->vmd_cfg.cfg_localprefix)); | |||
722 | if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) { | |||
723 | log_warnx("%s: failed to send local prefix for vm '%s'", | |||
724 | __func__, vcp->vcp_name); | |||
725 | ret = EIO5; | |||
726 | goto err; | |||
727 | } | |||
728 | ||||
729 | /* Read back the kernel-generated vm id from the child */ | |||
730 | sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)); | |||
731 | if (sz != sizeof(vcp->vcp_id)) { | |||
732 | log_debug("%s: failed to receive vm id from vm %s", | |||
733 | __func__, vcp->vcp_name); | |||
734 | /* vmd could not allocate memory for the vm. */ | |||
735 | ret = ENOMEM12; | |||
736 | goto err; | |||
737 | } | |||
738 | ||||
739 | /* Check for an invalid id. This indicates child failure. */ | |||
740 | if (vcp->vcp_id == 0) | |||
741 | goto err; | |||
742 | ||||
743 | *id = vcp->vcp_id; | |||
744 | *pid = vm->vm_pid; | |||
745 | ||||
746 | /* Wire up our pipe into the event handling. */ | |||
747 | if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1) | |||
748 | fatal("setup vm pipe"); | |||
749 | ||||
750 | return (0); | |||
751 | } else { | |||
752 | /* Child. Create a new session. */ | |||
753 | if (setsid() == -1) | |||
754 | fatal("setsid"); | |||
755 | ||||
756 | close_fd(fds[0]); | |||
757 | close_fd(PROC_PARENT_SOCK_FILENO3); | |||
758 | ||||
759 | /* Detach from terminal. */ | |||
760 | if (!env->vmd_debug && (fd = | |||
761 | open("/dev/null", O_RDWR0x0002, 0)) != -1) { | |||
762 | dup2(fd, STDIN_FILENO0); | |||
763 | dup2(fd, STDOUT_FILENO1); | |||
764 | dup2(fd, STDERR_FILENO2); | |||
765 | if (fd > 2) | |||
766 | close(fd); | |||
767 | } | |||
768 | ||||
769 | /* Toggle all fds to not close on exec. */ | |||
770 | for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) | |||
771 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) | |||
772 | if (vm->vm_disks[i][j] != -1) | |||
773 | fcntl(vm->vm_disks[i][j], F_SETFD2, 0); | |||
774 | for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) | |||
775 | fcntl(vm->vm_ifs[i].vif_fd, F_SETFD2, 0); | |||
776 | if (vm->vm_kernel != -1) | |||
777 | fcntl(vm->vm_kernel, F_SETFD2, 0); | |||
778 | if (vm->vm_cdrom != -1) | |||
779 | fcntl(vm->vm_cdrom, F_SETFD2, 0); | |||
780 | if (vm->vm_tty != -1) | |||
781 | fcntl(vm->vm_tty, F_SETFD2, 0); | |||
782 | fcntl(env->vmd_fd, F_SETFD2, 0); /* vmm device fd */ | |||
783 | ||||
784 | /* | |||
785 | * Prepare our new argv for execvp(2) with the fd of our open | |||
786 | * pipe to the parent/vmm process as an argument. | |||
787 | */ | |||
788 | memset(&nargv, 0, sizeof(nargv)); | |||
789 | memset(num, 0, sizeof(num)); | |||
790 | snprintf(num, sizeof(num), "%d", fds[1]); | |||
791 | memset(vmm_fd, 0, sizeof(vmm_fd)); | |||
792 | snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd); | |||
793 | ||||
794 | nargv[0] = env->argv0; | |||
795 | nargv[1] = "-V"; | |||
796 | nargv[2] = num; | |||
797 | nargv[3] = "-n"; | |||
798 | nargv[4] = "-i"; | |||
799 | nargv[5] = vmm_fd; | |||
800 | nargv[6] = NULL((void *)0); | |||
801 | ||||
802 | if (env->vmd_verbose == 1) { | |||
803 | nargv[6] = VMD_VERBOSE_1"-v";; | |||
804 | nargv[7] = NULL((void *)0); | |||
805 | } else if (env->vmd_verbose > 1) { | |||
806 | nargv[6] = VMD_VERBOSE_2"-vv";; | |||
807 | nargv[7] = NULL((void *)0); | |||
808 | } | |||
809 | ||||
810 | /* Control resumes in vmd main(). */ | |||
811 | execvp(nargv[0], nargv); | |||
812 | ||||
813 | ret = errno(*__errno()); | |||
814 | log_warn("execvp %s", nargv[0]); | |||
815 | _exit(ret); | |||
816 | /* NOTREACHED */ | |||
817 | } | |||
818 | ||||
819 | return (0); | |||
820 | ||||
821 | err: | |||
822 | if (!vm->vm_from_config) | |||
| ||||
823 | vm_remove(vm, __func__); | |||
824 | ||||
825 | return (ret); | |||
826 | } | |||
827 | ||||
828 | /* | |||
829 | * get_info_vm | |||
830 | * | |||
831 | * Returns a list of VMs known to vmm(4). | |||
832 | * | |||
833 | * Parameters: | |||
834 | * ps: the privsep context. | |||
835 | * imsg: the received imsg including the peer id. | |||
836 | * terminate: terminate the listed vm. | |||
837 | * | |||
838 | * Return values: | |||
839 | * 0: success | |||
840 | * !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl) | |||
841 | */ | |||
842 | int | |||
843 | get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate) | |||
844 | { | |||
845 | int ret; | |||
846 | size_t ct, i; | |||
847 | struct vm_info_params vip; | |||
848 | struct vm_info_result *info; | |||
849 | struct vm_terminate_params vtp; | |||
850 | struct vmop_info_result vir; | |||
851 | ||||
852 | /* | |||
853 | * We issue the VMM_IOC_INFO ioctl twice, once with an input | |||
854 | * buffer size of 0, which results in vmm(4) returning the | |||
855 | * number of bytes required back to us in vip.vip_size, | |||
856 | * and then we call it again after malloc'ing the required | |||
857 | * number of bytes. | |||
858 | * | |||
859 | * It is possible that we could fail a second time (e.g. if | |||
860 | * another VM was created in the instant between the two | |||
861 | * ioctls, but in that case the caller can just try again | |||
862 | * as vmm(4) will return a zero-sized list in that case. | |||
863 | */ | |||
864 | vip.vip_size = 0; | |||
865 | info = NULL((void *)0); | |||
866 | ret = 0; | |||
867 | memset(&vir, 0, sizeof(vir)); | |||
868 | ||||
869 | /* First ioctl to see how many bytes needed (vip.vip_size) */ | |||
870 | if (ioctl(env->vmd_fd, VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_info_params) & 0x1fff) << 16) | ((('V')) << 8) | ((3))), &vip) == -1) | |||
871 | return (errno(*__errno())); | |||
872 | ||||
873 | if (vip.vip_info_ct != 0) | |||
874 | return (EIO5); | |||
875 | ||||
876 | info = malloc(vip.vip_size); | |||
877 | if (info == NULL((void *)0)) | |||
878 | return (ENOMEM12); | |||
879 | ||||
880 | /* Second ioctl to get the actual list */ | |||
881 | vip.vip_info = info; | |||
882 | if (ioctl(env->vmd_fd, VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof (struct vm_info_params) & 0x1fff) << 16) | ((('V')) << 8) | ((3))), &vip) == -1) { | |||
883 | ret = errno(*__errno()); | |||
884 | free(info); | |||
885 | return (ret); | |||
886 | } | |||
887 | ||||
888 | /* Return info */ | |||
889 | ct = vip.vip_size / sizeof(struct vm_info_result); | |||
890 | for (i = 0; i < ct; i++) { | |||
891 | if (terminate) { | |||
892 | vtp.vtp_vm_id = info[i].vir_id; | |||
893 | if ((ret = terminate_vm(&vtp)) != 0) | |||
894 | break; | |||
895 | log_debug("%s: terminated vm %s (id %d)", __func__, | |||
896 | info[i].vir_name, info[i].vir_id); | |||
897 | continue; | |||
898 | } | |||
899 | memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info)); | |||
900 | vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL((void *)0)); | |||
901 | if (proc_compose_imsg(ps, PROC_PARENT, -1, | |||
902 | IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1, | |||
903 | &vir, sizeof(vir)) == -1) { | |||
904 | ret = EIO5; | |||
905 | break; | |||
906 | } | |||
907 | } | |||
908 | free(info); | |||
909 | ||||
910 | return (ret); | |||
911 | } |