Bug Summary

File: src/usr.sbin/vmd/vmd.c
Warning: line 1446, column 7
Although the value stored to 'vm' is used in the enclosing expression, the value is never actually read from 'vm'
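
The construct flagged here is the duplicate-name check in vm_instance() at lines 1446-1447: each half of the || stores its lookup result into 'vm' so the assignment can feed the != NULL test, but unlike the near-identical check in vm_register() (line 1281), vm_instance() never reads 'vm' afterwards, so the stores are dead. A minimal sketch of one possible cleanup, testing the lookups directly instead of keeping the dead store (an illustration, not a committed fix):

    if (vm_getbyname(vcp->vcp_name) != NULL ||
        vm_getbyvmid(vcp->vcp_id) != NULL) {
        return (EPROCLIM);
    }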

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name vmd.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.sbin/vmd/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/usr.sbin/vmd -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.sbin/vmd/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/usr.sbin/vmd/vmd.c
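
For reference, a report like this one is typically produced by wrapping the build with scan-build; assuming the source and output directories from the invocation above, the steps would look roughly like:

    $ cd /usr/src/usr.sbin/vmd
    $ scan-build -o /home/ben/Projects/vmm/scan-build make

The exact checkers and flags used for this run are recorded verbatim in the clang -cc1 invocation above.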
1/* $OpenBSD: vmd.c,v 1.129 2022/01/04 15:18:44 claudio Exp $ */
2
3/*
4 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/queue.h>
21#include <sys/wait.h>
22#include <sys/stat.h>
23#include <sys/sysctl.h>
24#include <sys/tty.h>
25#include <sys/ttycom.h>
26#include <sys/ioctl.h>
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <termios.h>
32#include <errno.h>
33#include <event.h>
34#include <fcntl.h>
35#include <pwd.h>
36#include <signal.h>
37#include <syslog.h>
38#include <unistd.h>
39#include <util.h>
40#include <ctype.h>
41#include <pwd.h>
42#include <grp.h>
43
44#include <machine/specialreg.h>
45#include <machine/vmmvar.h>
46
47#include "proc.h"
48#include "atomicio.h"
49#include "vmd.h"
50
51__dead void usage(void);
52
53int main(int, char **);
54int vmd_configure(void);
55void vmd_sighdlr(int sig, short event, void *arg);
56void vmd_shutdown(void);
57int vmd_control_run(void);
58int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *);
59int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *);
60int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *);
61int vmd_check_vmh(struct vm_dump_header *);
62
63int vm_instance(struct privsep *, struct vmd_vm **,
64 struct vmop_create_params *, uid_t);
65int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
66int vm_claimid(const char *, int, uint32_t *);
67void start_vm_batch(int, short, void*);
68
69struct vmd *env;
70
71static struct privsep_proc procs[] = {
72 /* Keep "priv" on top as procs[0] */
73 { "priv", PROC_PRIV, vmd_dispatch_priv, priv },
74 { "control", PROC_CONTROL, vmd_dispatch_control, control },
75 { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown },
76};
77
78enum privsep_procid privsep_process;
79
80struct event staggered_start_timer;
81
82/* For the privileged process */
83static struct privsep_proc *proc_priv = &procs[0];
84static struct passwd proc_privpw;
85static const uint8_t zero_mac[ETHER_ADDR_LEN];
86
87int
88vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
89{
90 struct privsep *ps = p->p_ps;
91 int res = 0, ret = 0, cmd = 0, verbose;
92 unsigned int v = 0, flags;
93 struct vmop_create_params vmc;
94 struct vmop_id vid;
95 struct vmop_result vmr;
96 struct vm_dump_header vmh;
97 struct vmd_vm *vm = NULL;
98 char *str = NULL;
99 uint32_t id = 0;
100 struct control_sock *rcs;
101
102 switch (imsg->hdr.type) {
103 case IMSG_VMDOP_START_VM_REQUEST:
104 IMSG_SIZE_CHECK(imsg, &vmc);
105 memcpy(&vmc, imsg->data, sizeof(vmc));
106 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
107 if (vmc.vmc_flags == 0) {
108 /* start an existing VM with pre-configured options */
109 if (!(ret == -1 && errno == EALREADY &&
110 !(vm->vm_state & VM_STATE_RUNNING))) {
111 res = errno;
112 cmd = IMSG_VMDOP_START_VM_RESPONSE;
113 }
114 } else if (ret != 0) {
115 res = errno;
116 cmd = IMSG_VMDOP_START_VM_RESPONSE;
117 }
118 if (res == 0) {
119 res = config_setvm(ps, vm, imsg->hdr.peerid,
120 vm->vm_params.vmc_owner.uid);
121 if (res)
122 cmd = IMSG_VMDOP_START_VM_RESPONSE;
123 }
124 break;
125 case IMSG_VMDOP_WAIT_VM_REQUEST:
126 case IMSG_VMDOP_TERMINATE_VM_REQUEST:
127 IMSG_SIZE_CHECK(imsg, &vid);
128 memcpy(&vid, imsg->data, sizeof(vid));
129 flags = vid.vid_flags;
130 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
131
132 if ((id = vid.vid_id) == 0) {
133 /* Lookup vm (id) by name */
134 if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
135 res = ENOENT;
136 break;
137 } else if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
138 (flags & VMOP_FORCE) == 0) {
139 res = EALREADY;
140 break;
141 } else if (!(vm->vm_state & VM_STATE_RUNNING)) {
142 res = EINVAL;
143 break;
144 }
145 id = vm->vm_vmid;
146 } else if ((vm = vm_getbyvmid(id)) == NULL) {
147 res = ENOENT;
148 break;
149 }
150 if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
151 res = EPERM;
152 break;
153 }
154
155 /* Only relay TERMINATION requests, not WAIT requests */
156 if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
157 memset(&vid, 0, sizeof(vid));
158 vid.vid_id = id;
159 vid.vid_flags = flags;
160
161 if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
162 imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
163 return (-1);
164 }
165 break;
166 case IMSG_VMDOP_GET_INFO_VM_REQUEST:
167 proc_forward_imsg(ps, imsg, PROC_VMM, -1);
168 break;
169 case IMSG_VMDOP_LOAD:
170 IMSG_SIZE_CHECK(imsg, str); /* at least one byte for path */
171 str = get_string((uint8_t *)imsg->data,
172 IMSG_DATA_SIZE(imsg));
173 case IMSG_VMDOP_RELOAD:
174 if (vmd_reload(0, str) == -1)
175 cmd = IMSG_CTL_FAIL;
176 else
177 cmd = IMSG_CTL_OK;
178 free(str);
179 break;
180 case IMSG_CTL_RESET:
181 IMSG_SIZE_CHECK(imsg, &v);
182 memcpy(&v, imsg->data, sizeof(v));
183 if (vmd_reload(v, NULL) == -1)
184 cmd = IMSG_CTL_FAIL;
185 else
186 cmd = IMSG_CTL_OK;
187 break;
188 case IMSG_CTL_VERBOSE:
189 IMSG_SIZE_CHECK(imsg, &verbose);
190 memcpy(&verbose, imsg->data, sizeof(verbose));
191 log_setverbose(verbose);
192
193 proc_forward_imsg(ps, imsg, PROC_VMM, -1);
194 proc_forward_imsg(ps, imsg, PROC_PRIV, -1);
195 cmd = IMSG_CTL_OK;
196 break;
197 case IMSG_VMDOP_PAUSE_VM:
198 case IMSG_VMDOP_UNPAUSE_VM:
199 IMSG_SIZE_CHECK(imsg, &vid);
200 memcpy(&vid, imsg->data, sizeof(vid));
201 if (vid.vid_id == 0) {
202 if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
203 res = ENOENT;
204 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
205 ? IMSG_VMDOP_PAUSE_VM_RESPONSE
206 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
207 break;
208 } else {
209 vid.vid_id = vm->vm_vmid;
210 }
211 } else if ((vm = vm_getbyid(vid.vid_id)) == NULL) {
212 res = ENOENT;
213 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
214 ? IMSG_VMDOP_PAUSE_VM_RESPONSE
215 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
216 break;
217 }
218 if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
219 vid.vid_uid) != 0) {
220 res = EPERM;
221 cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM
222 ? IMSG_VMDOP_PAUSE_VM_RESPONSE
223 : IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
224 break;
225 }
226 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
227 imsg->hdr.peerid, -1, &vid, sizeof(vid));
228 break;
229 case IMSG_VMDOP_SEND_VM_REQUEST:
230 IMSG_SIZE_CHECK(imsg, &vid);
231 memcpy(&vid, imsg->data, sizeof(vid));
232 id = vid.vid_id;
233 if (vid.vid_id == 0) {
234 if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
235 res = ENOENT;
236 cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
237 close(imsg->fd);
238 break;
239 } else {
240 vid.vid_id = vm->vm_vmid;
241 }
242 } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL) {
243 res = ENOENT;
244 cmd = IMSG_VMDOP_SEND_VM_RESPONSE;
245 close(imsg->fd);
246 break;
247 }
248 vmr.vmr_id = vid.vid_id;
249 log_debug("%s: sending fd to vmm", __func__);
250 proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
251 imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid));
252 break;
253 case IMSG_VMDOP_RECEIVE_VM_REQUEST:
254 IMSG_SIZE_CHECK(imsg, &vid);
255 memcpy(&vid, imsg->data, sizeof(vid));
256 if (imsg->fd == -1) {
257 log_warnx("%s: invalid fd", __func__);
258 return (-1);
259 }
260 if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) !=
261 sizeof(vmh)) {
262 log_warnx("%s: error reading vmh from received vm",
263 __func__);
264 res = EIO;
265 close(imsg->fd);
266 cmd = IMSG_VMDOP_START_VM_RESPONSE;
267 break;
268 }
269
270 if (vmd_check_vmh(&vmh)) {
271 res = ENOENT;
272 close(imsg->fd);
273 cmd = IMSG_VMDOP_START_VM_RESPONSE;
274 break;
275 }
276 if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) !=
277 sizeof(vmc)) {
278 log_warnx("%s: error reading vmc from received vm",
279 __func__);
280 res = EIO;
281 close(imsg->fd);
282 cmd = IMSG_VMDOP_START_VM_RESPONSE;
283 break;
284 }
285 strlcpy(vmc.vmc_params.vcp_name, vid.vid_name,
286 sizeof(vmc.vmc_params.vcp_name));
287 vmc.vmc_params.vcp_id = 0;
288
289 ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
290 if (ret != 0) {
291 res = errno;
292 cmd = IMSG_VMDOP_START_VM_RESPONSE;
293 close(imsg->fd);
294 } else {
295 vm->vm_state |= VM_STATE_RECEIVED;
296 config_setvm(ps, vm, imsg->hdr.peerid,
297 vmc.vmc_owner.uid);
298 log_debug("%s: sending fd to vmm", __func__);
299 proc_compose_imsg(ps, PROC_VMM, -1,
300 IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd,
301 NULL, 0);
302 }
303 break;
304 case IMSG_VMDOP_DONE:
305 control_reset(&ps->ps_csock);
306 TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)
307 control_reset(rcs);
308 cmd = 0;
309 break;
310 default:
311 return (-1);
312 }
313
314 switch (cmd) {
315 case 0:
316 break;
317 case IMSG_VMDOP_START_VM_RESPONSE:
318 case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
319 memset(&vmr, 0, sizeof(vmr));
320 vmr.vmr_result = res;
321 vmr.vmr_id = id;
322 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
323 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
324 return (-1);
325 break;
326 default:
327 if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd,
328 imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
329 return (-1);
330 break;
331 }
332
333 return (0);
334}
335
336int
337vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
338{
339 struct vmop_result vmr;
340 struct privsep *ps = p->p_ps;
341 int res = 0;
342 struct vmd_vm *vm;
343 struct vm_create_params *vcp;
344 struct vmop_info_result vir;
345
346 switch (imsg->hdr.type) {
347 case IMSG_VMDOP_PAUSE_VM_RESPONSE:
348 IMSG_SIZE_CHECK(imsg, &vmr);
349 memcpy(&vmr, imsg->data, sizeof(vmr));
350 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
351 break;
352 proc_compose_imsg(ps, PROC_CONTROL, -1,
353 imsg->hdr.type, imsg->hdr.peerid, -1,
354 imsg->data, sizeof(imsg->data));
355 log_info("%s: paused vm %d successfully",
356 vm->vm_params.vmc_params.vcp_name,
357 vm->vm_vmid);
358 vm->vm_state |= VM_STATE_PAUSED;
359 break;
360 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
361 IMSG_SIZE_CHECK(imsg, &vmr);
362 memcpy(&vmr, imsg->data, sizeof(vmr));
363 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
364 break;
365 proc_compose_imsg(ps, PROC_CONTROL, -1,
366 imsg->hdr.type, imsg->hdr.peerid, -1,
367 imsg->data, sizeof(imsg->data));
368 log_info("%s: unpaused vm %d successfully.",
369 vm->vm_params.vmc_params.vcp_name,
370 vm->vm_vmid);
371 vm->vm_state &= ~VM_STATE_PAUSED;
372 break;
373 case IMSG_VMDOP_START_VM_RESPONSE:
374 IMSG_SIZE_CHECK(imsg, &vmr);
375 memcpy(&vmr, imsg->data, sizeof(vmr));
376 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL)
377 break;
378 vm->vm_pid = vmr.vmr_pid;
379 vcp = &vm->vm_params.vmc_params;
380 vcp->vcp_id = vmr.vmr_id;
381
382 /*
383 * If the peerid is not -1, forward the response back to the
384 * the control socket. If it is -1, the request originated
385 * from the parent, not the control socket.
386 */
387 if (vm->vm_peerid != (uint32_t)-1) {
388 (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname,
389 sizeof(vmr.vmr_ttyname));
390 if (proc_compose_imsg(ps, PROC_CONTROL, -1,
391 imsg->hdr.type, vm->vm_peerid, -1,
392 &vmr, sizeof(vmr)) == -1) {
393 errno = vmr.vmr_result;
394 log_warn("%s: failed to forward vm result",
395 vcp->vcp_name);
396 vm_remove(vm, __func__);
397 return (-1);
398 }
399 }
400
401 if (vmr.vmr_result) {
402 errno = vmr.vmr_result;
403 log_warn("%s: failed to start vm", vcp->vcp_name);
404 vm_remove(vm, __func__);
405 break;
406 }
407
408 /* Now configure all the interfaces */
409 if (vm_priv_ifconfig(ps, vm) == -1) {
410 log_warn("%s: failed to configure vm", vcp->vcp_name);
411 vm_remove(vm, __func__);
412 break;
413 }
414
415 log_info("%s: started vm %d successfully, tty %s",
416 vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname);
417 break;
418 case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
419 IMSG_SIZE_CHECK(imsg, &vmr);
420 memcpy(&vmr, imsg->data, sizeof(vmr));
421
422 if (vmr.vmr_result) {
423 DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
424 __func__, vmr.vmr_id);
425 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
426 } else {
427 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
428 break;
429 /* Mark VM as shutting down */
430 vm->vm_state |= VM_STATE_SHUTDOWN;
431 }
432 break;
433 case IMSG_VMDOP_SEND_VM_RESPONSE:
434 IMSG_SIZE_CHECK(imsg, &vmr);
435 memcpy(&vmr, imsg->data, sizeof(vmr));
436 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
437 break;
438 if (!vmr.vmr_result) {
439 log_info("%s: sent vm %d successfully.",
440 vm->vm_params.vmc_params.vcp_name,
441 vm->vm_vmid);
442 if (vm->vm_from_config)
443 vm_stop(vm, 0, __func__);
444 else
445 vm_remove(vm, __func__);
446 }
447
448 /* Send a response if a control client is waiting for it */
449 if (imsg->hdr.peerid != (uint32_t)-1) {
450 /* the error is meaningless for deferred responses */
451 vmr.vmr_result = 0;
452
453 if (proc_compose_imsg(ps, PROC_CONTROL, -1,
454 IMSG_VMDOP_SEND_VM_RESPONSE,
455 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
456 return (-1);
457 }
458 break;
459 case IMSG_VMDOP_TERMINATE_VM_EVENT:
460 IMSG_SIZE_CHECK(imsg, &vmr);
461 memcpy(&vmr, imsg->data, sizeof(vmr));
462 DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",
463 __func__, vmr.vmr_id, vmr.vmr_result);
464 if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL) {
465 log_debug("%s: vm %d is no longer available",
466 __func__, vmr.vmr_id);
467 break;
468 }
469 if (vmr.vmr_result != EAGAIN ||
470 vm->vm_params.vmc_bootdevice) {
471 if (vm->vm_from_config)
472 vm_stop(vm, 0, __func__);
473 else
474 vm_remove(vm, __func__);
475 } else {
476 /* Stop VM instance but keep the tty open */
477 vm_stop(vm, 1, __func__);
478 config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
479 }
480
481 /* The error is meaningless for deferred responses */
482 vmr.vmr_result = 0;
483
484 if (proc_compose_imsg(ps, PROC_CONTROL, -1,
485 IMSG_VMDOP_TERMINATE_VM_EVENT,
486 imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
487 return (-1);
488 break;
489 case IMSG_VMDOP_GET_INFO_VM_DATA:
490 IMSG_SIZE_CHECK(imsg, &vir);
491 memcpy(&vir, imsg->data, sizeof(vir));
492 if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL) {
493 memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname));
494 if (vm->vm_ttyname != NULL)
495 strlcpy(vir.vir_ttyname, vm->vm_ttyname,
496 sizeof(vir.vir_ttyname));
497 log_debug("%s: running vm: %d, vm_state: 0x%x",
498 __func__, vm->vm_vmid, vm->vm_state);
499 vir.vir_state = vm->vm_state;
500 /* get the user id who started the vm */
501 vir.vir_uid = vm->vm_uid;
502 vir.vir_gid = vm->vm_params.vmc_owner.gid;
503 }
504 if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type,
505 imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
506 log_debug("%s: GET_INFO_VM failed for vm %d, removing",
507 __func__, vm->vm_vmid);
508 vm_remove(vm, __func__);
509 return (-1);
510 }
511 break;
512 case IMSG_VMDOP_GET_INFO_VM_END_DATA:
513 /*
514 * PROC_VMM has responded with the *running* VMs, now we
515 * append the others. These use the special value 0 for their
516 * kernel id to indicate that they are not running.
517 */
518 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
519 if (!(vm->vm_state & VM_STATE_RUNNING)) {
520 memset(&vir, 0, sizeof(vir));
521 vir.vir_info.vir_id = vm->vm_vmid;
522 strlcpy(vir.vir_info.vir_name,
523 vm->vm_params.vmc_params.vcp_name,
524 VMM_MAX_NAME_LEN);
525 vir.vir_info.vir_memory_size =
526 vm->vm_params.vmc_params.
527 vcp_memranges[0].vmr_size;
528 vir.vir_info.vir_ncpus =
529 vm->vm_params.vmc_params.vcp_ncpus;
530 /* get the configured user id for this vm */
531 vir.vir_uid = vm->vm_params.vmc_owner.uid;
532 vir.vir_gid = vm->vm_params.vmc_owner.gid;
533 log_debug("%s: vm: %d, vm_state: 0x%x",
534 __func__, vm->vm_vmid, vm->vm_state);
535 vir.vir_state = vm->vm_state;
536 if (proc_compose_imsg(ps, PROC_CONTROL, -1,
537 IMSG_VMDOP_GET_INFO_VM_DATA,
538 imsg->hdr.peerid, -1, &vir,
539 sizeof(vir)) == -1) {
540 log_debug("%s: GET_INFO_VM_END failed",
541 __func__);
542 vm_remove(vm, __func__);
543 return (-1);
544 }
545 }
546 }
547 IMSG_SIZE_CHECK(imsg, &res);
548 proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
549 break;
550 default:
551 return (-1);
552 }
553
554 return (0);
555}
556
557int
558vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg)
559{
560 struct vmop_addr_result var;
561
562 switch (imsg->hdr.type) {
563 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
564 IMSG_SIZE_CHECK(imsg, &var);
565 memcpy(&var, imsg->data, sizeof(var));
566 proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1);
567 break;
568 default:
569 return (-1);
570 }
571
572 return (0);
573}
574
575int
576vmd_check_vmh(struct vm_dump_header *vmh)
577{
578 int i;
579 unsigned int code, leaf;
580 unsigned int a, b, c, d;
581
582 if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) {
583 log_warnx("%s: incompatible dump signature", __func__);
584 return (-1);
585 }
586
587 if (vmh->vmh_version != VM_DUMP_VERSION) {
588 log_warnx("%s: incompatible dump version", __func__);
589 return (-1);
590 }
591
592 for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
593 code = vmh->vmh_cpuids[i].code;
594 leaf = vmh->vmh_cpuids[i].leaf;
595 if (leaf != 0x00) {
596 log_debug("%s: invalid leaf 0x%x for code 0x%x",
597 __func__, leaf, code);
598 return (-1);
599 }
600
601 switch (code) {
602 case 0x00:
603 CPUID_LEAF(code, leaf, a, b, c, d);
604 if (vmh->vmh_cpuids[i].a > a) {
605 log_debug("%s: incompatible cpuid level",
606 __func__);
607 return (-1);
608 }
609 if (!(vmh->vmh_cpuids[i].b == b &&
610 vmh->vmh_cpuids[i].c == c &&
611 vmh->vmh_cpuids[i].d == d)) {
612 log_debug("%s: incompatible cpu brand",
613 __func__);
614 return (-1);
615 }
616 break;
617
618 case 0x01:
619 CPUID_LEAF(code, leaf, a, b, c, d);
620 if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
621 (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
622 log_debug("%s: incompatible cpu features "
623 "code: 0x%x leaf: 0x%x reg: c", __func__,
624 code, leaf);
625 return (-1);
626 }
627 if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
628 (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
629 log_debug("%s: incompatible cpu features "
630 "code: 0x%x leaf: 0x%x reg: d", __func__,
631 code, leaf);
632 return (-1);
633 }
634 break;
635
636 case 0x07:
637 CPUID_LEAF(code, leaf, a, b, c, d);
638 if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
639 (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
640 log_debug("%s: incompatible cpu features "
641 "code: 0x%x leaf: 0x%x reg: c", __func__,
642 code, leaf);
643 return (-1);
644 }
645 if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
646 (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
647 log_debug("%s: incompatible cpu features "
648 "code: 0x%x leaf: 0x%x reg: d", __func__,
649 code, leaf);
650 return (-1);
651 }
652 break;
653
654 case 0x0d:
655 CPUID_LEAF(code, leaf, a, b, c, d);
656 if (vmh->vmh_cpuids[i].b > b) {
657 log_debug("%s: incompatible cpu: insufficient "
658 "max save area for enabled XCR0 features",
659 __func__);
660 return (-1);
661 }
662 if (vmh->vmh_cpuids[i].c > c) {
663 log_debug("%s: incompatible cpu: insufficient "
664 "max save area for supported XCR0 features",
665 __func__);
666 return (-1);
667 }
668 break;
669
670 case 0x80000001:
671 CPUID_LEAF(code, leaf, a, b, c, d);
672 if ((vmh->vmh_cpuids[i].a & a) !=
673 vmh->vmh_cpuids[i].a) {
674 log_debug("%s: incompatible cpu features "
675 "code: 0x%x leaf: 0x%x reg: a", __func__,
676 code, leaf);
677 return (-1);
678 }
679 if ((vmh->vmh_cpuids[i].c & c) !=
680 vmh->vmh_cpuids[i].c) {
681 log_debug("%s: incompatible cpu features "
682 "code: 0x%x leaf: 0x%x reg: c", __func__,
683 code, leaf);
684 return (-1);
685 }
686 if ((vmh->vmh_cpuids[i].d & d) !=
687 vmh->vmh_cpuids[i].d) {
688 log_debug("%s: incompatible cpu features "
689 "code: 0x%x leaf: 0x%x reg: d", __func__,
690 code, leaf);
691 return (-1);
692 }
693 break;
694
695 default:
696 log_debug("%s: unknown code 0x%x", __func__, code);
697 return (-1);
698 }
699 }
700
701 return (0);
702}
703
704void
705vmd_sighdlr(int sig, short event, void *arg)
706{
707 if (privsep_process != PROC_PARENT)
708 return;
709 log_debug("%s: handling signal", __func__);
710
711 switch (sig) {
712 case SIGHUP:
713 log_info("%s: reload requested with SIGHUP", __func__);
714
715 /*
716 * This is safe because libevent uses async signal handlers
717 * that run in the event loop and not in signal context.
718 */
719 (void)vmd_reload(0, NULL);
720 break;
721 case SIGPIPE:
722 log_info("%s: ignoring SIGPIPE", __func__);
723 break;
724 case SIGUSR1:
725 log_info("%s: ignoring SIGUSR1", __func__);
726 break;
727 case SIGTERM:
728 case SIGINT:
729 vmd_shutdown();
730 break;
731 default:
732 fatalx("unexpected signal");
733 }
734}
735
736__dead void
737usage(void)
738{
739 extern char *__progname;
740 fprintf(stderr, "usage: %s [-dnv] [-D macro=value] [-f file]\n",
741 __progname);
742 exit(1);
743}
744
745int
746main(int argc, char **argv)
747{
748 struct privsep *ps;
749 int ch;
750 const char *conffile = VMD_CONF;
751 enum privsep_procid proc_id = PROC_PARENT;
752 int proc_instance = 0;
753 const char *errp, *title = NULL;
754 int argc0 = argc;
755
756 log_init(0, LOG_DAEMON);
757
758 if ((env = calloc(1, sizeof(*env))) == NULL)
759 fatal("calloc: env");
760
761 while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
762 switch (ch) {
763 case 'D':
764 if (cmdline_symset(optarg) < 0)
765 log_warnx("could not parse macro definition %s",
766 optarg);
767 break;
768 case 'd':
769 env->vmd_debug = 2;
770 break;
771 case 'f':
772 conffile = optarg;
773 break;
774 case 'v':
775 env->vmd_verbose++;
776 break;
777 case 'n':
778 env->vmd_noaction = 1;
779 break;
780 case 'P':
781 title = optarg;
782 proc_id = proc_getid(procs, nitems(procs), title);
783 if (proc_id == PROC_MAX)
784 fatalx("invalid process name");
785 break;
786 case 'I':
787 proc_instance = strtonum(optarg, 0,
788 PROC_MAX_INSTANCES, &errp);
789 if (errp)
790 fatalx("invalid process instance");
791 break;
792 default:
793 usage();
794 }
795 }
796
797 argc -= optind;
798 if (argc > 0)
799 usage();
800
801 if (env->vmd_noaction && !env->vmd_debug)
802 env->vmd_debug = 1;
803
804 log_init(env->vmd_debug, LOG_DAEMON);
805 log_setverbose(env->vmd_verbose);
806
807 /* check for root privileges */
808 if (env->vmd_noaction == 0) {
809 if (geteuid())
810 fatalx("need root privileges");
811 }
812
813 ps = &env->vmd_ps;
814 ps->ps_env = env;
815 env->vmd_fd = -1;
816
817 if (config_init(env) == -1)
818 fatal("failed to initialize configuration");
819
820 if ((ps->ps_pw = getpwnam(VMD_USER)) == NULL)
821 fatal("unknown user %s", VMD_USER);
822
823 /* First proc runs as root without pledge but in default chroot */
824 proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
825 proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */
826
827 /* Open /dev/vmm */
828 if (env->vmd_noaction == 0) {
829 env->vmd_fd = open(VMM_NODE, O_RDWR);
830 if (env->vmd_fd == -1)
831 fatal("%s", VMM_NODE);
832 }
833
834 /* Configure the control socket */
835 ps->ps_csock.cs_name = SOCKET_NAME;
836 TAILQ_INIT(&ps->ps_rcsocks);
837
838 /* Configuration will be parsed after forking the children */
839 env->vmd_conffile = conffile;
840
841 if (env->vmd_noaction)
842 ps->ps_noaction = 1;
843 ps->ps_instance = proc_instance;
844 if (title != NULL)
845 ps->ps_title[proc_id] = title;
846
847 /* only the parent returns */
848 proc_init(ps, procs, nitems(procs), env->vmd_debug, argc0, argv,
849 proc_id);
850
851 log_procinit("parent");
852 if (!env->vmd_debug && daemon(0, 0) == -1)
853 fatal("can't daemonize");
854
855 if (ps->ps_noaction == 0)
856 log_info("startup");
857
858 event_init();
859
860 signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps);
861 signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps);
862 signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps);
863 signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps);
864 signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps);
865
866 signal_add(&ps->ps_evsigint, NULL);
867 signal_add(&ps->ps_evsigterm, NULL);
868 signal_add(&ps->ps_evsighup, NULL);
869 signal_add(&ps->ps_evsigpipe, NULL);
870 signal_add(&ps->ps_evsigusr1, NULL);
871
872 if (!env->vmd_noaction)
873 proc_connect(ps);
874
875 if (vmd_configure() == -1)
876 fatalx("configuration failed");
877
878 event_dispatch();
879
880 log_debug("parent exiting");
881
882 return (0);
883}
884
885void
886start_vm_batch(int fd, short type, void *args)
887{
888 int i = 0;
889 struct vmd_vm *vm;
890
891 log_debug("%s: starting batch of %d vms", __func__,
892 env->vmd_cfg.parallelism);
893 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
894 if (!(vm->vm_state & VM_STATE_WAITING)) {
895 log_debug("%s: not starting vm %s (disabled)",
896 __func__,
897 vm->vm_params.vmc_params.vcp_name);
898 continue;
899 }
900 i++;
901 if (i > env->vmd_cfg.parallelism) {
902 evtimer_add(&staggered_start_timer,
903 &env->vmd_cfg.delay);
904 break;
905 }
906 vm->vm_state &= ~VM_STATE_WAITING;
907 config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid);
908 }
909 log_debug("%s: done starting vms", __func__);
910}
911
912int
913vmd_configure(void)
914{
915 int ncpus;
916 struct vmd_switch *vsw;
917 int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
918 size_t ncpus_sz = sizeof(ncpus);
919
920 if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
921 fatal("open %s", PATH_PTMDEV);
922
923 /*
924 * pledge in the parent process:
925 * stdio - for malloc and basic I/O including events.
926 * rpath - for reload to open and read the configuration files.
927 * wpath - for opening disk images and tap devices.
928 * tty - for openpty and TIOCUCNTL.
929 * proc - run kill to terminate its children safely.
930 * sendfd - for disks, interfaces and other fds.
931 * recvfd - for send and receive.
932 * getpw - lookup user or group id by name.
933 * chown, fattr - change tty ownership
934 * flock - locking disk files
935 */
936 if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw"
937 " chown fattr flock", NULL((void *)0)) == -1)
938 fatal("pledge");
939
940 if (parse_config(env->vmd_conffile) == -1) {
941 proc_kill(&env->vmd_ps);
942 exit(1);
943 }
944
945 if (env->vmd_noaction) {
946 fprintf(stderr, "configuration OK\n");
947 proc_kill(&env->vmd_ps);
948 exit(0);
949 }
950
951 /* Send shared global configuration to all children */
952 if (config_setconfig(env) == -1)
953 return (-1);
954
955 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
956 if (vsw->sw_running)
957 continue;
958 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
959 log_warn("%s: failed to create switch %s",
960 __func__, vsw->sw_name);
961 switch_remove(vsw);
962 return (-1);
963 }
964 }
965
966 if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
967 env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
968 if (sysctl(ncpu_mib, nitems(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1)
969 ncpus = 1;
970 env->vmd_cfg.parallelism = ncpus;
971 log_debug("%s: setting staggered start configuration to "
972 "parallelism: %d and delay: %lld",
973 __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec);
974 }
975
976 log_debug("%s: starting vms in staggered fashion", __func__);
977 evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
978 /* start first batch */
979 start_vm_batch(0, 0, NULL);
980
981 return (0);
982}
983
984int
985vmd_reload(unsigned int reset, const char *filename)
986{
987 struct vmd_vm *vm, *next_vm;
988 struct vmd_switch *vsw;
989 int reload = 0;
990
991 /* Switch back to the default config file */
992 if (filename == NULL || *filename == '\0') {
993 filename = env->vmd_conffile;
994 reload = 1;
995 }
996
997 log_debug("%s: level %d config file %s", __func__, reset, filename);
998
999 if (reset) {
1000 /* Purge the configuration */
1001 config_purge(env, reset);
1002 config_setreset(env, reset);
1003 } else {
1004 /*
1005 * Load or reload the configuration.
1006 *
1007 * Reloading removes all non-running VMs before processing the
1008 * config file, whereas loading only adds to the existing list
1009 * of VMs.
1010 */
1011
1012 if (reload) {
1013 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,
1014 next_vm) {
1015 if (!(vm->vm_state & VM_STATE_RUNNING)) {
1016 DPRINTF("%s: calling vm_remove",
1017 __func__);
1018 vm_remove(vm, __func__);
1019 }
1020 }
1021 }
1022
1023 if (parse_config(filename) == -1) {
1024 log_debug("%s: failed to load config file %s",
1025 __func__, filename);
1026 return (-1);
1027 }
1028
1029 if (reload) {
1030 /* Update shared global configuration in all children */
1031 if (config_setconfig(env) == -1)
1032 return (-1);
1033 }
1034
1035 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1036 if (vsw->sw_running)
1037 continue;
1038 if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) {
1039 log_warn("%s: failed to create switch %s",
1040 __func__, vsw->sw_name);
1041 switch_remove(vsw);
1042 return (-1);
1043 }
1044 }
1045
1046 log_debug("%s: starting vms in staggered fashion", __func__);
1047 evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
1048 /* start first batch */
1049 start_vm_batch(0, 0, NULL);
1050
1051 }
1052
1053 return (0);
1054}
1055
1056void
1057vmd_shutdown(void)
1058{
1059 struct vmd_vm *vm, *vm_next;
1060
1061 log_debug("%s: performing shutdown", __func__);
1062
1063 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
1064 vm_remove(vm, __func__);
1065 }
1066
1067 proc_kill(&env->vmd_ps);
1068 free(env);
1069
1070 log_warnx("parent terminating");
1071 exit(0);
1072}
1073
1074struct vmd_vm *
1075vm_getbyvmid(uint32_t vmid)
1076{
1077 struct vmd_vm *vm;
1078
1079 if (vmid == 0)
1080 return (NULL);
1081 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1082 if (vm->vm_vmid == vmid)
1083 return (vm);
1084 }
1085
1086 return (NULL);
1087}
1088
1089struct vmd_vm *
1090vm_getbyid(uint32_t id)
1091{
1092 struct vmd_vm *vm;
1093
1094 if (id == 0)
1095 return (NULL);
1096 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1097 if (vm->vm_params.vmc_params.vcp_id == id)
1098 return (vm);
1099 }
1100
1101 return (NULL);
1102}
1103
1104uint32_t
1105vm_id2vmid(uint32_t id, struct vmd_vm *vm)
1106{
1107 if (vm == NULL && (vm = vm_getbyid(id)) == NULL)
1108 return (0);
1109 DPRINTF("%s: vmm id %u is vmid %u", __func__,
1110 id, vm->vm_vmid);
1111 return (vm->vm_vmid);
1112}
1113
1114uint32_t
1115vm_vmid2id(uint32_t vmid, struct vmd_vm *vm)
1116{
1117 if (vm == NULL && (vm = vm_getbyvmid(vmid)) == NULL)
1118 return (0);
1119 DPRINTF("%s: vmid %u is vmm id %u", __func__,
1120 vmid, vm->vm_params.vmc_params.vcp_id);
1121 return (vm->vm_params.vmc_params.vcp_id);
1122}
1123
1124struct vmd_vm *
1125vm_getbyname(const char *name)
1126{
1127 struct vmd_vm *vm;
1128
1129 if (name == NULL)
1130 return (NULL);
1131 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1132 if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0)
1133 return (vm);
1134 }
1135
1136 return (NULL);
1137}
1138
1139struct vmd_vm *
1140vm_getbypid(pid_t pid)
1141{
1142 struct vmd_vm *vm;
1143
1144 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
1145 if (vm->vm_pid == pid)
1146 return (vm);
1147 }
1148
1149 return (NULL);
1150}
1151
1152void
1153vm_stop(struct vmd_vm *vm, int keeptty, const char *caller)
1154{
1155 struct privsep *ps = &env->vmd_ps;
1156 unsigned int i, j;
1157
1158 if (vm == NULL)
1159 return;
1160
1161 log_debug("%s: %s %s stopping vm %d%s",
1162 __func__, ps->ps_title[privsep_process], caller,
1163 vm->vm_vmid, keeptty ? ", keeping tty open" : "");
1164
1165 vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN);
1166
1167 user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0);
1168 user_put(vm->vm_user);
1169
1170 if (vm->vm_iev.ibuf.fd != -1) {
1171 event_del(&vm->vm_iev.ev);
1172 close(vm->vm_iev.ibuf.fd);
1173 }
1174 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++) {
1175 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
1176 if (vm->vm_disks[i][j] != -1) {
1177 close(vm->vm_disks[i][j]);
1178 vm->vm_disks[i][j] = -1;
1179 }
1180 }
1181 }
1182 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
1183 if (vm->vm_ifs[i].vif_fd != -1) {
1184 close(vm->vm_ifs[i].vif_fd);
1185 vm->vm_ifs[i].vif_fd = -1;
1186 }
1187 free(vm->vm_ifs[i].vif_name);
1188 free(vm->vm_ifs[i].vif_switch);
1189 free(vm->vm_ifs[i].vif_group);
1190 vm->vm_ifs[i].vif_name = NULL;
1191 vm->vm_ifs[i].vif_switch = NULL;
1192 vm->vm_ifs[i].vif_group = NULL;
1193 }
1194 if (vm->vm_kernel != -1) {
1195 close(vm->vm_kernel);
1196 vm->vm_kernel = -1;
1197 }
1198 if (vm->vm_cdrom != -1) {
1199 close(vm->vm_cdrom);
1200 vm->vm_cdrom = -1;
1201 }
1202 if (!keeptty) {
1203 vm_closetty(vm);
1204 vm->vm_uid = 0;
1205 }
1206}
1207
1208void
1209vm_remove(struct vmd_vm *vm, const char *caller)
1210{
1211 struct privsep *ps = &env->vmd_ps;
1212
1213 if (vm == NULL)
1214 return;
1215
1216 log_debug("%s: %s %s removing vm %d from running config",
1217 __func__, ps->ps_title[privsep_process], caller,
1218 vm->vm_vmid);
1219
1220 TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
1221
1222 user_put(vm->vm_user);
1223 vm_stop(vm, 0, caller);
1224 free(vm);
1225}
1226
1227int
1228vm_claimid(const char *name, int uid, uint32_t *id)
1229{
1230 struct name2id *n2i = NULL;
1231
1232 TAILQ_FOREACH(n2i, env->vmd_known, entry)
1233 if (strcmp(n2i->name, name) == 0 && n2i->uid == uid)
1234 goto out;
1235
1236 if (++env->vmd_nvm == 0) {
1237 log_warnx("too many vms");
1238 return -1;
1239 }
1240 if ((n2i = calloc(1, sizeof(struct name2id))) == NULL) {
1241 log_warnx("could not alloc vm name");
1242 return -1;
1243 }
1244 n2i->id = env->vmd_nvm;
1245 n2i->uid = uid;
1246 if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) {
1247 log_warnx("vm name too long");
1248 free(n2i);
1249 return -1;
1250 }
1251 TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry);
1252
1253out:
1254 *id = n2i->id;
1255 return 0;
1256}
1257
1258int
1259vm_register(struct privsep *ps, struct vmop_create_params *vmc,
1260 struct vmd_vm **ret_vm, uint32_t id, uid_t uid)
1261{
1262 struct vmd_vm *vm = NULL, *vm_parent = NULL;
1263 struct vm_create_params *vcp = &vmc->vmc_params;
1264 struct vmop_owner *vmo = NULL;
1265 struct vmd_user *usr = NULL;
1266 uint32_t nid, rng;
1267 unsigned int i, j;
1268 struct vmd_switch *sw;
1269 char *s;
1270 int ret = 0;
1271
1272 /* Check if this is an instance of another VM */
1273 if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) {
1274 errno = ret; /* XXX might set invalid errno */
1275 return (-1);
1276 }
1277
1278 errno = 0;
1279 *ret_vm = NULL;
1280
1281 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
1282 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1283 if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
1284 uid) != 0) {
1285 errno = EPERM;
1286 goto fail;
1287 }
1288 *ret_vm = vm;
1289 errno = EALREADY;
1290 goto fail;
1291 }
1292
1293 if (vm_parent != NULL)
1294 vmo = &vm_parent->vm_params.vmc_insowner;
1295
1296 /* non-root users can only start existing VMs or instances */
1297 if (vm_checkperm(NULL, vmo, uid) != 0) {
1298 log_warnx("permission denied");
1299 errno = EPERM;
1300 goto fail;
1301 }
1302 if (vmc->vmc_flags == 0) {
1303 log_warnx("invalid configuration, no devices");
1304 errno = VMD_DISK_MISSING;
1305 goto fail;
1306 }
1307 if (vcp->vcp_ncpus == 0)
1308 vcp->vcp_ncpus = 1;
1309 if (vcp->vcp_memranges[0].vmr_size == 0)
1310 vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
1311 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
1312 log_warnx("invalid number of CPUs");
1313 goto fail;
1314 } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM) {
1315 log_warnx("invalid number of disks");
1316 goto fail;
1317 } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM) {
1318 log_warnx("invalid number of interfaces");
1319 goto fail;
1320 } else if (strlen(vcp->vcp_kernel) == 0 &&
1321 vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) {
1322 log_warnx("no kernel or disk/cdrom specified");
1323 goto fail;
1324 } else if (strlen(vcp->vcp_name) == 0) {
1325 log_warnx("invalid VM name");
1326 goto fail;
1327 } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' ||
1328 *vcp->vcp_name == '_') {
1329 log_warnx("invalid VM name");
1330 goto fail;
1331 } else {
1332 for (s = vcp->vcp_name; *s != '\0'; ++s) {
1333 if (!(isalnum(*s) || *s == '.' || *s == '-' ||
1334 *s == '_')) {
1335 log_warnx("invalid VM name");
1336 goto fail;
1337 }
1338 }
1339 }
1340
1341 /* track active users */
1342 if (uid != 0 && env->vmd_users != NULL &&
1343 (usr = user_get(uid)) == NULL) {
1344 log_warnx("could not add user");
1345 goto fail;
1346 }
1347
1348 if ((vm = calloc(1, sizeof(*vm))) == NULL)
1349 goto fail;
1350
1351 memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params));
1352 vmc = &vm->vm_params;
1353 vcp = &vmc->vmc_params;
1354 vm->vm_pid = -1;
1355 vm->vm_tty = -1;
1356 vm->vm_receive_fd = -1;
1357 vm->vm_state &= ~VM_STATE_PAUSED;
1358 vm->vm_user = usr;
1359
1360 for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
1361 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
1362 vm->vm_disks[i][j] = -1;
1363 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
1364 vm->vm_ifs[i].vif_fd = -1;
1365 for (i = 0; i < vcp->vcp_nnics; i++) {
1366 if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL) {
1367 /* inherit per-interface flags from the switch */
1368 vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK);
1369 }
1370
1371 /*
1372 * If the MAC address is zero, always randomize it in vmd(8)
1373 * because we cannot rely on the guest OS to do the right
1374 * thing like OpenBSD does. Based on ether_fakeaddr()
1375 * from the kernel, incremented by one to differentiate
1376 * the source.
1377 */
1378 if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN) == 0) {
1379 rng = arc4random();
1380 vcp->vcp_macs[i][0] = 0xfe;
1381 vcp->vcp_macs[i][1] = 0xe1;
1382 vcp->vcp_macs[i][2] = 0xba + 1;
1383 vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf);
1384 vcp->vcp_macs[i][4] = rng;
1385 vcp->vcp_macs[i][5] = rng >> 8;
1386 }
1387 }
1388 vm->vm_kernel = -1;
1389 vm->vm_cdrom = -1;
1390 vm->vm_iev.ibuf.fd = -1;
1391
1392 /*
1393 * Assign a new internal Id if not specified and we succeed in
1394 * claiming a new Id.
1395 */
1396 if (id != 0)
1397 vm->vm_vmid = id;
1398 else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1)
1399 goto fail;
1400 else
1401 vm->vm_vmid = nid;
1402
1403 log_debug("%s: registering vm %d", __func__, vm->vm_vmid);
1404 TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry);
1405
1406 *ret_vm = vm;
1407 return (0);
1408 fail:
1409 if (errno == 0)
1410 errno = EINVAL;
1411 return (-1);
1412}
1413
1414int
1415vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
1416 struct vmop_create_params *vmc, uid_t uid)
1417{
1418 char *name;
1419 struct vm_create_params *vcp = &vmc->vmc_params;
1420 struct vmop_create_params *vmcp;
1421 struct vm_create_params *vcpp;
1422 struct vmd_vm *vm = NULL;
1423 unsigned int i, j;
1424
1425 /* return without error if the parent is NULL (nothing to inherit) */
1426 if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE) == 0 ||
1427 vmc->vmc_instance[0] == '\0')
1428 return (0);
1429
1430 if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL) {
1431 return (VMD_PARENT_INVALID);
1432 }
1433
1434 vmcp = &(*vm_parent)->vm_params;
1435 vcpp = &vmcp->vmc_params;
1436
1437 /* Are we allowed to create an instance from this VM? */
1438 if (vm_checkperm(NULL, &vmcp->vmc_insowner, uid) != 0) {
1439 log_warnx("vm \"%s\" no permission to create vm instance",
1440 vcpp->vcp_name);
1441 return (ENAMETOOLONG);
1442 }
1443
1444 name = vcp->vcp_name;
1445
1446 if ((vm = vm_getbyname(vcp->vcp_name)) != NULL ||
Although the value stored to 'vm' is used in the enclosing expression, the value is never actually read from 'vm'
1447 (vm = vm_getbyvmid(vcp->vcp_id)) != NULL) {
1448 return (EPROCLIM);
1449 }
1450
1451 /* CPU */
1452 if (vcp->vcp_ncpus == 0)
1453 vcp->vcp_ncpus = vcpp->vcp_ncpus;
1454 if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU, uid) != 0 &&
1455 vcp->vcp_ncpus != vcpp->vcp_ncpus) {
1456 log_warnx("vm \"%s\" no permission to set cpus", name);
1457 return (EPERM);
1458 }
1459
1460 /* memory */
1461 if (vcp->vcp_memranges[0].vmr_size == 0)
1462 vcp->vcp_memranges[0].vmr_size =
1463 vcpp->vcp_memranges[0].vmr_size;
1464 if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY, uid) != 0 &&
1465 vcp->vcp_memranges[0].vmr_size !=
1466 vcpp->vcp_memranges[0].vmr_size) {
1467 log_warnx("vm \"%s\" no permission to set memory", name);
1468 return (EPERM);
1469 }
1470
1471 /* disks cannot be inherited */
1472 if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK, uid) != 0 &&
1473 vcp->vcp_ndisks) {
1474 log_warnx("vm \"%s\" no permission to set disks", name);
1475 return (EPERM);
1476 }
1477 for (i = 0; i < vcp->vcp_ndisks; i++) {
1478 /* Check if this disk is already used in the parent */
1479 for (j = 0; j < vcpp->vcp_ndisks; j++) {
1480 if (strcmp(vcp->vcp_disks[i],
1481 vcpp->vcp_disks[j]) == 0) {
1482 log_warnx("vm \"%s\" disk %s cannot be reused",
1483 name, vcp->vcp_disks[i]);
1484 return (EBUSY);
1485 }
1486 }
1487 vmc->vmc_checkaccess |= VMOP_CREATE_DISK;
1488 }
1489
1490 /* interfaces */
1491 if (vcp->vcp_nnics > 0 &&
1492 vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK, uid) != 0 &&
1493 vcp->vcp_nnics != vcpp->vcp_nnics) {
1494 log_warnx("vm \"%s\" no permission to set interfaces", name);
1495 return (EPERM);
1496 }
1497 for (i = 0; i < vcpp->vcp_nnics; i++) {
1498 /* Interface got overwritten */
1499 if (i < vcp->vcp_nnics)
1500 continue;
1501
1502 /* Copy interface from parent */
1503 vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i];
1504 (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i],
1505 sizeof(vmc->vmc_ifnames[i]));
1506 (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i],
1507 sizeof(vmc->vmc_ifswitch[i]));
1508 (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i],
1509 sizeof(vmc->vmc_ifgroup[i]));
1510 memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i],
1511 sizeof(vcp->vcp_macs[i]));
1512 vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i];
1513 vcp->vcp_nnics++;
1514 }
1515 for (i = 0; i < vcp->vcp_nnics; i++) {
1516 for (j = 0; j < vcpp->vcp_nnics; j++) {
1517 if (memcmp(zero_mac, vcp->vcp_macs[i],
1518 sizeof(vcp->vcp_macs[i])) != 0 &&
1519 memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i],
1520 sizeof(vcp->vcp_macs[i])) != 0) {
1521 log_warnx("vm \"%s\" lladdr cannot be reused",
1522 name);
1523 return (EBUSY);
1524 }
1525 if (strlen(vmc->vmc_ifnames[i]) &&
1526 strcmp(vmc->vmc_ifnames[i],
1527 vmcp->vmc_ifnames[j]) == 0) {
1528 log_warnx("vm \"%s\" %s cannot be reused",
1529 vmc->vmc_ifnames[i], name);
1530 return (EBUSY);
1531 }
1532 }
1533 }
1534
1535 /* kernel */
1536 if (strlen(vcp->vcp_kernel) > 0) {
1537 if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
1538 log_warnx("vm \"%s\" no permission to set boot image",
1539 name);
1540 return (EPERM);
1541 }
1542 vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
1543 } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel,
1544 sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) {
1545 log_warnx("vm \"%s\" kernel name too long", name);
1546 return (EINVAL);
1547 }
1548
1549 /* cdrom */
1550 if (strlen(vcp->vcp_cdrom) > 0) {
1551 if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, uid) != 0) {
1552 log_warnx("vm \"%s\" no permission to set cdrom", name);
1553 return (EPERM);
1554 }
1555 vmc->vmc_checkaccess |= VMOP_CREATE_CDROM;
1556 } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom,
1557 sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) {
1558 log_warnx("vm \"%s\" cdrom name too long", name);
1559 return (EINVAL);
1560 }
1561
1562 /* user */
1563 if (vmc->vmc_owner.uid == 0)
1564 vmc->vmc_owner.uid = vmcp->vmc_owner.uid;
1565 else if (vmc->vmc_owner.uid != uid &&
1566 vmc->vmc_owner.uid != vmcp->vmc_owner.uid) {
1567 log_warnx("vm \"%s\" user mismatch", name);
1568 return (EPERM);
1569 }
1570
1571 /* group */
1572 if (vmc->vmc_owner.gid == 0)
1573 vmc->vmc_owner.gid = vmcp->vmc_owner.gid;
1574 else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) {
1575 log_warnx("vm \"%s\" group mismatch", name);
1576 return (EPERM);
1577 }
1578
1579 /* child instances */
1580 if (vmc->vmc_insflags) {
1581 log_warnx("vm \"%s\" cannot change instance permissions", name);
1582 return (EPERM);
1583 }
1584 if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE) {
1585 vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid;
1586 vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid;
1587 vmc->vmc_insflags = vmcp->vmc_insflags;
1588 } else {
1589 vmc->vmc_insowner.gid = 0;
1590 vmc->vmc_insowner.uid = 0;
1591 vmc->vmc_insflags = 0;
1592 }
1593
1594 /* finished, remove instance flags */
1595 vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE;
1596
1597 return (0);
1598}
1599
1600/*
1601 * vm_checkperm
1602 *
1603 * Checks if the user represented by the 'uid' parameter is allowed to
1604 * manipulate the VM described by the 'vm' parameter (or connect to said VM's
1605 * console.)
1606 *
1607 * Parameters:
1608 * vm: the VM whose permission is to be checked
1609 * vmo: the required uid/gid to be checked
1610 * uid: the user ID of the user making the request
1611 *
1612 * Return values:
1613 * 0: the permission should be granted
1614 * -1: the permission check failed (also returned if vm == null)
1615 */
1616int
1617vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid)
1618{
1619 struct group *gr;
1620 struct passwd *pw;
1621 char **grmem;
1622
1623 /* root has no restrictions */
1624 if (uid == 0)
1625 return (0);
1626
1627 if (vmo == NULL)
1628 return (-1);
1629
1630 /* check user */
1631 if (vm == NULL) {
1632 if (vmo->uid == uid)
1633 return (0);
1634 } else {
1635 /*
1636 * check user of running vm (the owner of a running vm can
1637 * be different to (or more specific than) the configured owner.
1638 */
1639 if (((vm->vm_state & VM_STATE_RUNNING) && vm->vm_uid == uid) ||
1640 (!(vm->vm_state & VM_STATE_RUNNING) && vmo->uid == uid))
1641 return (0);
1642 }
1643
1644 /* check groups */
1645 if (vmo->gid != -1) {
1646 if ((pw = getpwuid(uid)) == NULL)
1647 return (-1);
1648 if (pw->pw_gid == vmo->gid)
1649 return (0);
1650 if ((gr = getgrgid(vmo->gid)) != NULL) {
1651 for (grmem = gr->gr_mem; *grmem; grmem++)
1652 if (strcmp(*grmem, pw->pw_name) == 0)
1653 return (0);
1654 }
1655 }
1656
1657 return (-1);
1658}
1659
1660/*
1661 * vm_checkinsflag
1662 *
1663 * Checks whether the non-root user is allowed to set an instance option.
1664 *
1665 * Parameters:
1666 * vmc: the VM create parameters
1667 * flag: the flag to be checked
1668 * uid: the user ID of the user making the request
1669 *
1670 * Return values:
1671 * 0: the permission should be granted
1672 * -1: the permission check failed
1673 */
1674int
1675vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid)
1676{
1677 /* root has no restrictions */
1678 if (uid == 0)
1679 return (0);
1680
1681 if ((vmc->vmc_insflags & flag) == 0)
1682 return (-1);
1683
1684 return (0);
1685}
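
The parent's vmc_insflags thus acts as a capability mask for non-root instance owners; each override in the instance-creation path above is gated by one bit. A hypothetical call sequence, using the flags seen earlier in this file:

        /* parent allows instances to override only the kernel and cdrom */
        vmcp->vmc_insflags = VMOP_CREATE_KERNEL | VMOP_CREATE_CDROM;

        vm_checkinsflag(vmcp, VMOP_CREATE_CDROM, 1000);    /* 0: permitted */
        vm_checkinsflag(vmcp, VMOP_CREATE_INSTANCE, 1000); /* -1: refused */
        vm_checkinsflag(vmcp, VMOP_CREATE_INSTANCE, 0);    /* 0: root is exempt */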
1686
1687/*
1688 * vm_checkaccess
1689 *
1690 * Checks if the user represented by the 'uid' parameter is allowed to
1691 * access the file described by the 'path' parameter.
1692 *
1693 * Parameters:
1694 * fd: the file descriptor of the opened file
1695 * uflag: when non-zero, perform the per-user permission check
1696 * uid: the user ID of the user making the request
1697 * amode: the requested access mode, a mask of R_OK and W_OK
1698 *
1699 * Return values:
1700 * 0: the permission should be granted
1701 * -1: the permission check failed
1702 */
1703int
1704vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
1705{
1706 struct group *gr;
1707 struct passwd *pw;
1708 char **grmem;
1709 struct stat st;
1710 mode_t mode;
1711
1712 if (fd == -1)
1713 return (-1);
1714
1715 /*
1716 * File has to be accessible and a regular file
1717 */
1718 if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode))
1719 return (-1);
1720
1721 /* root has no restrictions */
1722 if (uid == 0 || uflag == 0)
1723 return (0);
1724
1725 /* check other */
1726 mode = amode & W_OK ? S_IWOTH : 0;
1727 mode |= amode & R_OK ? S_IROTH : 0;
1728 if ((st.st_mode & mode) == mode)
1729 return (0);
1730
1731 /* check user */
1732 mode = amode & W_OK ? S_IWUSR : 0;
1733 mode |= amode & R_OK ? S_IRUSR : 0;
1734 if (uid == st.st_uid && (st.st_mode & mode) == mode)
1735 return (0);
1736
1737 /* check groups */
1738 mode = amode & W_OK ? S_IWGRP : 0;
1739 mode |= amode & R_OK ? S_IRGRP : 0;
1740 if ((st.st_mode & mode) != mode)
1741 return (-1);
1742 if ((pw = getpwuid(uid)) == NULL)
1743 return (-1);
1744 if (pw->pw_gid == st.st_gid)
1745 return (0);
1746 if ((gr = getgrgid(st.st_gid)) != NULL) {
1747 for (grmem = gr->gr_mem; *grmem; grmem++)
1748 if (strcmp(*grmem, pw->pw_name) == 0)
1749 return (0);
1750 }
1751
1752 return (-1);
1753}
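
vm_checkaccess() walks the other, user, and group permission classes in that order, each time demanding every requested bit. A sketch of the amode-to-mode mapping for the "other" class, assuming amode only ever carries R_OK and/or W_OK as documented (amode_to_other is an editor's helper name):

#include <sys/stat.h>
#include <unistd.h>

/* build the other-class permission bits for an access(2)-style amode */
static mode_t
amode_to_other(int amode)
{
        mode_t mode = 0;

        if (amode & W_OK)
                mode |= S_IWOTH;
        if (amode & R_OK)
                mode |= S_IROTH;
        return (mode);
}

/* (st.st_mode & mode) == mode then holds only if every requested bit
 * is set, so R_OK|W_OK requires the file to be both o+r and o+w. */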
1754
1755int
1756vm_opentty(struct vmd_vm *vm)
1757{
1758 struct ptmget ptm;
1759 struct stat st;
1760 struct group *gr;
1761 uid_t uid;
1762 gid_t gid;
1763 mode_t mode;
1764 int on;
1765
1766 /*
1767 * Open tty with pre-opened PTM fd
1768 */
1769 if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
1770 return (-1);
1771
1772 /*
1773 * We use user ioctl(2) mode to pass break commands.
1774 */
1775 on = 1;
1776 if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1)
1777 fatal("could not enable user ioctl mode");
1778
1779 vm->vm_tty = ptm.cfd;
1780 close(ptm.sfd);
1781 if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL)
1782 goto fail;
1783
1784 uid = vm->vm_uid;
1785 gid = vm->vm_params.vmc_owner.gid;
1786
1787 if (vm->vm_params.vmc_owner.gid != -1) {
1788 mode = 0660;
1789 } else if ((gr = getgrnam("tty")) != NULL((void *)0)) {
1790 gid = gr->gr_gid;
1791 mode = 0620;
1792 } else {
1793 mode = 0600;
1794 gid = 0;
1795 }
1796
1797 log_debug("%s: vm %s tty %s uid %d gid %d mode %o",
1798 __func__, vm->vm_params.vmc_params.vcp_name,
1799 vm->vm_ttyname, uid, gid, mode);
1800
1801 /*
1802 * Change ownership and mode of the tty as required.
1803 * Loosely based on the implementation of sshpty.c
1804 */
1805 if (stat(vm->vm_ttyname, &st) == -1)
1806 goto fail;
1807
1808 if (st.st_uid != uid || st.st_gid != gid) {
1809 if (chown(vm->vm_ttyname, uid, gid) == -1) {
1810 log_warn("chown %s %d %d failed, uid %d",
1811 vm->vm_ttyname, uid, gid, getuid());
1812
1813 /* Ignore failure on read-only filesystems */
1814 if (!((errno == EROFS) &&
1815 (st.st_uid == uid || st.st_uid == 0)))
1816 goto fail;
1817 }
1818 }
1819
1820 if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
1821 if (chmod(vm->vm_ttyname, mode) == -1) {
1822 log_warn("chmod %s %o failed, uid %d",
1823 vm->vm_ttyname, mode, getuid());
1824
1825 /* Ignore failure on read-only filesystems */
1826 if (!((errno == EROFS) &&
1827 (st.st_uid == uid || st.st_uid == 0)))
1828 goto fail;
1829 }
1830 }
1831
1832 return (0);
1833 fail:
1834 vm_closetty(vm);
1835 return (-1);
1836}
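
PTMGET is OpenBSD's /dev/ptm interface: a single ioctl(2) returns an already-opened controller/tty fd pair plus the tty path, sidestepping the traditional openpty(3) permission races. A minimal sketch of that step in isolation (open_pty_pair is the editor's example; vmd instead keeps vmd_ptmfd open from startup):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
open_pty_pair(void)
{
        struct ptmget ptm;
        int fd;

        if ((fd = open("/dev/ptm", O_RDWR)) == -1)
                return (-1);
        if (ioctl(fd, PTMGET, &ptm) == -1) {
                close(fd);
                return (-1);
        }
        close(fd);
        printf("controller fd %d, tty fd %d (%s)\n",
            ptm.cfd, ptm.sfd, ptm.sn);
        close(ptm.sfd);         /* as vm_opentty() does; keep the controller */
        return (ptm.cfd);
}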
1837
1838void
1839vm_closetty(struct vmd_vm *vm)
1840{
1841 if (vm->vm_tty != -1) {
1842 /* Release and close the tty */
1843 if (fchown(vm->vm_tty, 0, 0) == -1)
1844 log_warn("chown %s 0 0 failed", vm->vm_ttyname);
1845 if (fchmod(vm->vm_tty, 0666) == -1)
1846 log_warn("chmod %s 0666 failed", vm->vm_ttyname);
1847 close(vm->vm_tty);
1848 vm->vm_tty = -1;
1849 }
1850 free(vm->vm_ttyname);
1851 vm->vm_ttyname = NULL;
1852}
1853
1854void
1855switch_remove(struct vmd_switch *vsw)
1856{
1857 if (vsw == NULL)
1858 return;
1859
1860 TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry);
1861
1862 free(vsw->sw_group);
1863 free(vsw->sw_name);
1864 free(vsw);
1865}
1866
1867struct vmd_switch *
1868switch_getbyname(const char *name)
1869{
1870 struct vmd_switch *vsw;
1871
1872 if (name == NULL)
1873 return (NULL);
1874 TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry) {
1875 if (strcmp(vsw->sw_name, name) == 0)
1876 return (vsw);
1877 }
1878
1879 return (NULL);
1880}
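
switch_getbyname() is a plain linear scan over a queue(3) tail queue. The same shape on a self-contained toy list, with editor-invented names:

#include <sys/queue.h>
#include <stddef.h>
#include <string.h>

struct item {
        const char *name;
        TAILQ_ENTRY(item) entry;
};
TAILQ_HEAD(itemq, item);

struct item *
item_getbyname(struct itemq *q, const char *name)
{
        struct item *it;

        if (name == NULL)
                return (NULL);
        TAILQ_FOREACH(it, q, entry) {
                if (strcmp(it->name, name) == 0)
                        return (it);
        }
        return (NULL);
}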
1881
1882struct vmd_user *
1883user_get(uid_t uid)
1884{
1885 struct vmd_user *usr;
1886
1887 if (uid == 0)
1888 return (NULL);
1889
1890 /* first try to find an existing user */
1891 TAILQ_FOREACH(usr, env->vmd_users, usr_entry) {
1892 if (usr->usr_id.uid == uid)
1893 goto done;
1894 }
1895
1896 if ((usr = calloc(1, sizeof(*usr))) == NULL) {
1897 log_warn("could not allocate user");
1898 return (NULL);
1899 }
1900
1901 usr->usr_id.uid = uid;
1902 usr->usr_id.gid = -1;
1903 TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry);
1904
1905 done:
1906 DPRINTF("%s: uid %d #%d +",
1907 __func__, usr->usr_id.uid, usr->usr_refcnt + 1);
1908 usr->usr_refcnt++;
1909
1910 return (usr);
1911}
1912
1913void
1914user_put(struct vmd_user *usr)
1915{
1916 if (usr == NULL)
1917 return;
1918
1919 DPRINTF("%s: uid %d #%d -",
1920 __func__, usr->usr_id.uid, usr->usr_refcnt - 1);
1921
1922 if (--usr->usr_refcnt > 0)
1923 return;
1924
1925 TAILQ_REMOVE(env->vmd_users, usr, usr_entry);
1926 free(usr);
1927}
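
user_get() and user_put() form a find-or-create reference-counting pair: user_get() returns the existing entry or allocates a fresh one, bumping usr_refcnt either way, and user_put() unlinks and frees the entry once the last reference drops. A hypothetical balanced caller (uid 1000 is illustrative; uid 0 yields NULL by design):

        struct vmd_user *usr;

        if ((usr = user_get(1000)) == NULL)
                return (-1);
        /* ... account a VM against this user while it runs ... */
        user_put(usr);          /* freed here only if refcnt reached zero */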
1928
1929void
1930user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc)
1931{
1932 char mem[FMT_SCALED_STRSIZE];
1933
1934 if (usr == NULL)
1935 return;
1936
1937 /* increment or decrement counters */
1938 inc = inc ? 1 : -1;
1939
1940 usr->usr_maxcpu += vcp->vcp_ncpus * inc;
1941 usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc;
1942 usr->usr_maxifs += vcp->vcp_nnics * inc;
1943
1944 if (log_getverbose() > 1) {
1945 (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem);
1946 log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu",
1947 __func__, inc == 1 ? '+' : '-',
1948 usr->usr_id.uid, usr->usr_refcnt,
1949 usr->usr_maxcpu, mem, usr->usr_maxifs);
1950 }
1951}
1952
1953int
1954user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp)
1955{
1956 const char *limit = "";
1957
1958 /* XXX make the limits configurable */
1959 if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU) {
1960 limit = "cpu ";
1961 goto fail;
1962 }
1963 if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM) {
1964 limit = "memory ";
1965 goto fail;
1966 }
1967 if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS) {
1968 limit = "interface ";
1969 goto fail;
1970 }
1971
1972 return (0);
1973
1974 fail:
1975 log_warnx("%s: user %d %slimit reached", vcp->vcp_name,
1976 usr->usr_id.uid, limit);
1977 return (-1);
1978}
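
A worked example against the defaults above: a user already running two VMs with 1 vcpu, 512 MB and one interface each stands at usr_maxcpu 2, usr_maxmem 1024 and usr_maxifs 2. A third VM requesting 4 vcpus would raise usr_maxcpu to 6, exceeding VM_DEFAULT_USER_MAXCPU (4), so user_checklimit() would log "<vm name>: user <uid> cpu limit reached" and return -1.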
1979
1980char *
1981get_string(uint8_t *ptr, size_t len)
1982{
1983 size_t i;
1984
1985 for (i = 0; i < len; i++)
1986 if (!isprint(ptr[i]))
1987 break;
1988
1989 return strndup(ptr, i);
1990}
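
get_string() duplicates only the leading run of printable bytes, so an embedded control byte truncates the copy:

        uint8_t buf[] = { 'v', 'm', 'd', 0x01, 'x' };
        char *s = get_string(buf, sizeof(buf));   /* s == "vmd" */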
1991
1992uint32_t
1993prefixlen2mask(uint8_t prefixlen)
1994{
1995 if (prefixlen == 0)
1996 return (0);
1997
1998 if (prefixlen > 32)
1999 prefixlen = 32;
2000
2001 return (htonl(0xffffffff << (32 - prefixlen)));
2002}
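
The zero-prefix early return is not just an optimization: shifting a 32-bit value by 32 is undefined behavior in C, so 0xffffffff << (32 - 0) must never be evaluated. For example:

        prefixlen2mask(24);     /* htonl(0xffffff00), i.e. 255.255.255.0 */
        prefixlen2mask(0);      /* 0.0.0.0, special-cased to avoid the shift */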
2003
2004void
2005prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
2006{
2007 struct in6_addr s6;
2008 int i;
2009
2010 if (prefixlen > 128)
2011 prefixlen = 128;
2012
2013 memset(&s6, 0, sizeof(s6));
2014 for (i = 0; i < prefixlen / 8; i++)
2015 s6.s6_addr[i] = 0xff;
2016 i = prefixlen % 8;
2017 if (i)
2018 s6.s6_addr[prefixlen / 8] = 0xff00 >> i;
2019
2020 memcpy(mask, &s6, sizeof(s6));
2021}
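
Worked example: for prefixlen 20, the loop fills two whole 0xff bytes (16 bits), then the remainder 20 % 8 == 4 stores 0xff00 >> 4 into the next byte, which truncates to 0xf0:

        struct in6_addr mask;

        prefixlen2mask6(20, &mask);
        /* mask.s6_addr = { 0xff, 0xff, 0xf0, 0x00, ... } == ffff:f000:: */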
2022
2023void
2024getmonotime(struct timeval *tv)
2025{
2026 struct timespec ts;
2027
2028 if (clock_gettime(CLOCK_MONOTONIC, &ts))
2029 fatal("clock_gettime");
2030
2031 TIMESPEC_TO_TIMEVAL(tv, &ts);
2032}
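
getmonotime() reads CLOCK_MONOTONIC, so callers get timestamps immune to wall-clock steps from date(1) or ntpd(8); TIMESPEC_TO_TIMEVAL simply divides the nanosecond field down to microseconds (tv_nsec 250000000 becomes tv_usec 250000).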