File: | src/usr.sbin/vmd/vmd.c |
Warning: | line 110, column 10 Access to field 'vm_state' results in a dereference of a null pointer (loaded from variable 'vm') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: vmd.c,v 1.129 2022/01/04 15:18:44 claudio Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include <sys/types.h> | |||
20 | #include <sys/queue.h> | |||
21 | #include <sys/wait.h> | |||
22 | #include <sys/stat.h> | |||
23 | #include <sys/sysctl.h> | |||
24 | #include <sys/tty.h> | |||
25 | #include <sys/ttycom.h> | |||
26 | #include <sys/ioctl.h> | |||
27 | ||||
28 | #include <stdio.h> | |||
29 | #include <stdlib.h> | |||
30 | #include <string.h> | |||
31 | #include <termios.h> | |||
32 | #include <errno(*__errno()).h> | |||
33 | #include <event.h> | |||
34 | #include <fcntl.h> | |||
35 | #include <pwd.h> | |||
36 | #include <signal.h> | |||
37 | #include <syslog.h> | |||
38 | #include <unistd.h> | |||
39 | #include <util.h> | |||
40 | #include <ctype.h> | |||
41 | #include <pwd.h> | |||
42 | #include <grp.h> | |||
43 | ||||
44 | #include <machine/specialreg.h> | |||
45 | #include <machine/vmmvar.h> | |||
46 | ||||
47 | #include "proc.h" | |||
48 | #include "atomicio.h" | |||
49 | #include "vmd.h" | |||
50 | ||||
51 | __dead__attribute__((__noreturn__)) void usage(void); | |||
52 | ||||
53 | int main(int, char **); | |||
54 | int vmd_configure(void); | |||
55 | void vmd_sighdlr(int sig, short event, void *arg); | |||
56 | void vmd_shutdown(void); | |||
57 | int vmd_control_run(void); | |||
58 | int vmd_dispatch_control(int, struct privsep_proc *, struct imsg *); | |||
59 | int vmd_dispatch_vmm(int, struct privsep_proc *, struct imsg *); | |||
60 | int vmd_dispatch_priv(int, struct privsep_proc *, struct imsg *); | |||
61 | int vmd_check_vmh(struct vm_dump_header *); | |||
62 | ||||
63 | int vm_instance(struct privsep *, struct vmd_vm **, | |||
64 | struct vmop_create_params *, uid_t); | |||
65 | int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t); | |||
66 | int vm_claimid(const char *, int, uint32_t *); | |||
67 | void start_vm_batch(int, short, void*); | |||
68 | ||||
69 | struct vmd *env; | |||
70 | ||||
71 | static struct privsep_proc procs[] = { | |||
72 | /* Keep "priv" on top as procs[0] */ | |||
73 | { "priv", PROC_PRIV, vmd_dispatch_priv, priv }, | |||
74 | { "control", PROC_CONTROL, vmd_dispatch_control, control }, | |||
75 | { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown }, | |||
76 | }; | |||
77 | ||||
78 | enum privsep_procid privsep_process; | |||
79 | ||||
80 | struct event staggered_start_timer; | |||
81 | ||||
82 | /* For the privileged process */ | |||
83 | static struct privsep_proc *proc_priv = &procs[0]; | |||
84 | static struct passwd proc_privpw; | |||
85 | static const uint8_t zero_mac[ETHER_ADDR_LEN6]; | |||
86 | ||||
87 | int | |||
88 | vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
89 | { | |||
90 | struct privsep *ps = p->p_ps; | |||
91 | int res = 0, ret = 0, cmd = 0, verbose; | |||
92 | unsigned int v = 0, flags; | |||
93 | struct vmop_create_params vmc; | |||
94 | struct vmop_id vid; | |||
95 | struct vmop_result vmr; | |||
96 | struct vm_dump_header vmh; | |||
97 | struct vmd_vm *vm = NULL((void *)0); | |||
| ||||
98 | char *str = NULL((void *)0); | |||
99 | uint32_t id = 0; | |||
100 | struct control_sock *rcs; | |||
101 | ||||
102 | switch (imsg->hdr.type) { | |||
103 | case IMSG_VMDOP_START_VM_REQUEST: | |||
104 | IMSG_SIZE_CHECK(imsg, &vmc)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmc)) fatalx("bad length imsg received (%s)", "&vmc" ); } while (0); | |||
105 | memcpy(&vmc, imsg->data, sizeof(vmc)); | |||
106 | ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); | |||
107 | if (vmc.vmc_flags == 0) { | |||
108 | /* start an existing VM with pre-configured options */ | |||
109 | if (!(ret == -1 && errno(*__errno()) == EALREADY37 && | |||
110 | !(vm->vm_state & VM_STATE_RUNNING0x01))) { | |||
| ||||
111 | res = errno(*__errno()); | |||
112 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
113 | } | |||
114 | } else if (ret != 0) { | |||
115 | res = errno(*__errno()); | |||
116 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
117 | } | |||
118 | if (res == 0) { | |||
119 | res = config_setvm(ps, vm, imsg->hdr.peerid, | |||
120 | vm->vm_params.vmc_owner.uid); | |||
121 | if (res) | |||
122 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
123 | } | |||
124 | break; | |||
125 | case IMSG_VMDOP_WAIT_VM_REQUEST: | |||
126 | case IMSG_VMDOP_TERMINATE_VM_REQUEST: | |||
127 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
128 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
129 | flags = vid.vid_flags; | |||
130 | cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; | |||
131 | ||||
132 | if ((id = vid.vid_id) == 0) { | |||
133 | /* Lookup vm (id) by name */ | |||
134 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
135 | res = ENOENT2; | |||
136 | break; | |||
137 | } else if ((vm->vm_state & VM_STATE_SHUTDOWN0x04) && | |||
138 | (flags & VMOP_FORCE0x01) == 0) { | |||
139 | res = EALREADY37; | |||
140 | break; | |||
141 | } else if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
142 | res = EINVAL22; | |||
143 | break; | |||
144 | } | |||
145 | id = vm->vm_vmid; | |||
146 | } else if ((vm = vm_getbyvmid(id)) == NULL((void *)0)) { | |||
147 | res = ENOENT2; | |||
148 | break; | |||
149 | } | |||
150 | if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) { | |||
151 | res = EPERM1; | |||
152 | break; | |||
153 | } | |||
154 | ||||
155 | /* Only relay TERMINATION requests, not WAIT requests */ | |||
156 | if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) { | |||
157 | memset(&vid, 0, sizeof(vid)); | |||
158 | vid.vid_id = id; | |||
159 | vid.vid_flags = flags; | |||
160 | ||||
161 | if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
162 | imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1) | |||
163 | return (-1); | |||
164 | } | |||
165 | break; | |||
166 | case IMSG_VMDOP_GET_INFO_VM_REQUEST: | |||
167 | proc_forward_imsg(ps, imsg, PROC_VMM, -1); | |||
168 | break; | |||
169 | case IMSG_VMDOP_LOAD: | |||
170 | IMSG_SIZE_CHECK(imsg, str)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*str)) fatalx("bad length imsg received (%s)", "str"); } while (0); /* at least one byte for path */ | |||
171 | str = get_string((uint8_t *)imsg->data, | |||
172 | IMSG_DATA_SIZE(imsg)((imsg)->hdr.len - sizeof(struct imsg_hdr))); | |||
173 | case IMSG_VMDOP_RELOAD: | |||
174 | if (vmd_reload(0, str) == -1) | |||
175 | cmd = IMSG_CTL_FAIL; | |||
176 | else | |||
177 | cmd = IMSG_CTL_OK; | |||
178 | free(str); | |||
179 | break; | |||
180 | case IMSG_CTL_RESET: | |||
181 | IMSG_SIZE_CHECK(imsg, &v)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&v)) fatalx("bad length imsg received (%s)", "&v"); } while (0); | |||
182 | memcpy(&v, imsg->data, sizeof(v)); | |||
183 | if (vmd_reload(v, NULL((void *)0)) == -1) | |||
184 | cmd = IMSG_CTL_FAIL; | |||
185 | else | |||
186 | cmd = IMSG_CTL_OK; | |||
187 | break; | |||
188 | case IMSG_CTL_VERBOSE: | |||
189 | IMSG_SIZE_CHECK(imsg, &verbose)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&verbose)) fatalx("bad length imsg received (%s)", "&verbose" ); } while (0); | |||
190 | memcpy(&verbose, imsg->data, sizeof(verbose)); | |||
191 | log_setverbose(verbose); | |||
192 | ||||
193 | proc_forward_imsg(ps, imsg, PROC_VMM, -1); | |||
194 | proc_forward_imsg(ps, imsg, PROC_PRIV, -1); | |||
195 | cmd = IMSG_CTL_OK; | |||
196 | break; | |||
197 | case IMSG_VMDOP_PAUSE_VM: | |||
198 | case IMSG_VMDOP_UNPAUSE_VM: | |||
199 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
200 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
201 | if (vid.vid_id == 0) { | |||
202 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
203 | res = ENOENT2; | |||
204 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
205 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
206 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
207 | break; | |||
208 | } else { | |||
209 | vid.vid_id = vm->vm_vmid; | |||
210 | } | |||
211 | } else if ((vm = vm_getbyid(vid.vid_id)) == NULL((void *)0)) { | |||
212 | res = ENOENT2; | |||
213 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
214 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
215 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
216 | break; | |||
217 | } | |||
218 | if (vm_checkperm(vm, &vm->vm_params.vmc_owner, | |||
219 | vid.vid_uid) != 0) { | |||
220 | res = EPERM1; | |||
221 | cmd = imsg->hdr.type == IMSG_VMDOP_PAUSE_VM | |||
222 | ? IMSG_VMDOP_PAUSE_VM_RESPONSE | |||
223 | : IMSG_VMDOP_UNPAUSE_VM_RESPONSE; | |||
224 | break; | |||
225 | } | |||
226 | proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
227 | imsg->hdr.peerid, -1, &vid, sizeof(vid)); | |||
228 | break; | |||
229 | case IMSG_VMDOP_SEND_VM_REQUEST: | |||
230 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
231 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
232 | id = vid.vid_id; | |||
233 | if (vid.vid_id == 0) { | |||
234 | if ((vm = vm_getbyname(vid.vid_name)) == NULL((void *)0)) { | |||
235 | res = ENOENT2; | |||
236 | cmd = IMSG_VMDOP_SEND_VM_RESPONSE; | |||
237 | close(imsg->fd); | |||
238 | break; | |||
239 | } else { | |||
240 | vid.vid_id = vm->vm_vmid; | |||
241 | } | |||
242 | } else if ((vm = vm_getbyvmid(vid.vid_id)) == NULL((void *)0)) { | |||
243 | res = ENOENT2; | |||
244 | cmd = IMSG_VMDOP_SEND_VM_RESPONSE; | |||
245 | close(imsg->fd); | |||
246 | break; | |||
247 | } | |||
248 | vmr.vmr_id = vid.vid_id; | |||
249 | log_debug("%s: sending fd to vmm", __func__); | |||
250 | proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type, | |||
251 | imsg->hdr.peerid, imsg->fd, &vid, sizeof(vid)); | |||
252 | break; | |||
253 | case IMSG_VMDOP_RECEIVE_VM_REQUEST: | |||
254 | IMSG_SIZE_CHECK(imsg, &vid)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vid)) fatalx("bad length imsg received (%s)", "&vid" ); } while (0); | |||
255 | memcpy(&vid, imsg->data, sizeof(vid)); | |||
256 | if (imsg->fd == -1) { | |||
257 | log_warnx("%s: invalid fd", __func__); | |||
258 | return (-1); | |||
259 | } | |||
260 | if (atomicio(read, imsg->fd, &vmh, sizeof(vmh)) != | |||
261 | sizeof(vmh)) { | |||
262 | log_warnx("%s: error reading vmh from received vm", | |||
263 | __func__); | |||
264 | res = EIO5; | |||
265 | close(imsg->fd); | |||
266 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
267 | break; | |||
268 | } | |||
269 | ||||
270 | if (vmd_check_vmh(&vmh)) { | |||
271 | res = ENOENT2; | |||
272 | close(imsg->fd); | |||
273 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
274 | break; | |||
275 | } | |||
276 | if (atomicio(read, imsg->fd, &vmc, sizeof(vmc)) != | |||
277 | sizeof(vmc)) { | |||
278 | log_warnx("%s: error reading vmc from received vm", | |||
279 | __func__); | |||
280 | res = EIO5; | |||
281 | close(imsg->fd); | |||
282 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
283 | break; | |||
284 | } | |||
285 | strlcpy(vmc.vmc_params.vcp_name, vid.vid_name, | |||
286 | sizeof(vmc.vmc_params.vcp_name)); | |||
287 | vmc.vmc_params.vcp_id = 0; | |||
288 | ||||
289 | ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); | |||
290 | if (ret != 0) { | |||
291 | res = errno(*__errno()); | |||
292 | cmd = IMSG_VMDOP_START_VM_RESPONSE; | |||
293 | close(imsg->fd); | |||
294 | } else { | |||
295 | vm->vm_state |= VM_STATE_RECEIVED0x08; | |||
296 | config_setvm(ps, vm, imsg->hdr.peerid, | |||
297 | vmc.vmc_owner.uid); | |||
298 | log_debug("%s: sending fd to vmm", __func__); | |||
299 | proc_compose_imsg(ps, PROC_VMM, -1, | |||
300 | IMSG_VMDOP_RECEIVE_VM_END, vm->vm_vmid, imsg->fd, | |||
301 | NULL((void *)0), 0); | |||
302 | } | |||
303 | break; | |||
304 | case IMSG_VMDOP_DONE: | |||
305 | control_reset(&ps->ps_csock); | |||
306 | TAILQ_FOREACH(rcs, &ps->ps_rcsocks, cs_entry)for((rcs) = ((&ps->ps_rcsocks)->tqh_first); (rcs) != ((void *)0); (rcs) = ((rcs)->cs_entry.tqe_next)) | |||
307 | control_reset(rcs); | |||
308 | cmd = 0; | |||
309 | break; | |||
310 | default: | |||
311 | return (-1); | |||
312 | } | |||
313 | ||||
314 | switch (cmd) { | |||
315 | case 0: | |||
316 | break; | |||
317 | case IMSG_VMDOP_START_VM_RESPONSE: | |||
318 | case IMSG_VMDOP_TERMINATE_VM_RESPONSE: | |||
319 | memset(&vmr, 0, sizeof(vmr)); | |||
320 | vmr.vmr_result = res; | |||
321 | vmr.vmr_id = id; | |||
322 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, | |||
323 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
324 | return (-1); | |||
325 | break; | |||
326 | default: | |||
327 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, cmd, | |||
328 | imsg->hdr.peerid, -1, &res, sizeof(res)) == -1) | |||
329 | return (-1); | |||
330 | break; | |||
331 | } | |||
332 | ||||
333 | return (0); | |||
334 | } | |||
335 | ||||
336 | int | |||
337 | vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
338 | { | |||
339 | struct vmop_result vmr; | |||
340 | struct privsep *ps = p->p_ps; | |||
341 | int res = 0; | |||
342 | struct vmd_vm *vm; | |||
343 | struct vm_create_params *vcp; | |||
344 | struct vmop_info_result vir; | |||
345 | ||||
346 | switch (imsg->hdr.type) { | |||
347 | case IMSG_VMDOP_PAUSE_VM_RESPONSE: | |||
348 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
349 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
350 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
351 | break; | |||
352 | proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
353 | imsg->hdr.type, imsg->hdr.peerid, -1, | |||
354 | imsg->data, sizeof(imsg->data)); | |||
355 | log_info("%s: paused vm %d successfully", | |||
356 | vm->vm_params.vmc_params.vcp_name, | |||
357 | vm->vm_vmid); | |||
358 | vm->vm_state |= VM_STATE_PAUSED0x10; | |||
359 | break; | |||
360 | case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: | |||
361 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
362 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
363 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
364 | break; | |||
365 | proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
366 | imsg->hdr.type, imsg->hdr.peerid, -1, | |||
367 | imsg->data, sizeof(imsg->data)); | |||
368 | log_info("%s: unpaused vm %d successfully.", | |||
369 | vm->vm_params.vmc_params.vcp_name, | |||
370 | vm->vm_vmid); | |||
371 | vm->vm_state &= ~VM_STATE_PAUSED0x10; | |||
372 | break; | |||
373 | case IMSG_VMDOP_START_VM_RESPONSE: | |||
374 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
375 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
376 | if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL((void *)0)) | |||
377 | break; | |||
378 | vm->vm_pid = vmr.vmr_pid; | |||
379 | vcp = &vm->vm_params.vmc_params; | |||
380 | vcp->vcp_id = vmr.vmr_id; | |||
381 | ||||
382 | /* | |||
383 | * If the peerid is not -1, forward the response back to the | |||
384 | * the control socket. If it is -1, the request originated | |||
385 | * from the parent, not the control socket. | |||
386 | */ | |||
387 | if (vm->vm_peerid != (uint32_t)-1) { | |||
388 | (void)strlcpy(vmr.vmr_ttyname, vm->vm_ttyname, | |||
389 | sizeof(vmr.vmr_ttyname)); | |||
390 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
391 | imsg->hdr.type, vm->vm_peerid, -1, | |||
392 | &vmr, sizeof(vmr)) == -1) { | |||
393 | errno(*__errno()) = vmr.vmr_result; | |||
394 | log_warn("%s: failed to foward vm result", | |||
395 | vcp->vcp_name); | |||
396 | vm_remove(vm, __func__); | |||
397 | return (-1); | |||
398 | } | |||
399 | } | |||
400 | ||||
401 | if (vmr.vmr_result) { | |||
402 | errno(*__errno()) = vmr.vmr_result; | |||
403 | log_warn("%s: failed to start vm", vcp->vcp_name); | |||
404 | vm_remove(vm, __func__); | |||
405 | break; | |||
406 | } | |||
407 | ||||
408 | /* Now configure all the interfaces */ | |||
409 | if (vm_priv_ifconfig(ps, vm) == -1) { | |||
410 | log_warn("%s: failed to configure vm", vcp->vcp_name); | |||
411 | vm_remove(vm, __func__); | |||
412 | break; | |||
413 | } | |||
414 | ||||
415 | log_info("%s: started vm %d successfully, tty %s", | |||
416 | vcp->vcp_name, vm->vm_vmid, vm->vm_ttyname); | |||
417 | break; | |||
418 | case IMSG_VMDOP_TERMINATE_VM_RESPONSE: | |||
419 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
420 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
421 | ||||
422 | if (vmr.vmr_result) { | |||
423 | DPRINTF("%s: forwarding TERMINATE VM for vm id %d",do {} while(0) | |||
424 | __func__, vmr.vmr_id)do {} while(0); | |||
425 | proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); | |||
426 | } else { | |||
427 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
428 | break; | |||
429 | /* Mark VM as shutting down */ | |||
430 | vm->vm_state |= VM_STATE_SHUTDOWN0x04; | |||
431 | } | |||
432 | break; | |||
433 | case IMSG_VMDOP_SEND_VM_RESPONSE: | |||
434 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
435 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
436 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) | |||
437 | break; | |||
438 | if (!vmr.vmr_result) { | |||
439 | log_info("%s: sent vm %d successfully.", | |||
440 | vm->vm_params.vmc_params.vcp_name, | |||
441 | vm->vm_vmid); | |||
442 | if (vm->vm_from_config) | |||
443 | vm_stop(vm, 0, __func__); | |||
444 | else | |||
445 | vm_remove(vm, __func__); | |||
446 | } | |||
447 | ||||
448 | /* Send a response if a control client is waiting for it */ | |||
449 | if (imsg->hdr.peerid != (uint32_t)-1) { | |||
450 | /* the error is meaningless for deferred responses */ | |||
451 | vmr.vmr_result = 0; | |||
452 | ||||
453 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
454 | IMSG_VMDOP_SEND_VM_RESPONSE, | |||
455 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
456 | return (-1); | |||
457 | } | |||
458 | break; | |||
459 | case IMSG_VMDOP_TERMINATE_VM_EVENT: | |||
460 | IMSG_SIZE_CHECK(imsg, &vmr)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vmr)) fatalx("bad length imsg received (%s)", "&vmr" ); } while (0); | |||
461 | memcpy(&vmr, imsg->data, sizeof(vmr)); | |||
462 | DPRINTF("%s: handling TERMINATE_EVENT for vm id %d ret %d",do {} while(0) | |||
463 | __func__, vmr.vmr_id, vmr.vmr_result)do {} while(0); | |||
464 | if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL((void *)0)) { | |||
465 | log_debug("%s: vm %d is no longer available", | |||
466 | __func__, vmr.vmr_id); | |||
467 | break; | |||
468 | } | |||
469 | if (vmr.vmr_result != EAGAIN35 || | |||
470 | vm->vm_params.vmc_bootdevice) { | |||
471 | if (vm->vm_from_config) | |||
472 | vm_stop(vm, 0, __func__); | |||
473 | else | |||
474 | vm_remove(vm, __func__); | |||
475 | } else { | |||
476 | /* Stop VM instance but keep the tty open */ | |||
477 | vm_stop(vm, 1, __func__); | |||
478 | config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid); | |||
479 | } | |||
480 | ||||
481 | /* The error is meaningless for deferred responses */ | |||
482 | vmr.vmr_result = 0; | |||
483 | ||||
484 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
485 | IMSG_VMDOP_TERMINATE_VM_EVENT, | |||
486 | imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1) | |||
487 | return (-1); | |||
488 | break; | |||
489 | case IMSG_VMDOP_GET_INFO_VM_DATA: | |||
490 | IMSG_SIZE_CHECK(imsg, &vir)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&vir)) fatalx("bad length imsg received (%s)", "&vir" ); } while (0); | |||
491 | memcpy(&vir, imsg->data, sizeof(vir)); | |||
492 | if ((vm = vm_getbyvmid(vir.vir_info.vir_id)) != NULL((void *)0)) { | |||
493 | memset(vir.vir_ttyname, 0, sizeof(vir.vir_ttyname)); | |||
494 | if (vm->vm_ttyname != NULL((void *)0)) | |||
495 | strlcpy(vir.vir_ttyname, vm->vm_ttyname, | |||
496 | sizeof(vir.vir_ttyname)); | |||
497 | log_debug("%s: running vm: %d, vm_state: 0x%x", | |||
498 | __func__, vm->vm_vmid, vm->vm_state); | |||
499 | vir.vir_state = vm->vm_state; | |||
500 | /* get the user id who started the vm */ | |||
501 | vir.vir_uid = vm->vm_uid; | |||
502 | vir.vir_gid = vm->vm_params.vmc_owner.gid; | |||
503 | } | |||
504 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, imsg->hdr.type, | |||
505 | imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { | |||
506 | log_debug("%s: GET_INFO_VM failed for vm %d, removing", | |||
507 | __func__, vm->vm_vmid); | |||
508 | vm_remove(vm, __func__); | |||
509 | return (-1); | |||
510 | } | |||
511 | break; | |||
512 | case IMSG_VMDOP_GET_INFO_VM_END_DATA: | |||
513 | /* | |||
514 | * PROC_VMM has responded with the *running* VMs, now we | |||
515 | * append the others. These use the special value 0 for their | |||
516 | * kernel id to indicate that they are not running. | |||
517 | */ | |||
518 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
519 | if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
520 | memset(&vir, 0, sizeof(vir)); | |||
521 | vir.vir_info.vir_id = vm->vm_vmid; | |||
522 | strlcpy(vir.vir_info.vir_name, | |||
523 | vm->vm_params.vmc_params.vcp_name, | |||
524 | VMM_MAX_NAME_LEN64); | |||
525 | vir.vir_info.vir_memory_size = | |||
526 | vm->vm_params.vmc_params. | |||
527 | vcp_memranges[0].vmr_size; | |||
528 | vir.vir_info.vir_ncpus = | |||
529 | vm->vm_params.vmc_params.vcp_ncpus; | |||
530 | /* get the configured user id for this vm */ | |||
531 | vir.vir_uid = vm->vm_params.vmc_owner.uid; | |||
532 | vir.vir_gid = vm->vm_params.vmc_owner.gid; | |||
533 | log_debug("%s: vm: %d, vm_state: 0x%x", | |||
534 | __func__, vm->vm_vmid, vm->vm_state); | |||
535 | vir.vir_state = vm->vm_state; | |||
536 | if (proc_compose_imsg(ps, PROC_CONTROL, -1, | |||
537 | IMSG_VMDOP_GET_INFO_VM_DATA, | |||
538 | imsg->hdr.peerid, -1, &vir, | |||
539 | sizeof(vir)) == -1) { | |||
540 | log_debug("%s: GET_INFO_VM_END failed", | |||
541 | __func__); | |||
542 | vm_remove(vm, __func__); | |||
543 | return (-1); | |||
544 | } | |||
545 | } | |||
546 | } | |||
547 | IMSG_SIZE_CHECK(imsg, &res)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&res)) fatalx("bad length imsg received (%s)", "&res" ); } while (0); | |||
548 | proc_forward_imsg(ps, imsg, PROC_CONTROL, -1); | |||
549 | break; | |||
550 | default: | |||
551 | return (-1); | |||
552 | } | |||
553 | ||||
554 | return (0); | |||
555 | } | |||
556 | ||||
557 | int | |||
558 | vmd_dispatch_priv(int fd, struct privsep_proc *p, struct imsg *imsg) | |||
559 | { | |||
560 | struct vmop_addr_result var; | |||
561 | ||||
562 | switch (imsg->hdr.type) { | |||
563 | case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: | |||
564 | IMSG_SIZE_CHECK(imsg, &var)do { if (((imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof (*&var)) fatalx("bad length imsg received (%s)", "&var" ); } while (0); | |||
565 | memcpy(&var, imsg->data, sizeof(var)); | |||
566 | proc_forward_imsg(p->p_ps, imsg, PROC_VMM, -1); | |||
567 | break; | |||
568 | default: | |||
569 | return (-1); | |||
570 | } | |||
571 | ||||
572 | return (0); | |||
573 | } | |||
574 | ||||
575 | int | |||
576 | vmd_check_vmh(struct vm_dump_header *vmh) | |||
577 | { | |||
578 | int i; | |||
579 | unsigned int code, leaf; | |||
580 | unsigned int a, b, c, d; | |||
581 | ||||
582 | if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE"OpenBSDVMM58", strlen(VM_DUMP_SIGNATURE"OpenBSDVMM58")) != 0) { | |||
583 | log_warnx("%s: incompatible dump signature", __func__); | |||
584 | return (-1); | |||
585 | } | |||
586 | ||||
587 | if (vmh->vmh_version != VM_DUMP_VERSION7) { | |||
588 | log_warnx("%s: incompatible dump version", __func__); | |||
589 | return (-1); | |||
590 | } | |||
591 | ||||
592 | for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT5; i++) { | |||
593 | code = vmh->vmh_cpuids[i].code; | |||
594 | leaf = vmh->vmh_cpuids[i].leaf; | |||
595 | if (leaf != 0x00) { | |||
596 | log_debug("%s: invalid leaf 0x%x for code 0x%x", | |||
597 | __func__, leaf, code); | |||
598 | return (-1); | |||
599 | } | |||
600 | ||||
601 | switch (code) { | |||
602 | case 0x00: | |||
603 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
604 | if (vmh->vmh_cpuids[i].a > a) { | |||
605 | log_debug("%s: incompatible cpuid level", | |||
606 | __func__); | |||
607 | return (-1); | |||
608 | } | |||
609 | if (!(vmh->vmh_cpuids[i].b == b && | |||
610 | vmh->vmh_cpuids[i].c == c && | |||
611 | vmh->vmh_cpuids[i].d == d)) { | |||
612 | log_debug("%s: incompatible cpu brand", | |||
613 | __func__); | |||
614 | return (-1); | |||
615 | } | |||
616 | break; | |||
617 | ||||
618 | case 0x01: | |||
619 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
620 | if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020 | 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800 | 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000 )) != | |||
621 | (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020 | 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800 | 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000 ))) { | |||
622 | log_debug("%s: incompatible cpu features " | |||
623 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
624 | code, leaf); | |||
625 | return (-1); | |||
626 | } | |||
627 | if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200 | 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080 | 0x00004000)) != | |||
628 | (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200 | 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080 | 0x00004000))) { | |||
629 | log_debug("%s: incompatible cpu features " | |||
630 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
631 | code, leaf); | |||
632 | return (-1); | |||
633 | } | |||
634 | break; | |||
635 | ||||
636 | case 0x07: | |||
637 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
638 | if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800 | 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000 | 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000 | 0x40000000 | 0x80000000)) != | |||
639 | (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800 | 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000 | 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000 | 0x40000000 | 0x80000000))) { | |||
640 | log_debug("%s: incompatible cpu features " | |||
641 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
642 | code, leaf); | |||
643 | return (-1); | |||
644 | } | |||
645 | if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK~(0x00000002)) != | |||
646 | (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK~(0x00000002))) { | |||
647 | log_debug("%s: incompatible cpu features " | |||
648 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
649 | code, leaf); | |||
650 | return (-1); | |||
651 | } | |||
652 | break; | |||
653 | ||||
654 | case 0x0d: | |||
655 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
656 | if (vmh->vmh_cpuids[i].b > b) { | |||
657 | log_debug("%s: incompatible cpu: insufficient " | |||
658 | "max save area for enabled XCR0 features", | |||
659 | __func__); | |||
660 | return (-1); | |||
661 | } | |||
662 | if (vmh->vmh_cpuids[i].c > c) { | |||
663 | log_debug("%s: incompatible cpu: insufficient " | |||
664 | "max save area for supported XCR0 features", | |||
665 | __func__); | |||
666 | return (-1); | |||
667 | } | |||
668 | break; | |||
669 | ||||
670 | case 0x80000001: | |||
671 | CPUID_LEAF(code, leaf, a, b, c, d)__asm volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" ( d) : "a" (code), "c" (leaf)); | |||
672 | if ((vmh->vmh_cpuids[i].a & a) != | |||
673 | vmh->vmh_cpuids[i].a) { | |||
674 | log_debug("%s: incompatible cpu features " | |||
675 | "code: 0x%x leaf: 0x%x reg: a", __func__, | |||
676 | code, leaf); | |||
677 | return (-1); | |||
678 | } | |||
679 | if ((vmh->vmh_cpuids[i].c & c) != | |||
680 | vmh->vmh_cpuids[i].c) { | |||
681 | log_debug("%s: incompatible cpu features " | |||
682 | "code: 0x%x leaf: 0x%x reg: c", __func__, | |||
683 | code, leaf); | |||
684 | return (-1); | |||
685 | } | |||
686 | if ((vmh->vmh_cpuids[i].d & d) != | |||
687 | vmh->vmh_cpuids[i].d) { | |||
688 | log_debug("%s: incompatible cpu features " | |||
689 | "code: 0x%x leaf: 0x%x reg: d", __func__, | |||
690 | code, leaf); | |||
691 | return (-1); | |||
692 | } | |||
693 | break; | |||
694 | ||||
695 | default: | |||
696 | log_debug("%s: unknown code 0x%x", __func__, code); | |||
697 | return (-1); | |||
698 | } | |||
699 | } | |||
700 | ||||
701 | return (0); | |||
702 | } | |||
703 | ||||
704 | void | |||
705 | vmd_sighdlr(int sig, short event, void *arg) | |||
706 | { | |||
707 | if (privsep_process != PROC_PARENT) | |||
708 | return; | |||
709 | log_debug("%s: handling signal", __func__); | |||
710 | ||||
711 | switch (sig) { | |||
712 | case SIGHUP1: | |||
713 | log_info("%s: reload requested with SIGHUP", __func__); | |||
714 | ||||
715 | /* | |||
716 | * This is safe because libevent uses async signal handlers | |||
717 | * that run in the event loop and not in signal context. | |||
718 | */ | |||
719 | (void)vmd_reload(0, NULL((void *)0)); | |||
720 | break; | |||
721 | case SIGPIPE13: | |||
722 | log_info("%s: ignoring SIGPIPE", __func__); | |||
723 | break; | |||
724 | case SIGUSR130: | |||
725 | log_info("%s: ignoring SIGUSR1", __func__); | |||
726 | break; | |||
727 | case SIGTERM15: | |||
728 | case SIGINT2: | |||
729 | vmd_shutdown(); | |||
730 | break; | |||
731 | default: | |||
732 | fatalx("unexpected signal"); | |||
733 | } | |||
734 | } | |||
735 | ||||
736 | __dead__attribute__((__noreturn__)) void | |||
737 | usage(void) | |||
738 | { | |||
739 | extern char *__progname; | |||
740 | fprintf(stderr(&__sF[2]), "usage: %s [-dnv] [-D macro=value] [-f file]\n", | |||
741 | __progname); | |||
742 | exit(1); | |||
743 | } | |||
744 | ||||
745 | int | |||
746 | main(int argc, char **argv) | |||
747 | { | |||
748 | struct privsep *ps; | |||
749 | int ch; | |||
750 | const char *conffile = VMD_CONF"/etc/vm.conf"; | |||
751 | enum privsep_procid proc_id = PROC_PARENT; | |||
752 | int proc_instance = 0; | |||
753 | const char *errp, *title = NULL((void *)0); | |||
754 | int argc0 = argc; | |||
755 | ||||
756 | log_init(0, LOG_DAEMON(3<<3)); | |||
757 | ||||
758 | if ((env = calloc(1, sizeof(*env))) == NULL((void *)0)) | |||
759 | fatal("calloc: env"); | |||
760 | ||||
761 | while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) { | |||
762 | switch (ch) { | |||
763 | case 'D': | |||
764 | if (cmdline_symset(optarg) < 0) | |||
765 | log_warnx("could not parse macro definition %s", | |||
766 | optarg); | |||
767 | break; | |||
768 | case 'd': | |||
769 | env->vmd_debug = 2; | |||
770 | break; | |||
771 | case 'f': | |||
772 | conffile = optarg; | |||
773 | break; | |||
774 | case 'v': | |||
775 | env->vmd_verbose++; | |||
776 | break; | |||
777 | case 'n': | |||
778 | env->vmd_noaction = 1; | |||
779 | break; | |||
780 | case 'P': | |||
781 | title = optarg; | |||
782 | proc_id = proc_getid(procs, nitems(procs)(sizeof((procs)) / sizeof((procs)[0])), title); | |||
783 | if (proc_id == PROC_MAX) | |||
784 | fatalx("invalid process name"); | |||
785 | break; | |||
786 | case 'I': | |||
787 | proc_instance = strtonum(optarg, 0, | |||
788 | PROC_MAX_INSTANCES32, &errp); | |||
789 | if (errp) | |||
790 | fatalx("invalid process instance"); | |||
791 | break; | |||
792 | default: | |||
793 | usage(); | |||
794 | } | |||
795 | } | |||
796 | ||||
797 | argc -= optind; | |||
798 | if (argc > 0) | |||
799 | usage(); | |||
800 | ||||
801 | if (env->vmd_noaction && !env->vmd_debug) | |||
802 | env->vmd_debug = 1; | |||
803 | ||||
804 | log_init(env->vmd_debug, LOG_DAEMON(3<<3)); | |||
805 | log_setverbose(env->vmd_verbose); | |||
806 | ||||
807 | /* check for root privileges */ | |||
808 | if (env->vmd_noaction == 0) { | |||
809 | if (geteuid()) | |||
810 | fatalx("need root privileges"); | |||
811 | } | |||
812 | ||||
813 | ps = &env->vmd_ps; | |||
814 | ps->ps_env = env; | |||
815 | env->vmd_fd = -1; | |||
816 | ||||
817 | if (config_init(env) == -1) | |||
818 | fatal("failed to initialize configuration"); | |||
819 | ||||
820 | if ((ps->ps_pw = getpwnam(VMD_USER"_vmd")) == NULL((void *)0)) | |||
821 | fatal("unknown user %s", VMD_USER"_vmd"); | |||
822 | ||||
823 | /* First proc runs as root without pledge but in default chroot */ | |||
824 | proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */ | |||
825 | proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ | |||
826 | ||||
827 | /* Open /dev/vmm */ | |||
828 | if (env->vmd_noaction == 0) { | |||
829 | env->vmd_fd = open(VMM_NODE"/dev/vmm", O_RDWR0x0002); | |||
830 | if (env->vmd_fd == -1) | |||
831 | fatal("%s", VMM_NODE"/dev/vmm"); | |||
832 | } | |||
833 | ||||
834 | /* Configure the control socket */ | |||
835 | ps->ps_csock.cs_name = SOCKET_NAME"/var/run/vmd.sock"; | |||
836 | TAILQ_INIT(&ps->ps_rcsocks)do { (&ps->ps_rcsocks)->tqh_first = ((void *)0); (& ps->ps_rcsocks)->tqh_last = &(&ps->ps_rcsocks )->tqh_first; } while (0); | |||
837 | ||||
838 | /* Configuration will be parsed after forking the children */ | |||
839 | env->vmd_conffile = conffile; | |||
840 | ||||
841 | if (env->vmd_noaction) | |||
842 | ps->ps_noaction = 1; | |||
843 | ps->ps_instance = proc_instance; | |||
844 | if (title != NULL((void *)0)) | |||
845 | ps->ps_title[proc_id] = title; | |||
846 | ||||
847 | /* only the parent returns */ | |||
848 | proc_init(ps, procs, nitems(procs)(sizeof((procs)) / sizeof((procs)[0])), env->vmd_debug, argc0, argv, | |||
849 | proc_id); | |||
850 | ||||
851 | log_procinit("parent"); | |||
852 | if (!env->vmd_debug && daemon(0, 0) == -1) | |||
853 | fatal("can't daemonize"); | |||
854 | ||||
855 | if (ps->ps_noaction == 0) | |||
856 | log_info("startup"); | |||
857 | ||||
858 | event_init(); | |||
859 | ||||
860 | signal_set(&ps->ps_evsigint, SIGINT, vmd_sighdlr, ps)event_set(&ps->ps_evsigint, 2, 0x08|0x10, vmd_sighdlr, ps); | |||
861 | signal_set(&ps->ps_evsigterm, SIGTERM, vmd_sighdlr, ps)event_set(&ps->ps_evsigterm, 15, 0x08|0x10, vmd_sighdlr , ps); | |||
862 | signal_set(&ps->ps_evsighup, SIGHUP, vmd_sighdlr, ps)event_set(&ps->ps_evsighup, 1, 0x08|0x10, vmd_sighdlr, ps); | |||
863 | signal_set(&ps->ps_evsigpipe, SIGPIPE, vmd_sighdlr, ps)event_set(&ps->ps_evsigpipe, 13, 0x08|0x10, vmd_sighdlr , ps); | |||
864 | signal_set(&ps->ps_evsigusr1, SIGUSR1, vmd_sighdlr, ps)event_set(&ps->ps_evsigusr1, 30, 0x08|0x10, vmd_sighdlr , ps); | |||
865 | ||||
866 | signal_add(&ps->ps_evsigint, NULL)event_add(&ps->ps_evsigint, ((void *)0)); | |||
867 | signal_add(&ps->ps_evsigterm, NULL)event_add(&ps->ps_evsigterm, ((void *)0)); | |||
868 | signal_add(&ps->ps_evsighup, NULL)event_add(&ps->ps_evsighup, ((void *)0)); | |||
869 | signal_add(&ps->ps_evsigpipe, NULL)event_add(&ps->ps_evsigpipe, ((void *)0)); | |||
870 | signal_add(&ps->ps_evsigusr1, NULL)event_add(&ps->ps_evsigusr1, ((void *)0)); | |||
871 | ||||
872 | if (!env->vmd_noaction) | |||
873 | proc_connect(ps); | |||
874 | ||||
875 | if (vmd_configure() == -1) | |||
876 | fatalx("configuration failed"); | |||
877 | ||||
878 | event_dispatch(); | |||
879 | ||||
880 | log_debug("parent exiting"); | |||
881 | ||||
882 | return (0); | |||
883 | } | |||
884 | ||||
885 | void | |||
886 | start_vm_batch(int fd, short type, void *args) | |||
887 | { | |||
888 | int i = 0; | |||
889 | struct vmd_vm *vm; | |||
890 | ||||
891 | log_debug("%s: starting batch of %d vms", __func__, | |||
892 | env->vmd_cfg.parallelism); | |||
893 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
894 | if (!(vm->vm_state & VM_STATE_WAITING0x20)) { | |||
895 | log_debug("%s: not starting vm %s (disabled)", | |||
896 | __func__, | |||
897 | vm->vm_params.vmc_params.vcp_name); | |||
898 | continue; | |||
899 | } | |||
900 | i++; | |||
901 | if (i > env->vmd_cfg.parallelism) { | |||
902 | evtimer_add(&staggered_start_timer,event_add(&staggered_start_timer, &env->vmd_cfg.delay ) | |||
903 | &env->vmd_cfg.delay)event_add(&staggered_start_timer, &env->vmd_cfg.delay ); | |||
904 | break; | |||
905 | } | |||
906 | vm->vm_state &= ~VM_STATE_WAITING0x20; | |||
907 | config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); | |||
908 | } | |||
909 | log_debug("%s: done starting vms", __func__); | |||
910 | } | |||
911 | ||||
912 | int | |||
913 | vmd_configure(void) | |||
914 | { | |||
915 | int ncpus; | |||
916 | struct vmd_switch *vsw; | |||
917 | int ncpu_mib[] = {CTL_HW6, HW_NCPUONLINE25}; | |||
918 | size_t ncpus_sz = sizeof(ncpus); | |||
919 | ||||
920 | if ((env->vmd_ptmfd = open(PATH_PTMDEV"/dev/ptm", O_RDWR0x0002|O_CLOEXEC0x10000)) == -1) | |||
921 | fatal("open %s", PATH_PTMDEV"/dev/ptm"); | |||
922 | ||||
923 | /* | |||
924 | * pledge in the parent process: | |||
925 | * stdio - for malloc and basic I/O including events. | |||
926 | * rpath - for reload to open and read the configuration files. | |||
927 | * wpath - for opening disk images and tap devices. | |||
928 | * tty - for openpty and TIOCUCNTL. | |||
929 | * proc - run kill to terminate its children safely. | |||
930 | * sendfd - for disks, interfaces and other fds. | |||
931 | * recvfd - for send and receive. | |||
932 | * getpw - lookup user or group id by name. | |||
933 | * chown, fattr - change tty ownership | |||
934 | * flock - locking disk files | |||
935 | */ | |||
936 | if (pledge("stdio rpath wpath proc tty recvfd sendfd getpw" | |||
937 | " chown fattr flock", NULL((void *)0)) == -1) | |||
938 | fatal("pledge"); | |||
939 | ||||
940 | if (parse_config(env->vmd_conffile) == -1) { | |||
941 | proc_kill(&env->vmd_ps); | |||
942 | exit(1); | |||
943 | } | |||
944 | ||||
945 | if (env->vmd_noaction) { | |||
946 | fprintf(stderr(&__sF[2]), "configuration OK\n"); | |||
947 | proc_kill(&env->vmd_ps); | |||
948 | exit(0); | |||
949 | } | |||
950 | ||||
951 | /* Send shared global configuration to all children */ | |||
952 | if (config_setconfig(env) == -1) | |||
953 | return (-1); | |||
954 | ||||
955 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
956 | if (vsw->sw_running) | |||
957 | continue; | |||
958 | if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { | |||
959 | log_warn("%s: failed to create switch %s", | |||
960 | __func__, vsw->sw_name); | |||
961 | switch_remove(vsw); | |||
962 | return (-1); | |||
963 | } | |||
964 | } | |||
965 | ||||
966 | if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START0x04)) { | |||
967 | env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY30; | |||
968 | if (sysctl(ncpu_mib, nitems(ncpu_mib)(sizeof((ncpu_mib)) / sizeof((ncpu_mib)[0])), &ncpus, &ncpus_sz, NULL((void *)0), 0) == -1) | |||
969 | ncpus = 1; | |||
970 | env->vmd_cfg.parallelism = ncpus; | |||
971 | log_debug("%s: setting staggered start configuration to " | |||
972 | "parallelism: %d and delay: %lld", | |||
973 | __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec); | |||
974 | } | |||
975 | ||||
976 | log_debug("%s: starting vms in staggered fashion", __func__); | |||
977 | evtimer_set(&staggered_start_timer, start_vm_batch, NULL)event_set(&staggered_start_timer, -1, 0, start_vm_batch, ( (void *)0)); | |||
978 | /* start first batch */ | |||
979 | start_vm_batch(0, 0, NULL((void *)0)); | |||
980 | ||||
981 | return (0); | |||
982 | } | |||
983 | ||||
984 | int | |||
985 | vmd_reload(unsigned int reset, const char *filename) | |||
986 | { | |||
987 | struct vmd_vm *vm, *next_vm; | |||
988 | struct vmd_switch *vsw; | |||
989 | int reload = 0; | |||
990 | ||||
991 | /* Switch back to the default config file */ | |||
992 | if (filename == NULL((void *)0) || *filename == '\0') { | |||
993 | filename = env->vmd_conffile; | |||
994 | reload = 1; | |||
995 | } | |||
996 | ||||
997 | log_debug("%s: level %d config file %s", __func__, reset, filename); | |||
998 | ||||
999 | if (reset) { | |||
1000 | /* Purge the configuration */ | |||
1001 | config_purge(env, reset); | |||
1002 | config_setreset(env, reset); | |||
1003 | } else { | |||
1004 | /* | |||
1005 | * Load or reload the configuration. | |||
1006 | * | |||
1007 | * Reloading removes all non-running VMs before processing the | |||
1008 | * config file, whereas loading only adds to the existing list | |||
1009 | * of VMs. | |||
1010 | */ | |||
1011 | ||||
1012 | if (reload) { | |||
1013 | TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry,for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((next_vm) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (next_vm)) | |||
1014 | next_vm)for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((next_vm) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (next_vm)) { | |||
1015 | if (!(vm->vm_state & VM_STATE_RUNNING0x01)) { | |||
1016 | DPRINTF("%s: calling vm_remove",do {} while(0) | |||
1017 | __func__)do {} while(0); | |||
1018 | vm_remove(vm, __func__); | |||
1019 | } | |||
1020 | } | |||
1021 | } | |||
1022 | ||||
1023 | if (parse_config(filename) == -1) { | |||
1024 | log_debug("%s: failed to load config file %s", | |||
1025 | __func__, filename); | |||
1026 | return (-1); | |||
1027 | } | |||
1028 | ||||
1029 | if (reload) { | |||
1030 | /* Update shared global configuration in all children */ | |||
1031 | if (config_setconfig(env) == -1) | |||
1032 | return (-1); | |||
1033 | } | |||
1034 | ||||
1035 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
1036 | if (vsw->sw_running) | |||
1037 | continue; | |||
1038 | if (vm_priv_brconfig(&env->vmd_ps, vsw) == -1) { | |||
1039 | log_warn("%s: failed to create switch %s", | |||
1040 | __func__, vsw->sw_name); | |||
1041 | switch_remove(vsw); | |||
1042 | return (-1); | |||
1043 | } | |||
1044 | } | |||
1045 | ||||
1046 | log_debug("%s: starting vms in staggered fashion", __func__); | |||
1047 | evtimer_set(&staggered_start_timer, start_vm_batch, NULL)event_set(&staggered_start_timer, -1, 0, start_vm_batch, ( (void *)0)); | |||
1048 | /* start first batch */ | |||
1049 | start_vm_batch(0, 0, NULL((void *)0)); | |||
1050 | ||||
1051 | } | |||
1052 | ||||
1053 | return (0); | |||
1054 | } | |||
1055 | ||||
1056 | void | |||
1057 | vmd_shutdown(void) | |||
1058 | { | |||
1059 | struct vmd_vm *vm, *vm_next; | |||
1060 | ||||
1061 | log_debug("%s: performing shutdown", __func__); | |||
1062 | ||||
1063 | TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next)for ((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void *)0) && ((vm_next) = ((vm)->vm_entry.tqe_next), 1 ); (vm) = (vm_next)) { | |||
1064 | vm_remove(vm, __func__); | |||
1065 | } | |||
1066 | ||||
1067 | proc_kill(&env->vmd_ps); | |||
1068 | free(env); | |||
1069 | ||||
1070 | log_warnx("parent terminating"); | |||
1071 | exit(0); | |||
1072 | } | |||
1073 | ||||
1074 | struct vmd_vm * | |||
1075 | vm_getbyvmid(uint32_t vmid) | |||
1076 | { | |||
1077 | struct vmd_vm *vm; | |||
1078 | ||||
1079 | if (vmid == 0) | |||
1080 | return (NULL((void *)0)); | |||
1081 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1082 | if (vm->vm_vmid == vmid) | |||
1083 | return (vm); | |||
1084 | } | |||
1085 | ||||
1086 | return (NULL((void *)0)); | |||
1087 | } | |||
1088 | ||||
1089 | struct vmd_vm * | |||
1090 | vm_getbyid(uint32_t id) | |||
1091 | { | |||
1092 | struct vmd_vm *vm; | |||
1093 | ||||
1094 | if (id == 0) | |||
1095 | return (NULL((void *)0)); | |||
1096 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1097 | if (vm->vm_params.vmc_params.vcp_id == id) | |||
1098 | return (vm); | |||
1099 | } | |||
1100 | ||||
1101 | return (NULL((void *)0)); | |||
1102 | } | |||
1103 | ||||
1104 | uint32_t | |||
1105 | vm_id2vmid(uint32_t id, struct vmd_vm *vm) | |||
1106 | { | |||
1107 | if (vm == NULL((void *)0) && (vm = vm_getbyid(id)) == NULL((void *)0)) | |||
1108 | return (0); | |||
1109 | DPRINTF("%s: vmm id %u is vmid %u", __func__,do {} while(0) | |||
1110 | id, vm->vm_vmid)do {} while(0); | |||
1111 | return (vm->vm_vmid); | |||
1112 | } | |||
1113 | ||||
1114 | uint32_t | |||
1115 | vm_vmid2id(uint32_t vmid, struct vmd_vm *vm) | |||
1116 | { | |||
1117 | if (vm == NULL((void *)0) && (vm = vm_getbyvmid(vmid)) == NULL((void *)0)) | |||
1118 | return (0); | |||
1119 | DPRINTF("%s: vmid %u is vmm id %u", __func__,do {} while(0) | |||
1120 | vmid, vm->vm_params.vmc_params.vcp_id)do {} while(0); | |||
1121 | return (vm->vm_params.vmc_params.vcp_id); | |||
1122 | } | |||
1123 | ||||
1124 | struct vmd_vm * | |||
1125 | vm_getbyname(const char *name) | |||
1126 | { | |||
1127 | struct vmd_vm *vm; | |||
1128 | ||||
1129 | if (name == NULL((void *)0)) | |||
1130 | return (NULL((void *)0)); | |||
1131 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1132 | if (strcmp(vm->vm_params.vmc_params.vcp_name, name) == 0) | |||
1133 | return (vm); | |||
1134 | } | |||
1135 | ||||
1136 | return (NULL((void *)0)); | |||
1137 | } | |||
1138 | ||||
1139 | struct vmd_vm * | |||
1140 | vm_getbypid(pid_t pid) | |||
1141 | { | |||
1142 | struct vmd_vm *vm; | |||
1143 | ||||
1144 | TAILQ_FOREACH(vm, env->vmd_vms, vm_entry)for((vm) = ((env->vmd_vms)->tqh_first); (vm) != ((void * )0); (vm) = ((vm)->vm_entry.tqe_next)) { | |||
1145 | if (vm->vm_pid == pid) | |||
1146 | return (vm); | |||
1147 | } | |||
1148 | ||||
1149 | return (NULL((void *)0)); | |||
1150 | } | |||
1151 | ||||
1152 | void | |||
1153 | vm_stop(struct vmd_vm *vm, int keeptty, const char *caller) | |||
1154 | { | |||
1155 | struct privsep *ps = &env->vmd_ps; | |||
1156 | unsigned int i, j; | |||
1157 | ||||
1158 | if (vm == NULL((void *)0)) | |||
1159 | return; | |||
1160 | ||||
1161 | log_debug("%s: %s %s stopping vm %d%s", | |||
1162 | __func__, ps->ps_title[privsep_process], caller, | |||
1163 | vm->vm_vmid, keeptty ? ", keeping tty open" : ""); | |||
1164 | ||||
1165 | vm->vm_state &= ~(VM_STATE_RUNNING0x01 | VM_STATE_SHUTDOWN0x04); | |||
1166 | ||||
1167 | user_inc(&vm->vm_params.vmc_params, vm->vm_user, 0); | |||
1168 | user_put(vm->vm_user); | |||
1169 | ||||
1170 | if (vm->vm_iev.ibuf.fd != -1) { | |||
1171 | event_del(&vm->vm_iev.ev); | |||
1172 | close(vm->vm_iev.ibuf.fd); | |||
1173 | } | |||
1174 | for (i = 0; i < VMM_MAX_DISKS_PER_VM4; i++) { | |||
1175 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) { | |||
1176 | if (vm->vm_disks[i][j] != -1) { | |||
1177 | close(vm->vm_disks[i][j]); | |||
1178 | vm->vm_disks[i][j] = -1; | |||
1179 | } | |||
1180 | } | |||
1181 | } | |||
1182 | for (i = 0; i < VMM_MAX_NICS_PER_VM4; i++) { | |||
1183 | if (vm->vm_ifs[i].vif_fd != -1) { | |||
1184 | close(vm->vm_ifs[i].vif_fd); | |||
1185 | vm->vm_ifs[i].vif_fd = -1; | |||
1186 | } | |||
1187 | free(vm->vm_ifs[i].vif_name); | |||
1188 | free(vm->vm_ifs[i].vif_switch); | |||
1189 | free(vm->vm_ifs[i].vif_group); | |||
1190 | vm->vm_ifs[i].vif_name = NULL((void *)0); | |||
1191 | vm->vm_ifs[i].vif_switch = NULL((void *)0); | |||
1192 | vm->vm_ifs[i].vif_group = NULL((void *)0); | |||
1193 | } | |||
1194 | if (vm->vm_kernel != -1) { | |||
1195 | close(vm->vm_kernel); | |||
1196 | vm->vm_kernel = -1; | |||
1197 | } | |||
1198 | if (vm->vm_cdrom != -1) { | |||
1199 | close(vm->vm_cdrom); | |||
1200 | vm->vm_cdrom = -1; | |||
1201 | } | |||
1202 | if (!keeptty) { | |||
1203 | vm_closetty(vm); | |||
1204 | vm->vm_uid = 0; | |||
1205 | } | |||
1206 | } | |||
1207 | ||||
1208 | void | |||
1209 | vm_remove(struct vmd_vm *vm, const char *caller) | |||
1210 | { | |||
1211 | struct privsep *ps = &env->vmd_ps; | |||
1212 | ||||
1213 | if (vm == NULL((void *)0)) | |||
1214 | return; | |||
1215 | ||||
1216 | log_debug("%s: %s %s removing vm %d from running config", | |||
1217 | __func__, ps->ps_title[privsep_process], caller, | |||
1218 | vm->vm_vmid); | |||
1219 | ||||
1220 | TAILQ_REMOVE(env->vmd_vms, vm, vm_entry)do { if (((vm)->vm_entry.tqe_next) != ((void *)0)) (vm)-> vm_entry.tqe_next->vm_entry.tqe_prev = (vm)->vm_entry.tqe_prev ; else (env->vmd_vms)->tqh_last = (vm)->vm_entry.tqe_prev ; *(vm)->vm_entry.tqe_prev = (vm)->vm_entry.tqe_next; ; ; } while (0); | |||
1221 | ||||
1222 | user_put(vm->vm_user); | |||
1223 | vm_stop(vm, 0, caller); | |||
1224 | free(vm); | |||
1225 | } | |||
1226 | ||||
1227 | int | |||
1228 | vm_claimid(const char *name, int uid, uint32_t *id) | |||
1229 | { | |||
1230 | struct name2id *n2i = NULL((void *)0); | |||
1231 | ||||
1232 | TAILQ_FOREACH(n2i, env->vmd_known, entry)for((n2i) = ((env->vmd_known)->tqh_first); (n2i) != ((void *)0); (n2i) = ((n2i)->entry.tqe_next)) | |||
1233 | if (strcmp(n2i->name, name) == 0 && n2i->uid == uid) | |||
1234 | goto out; | |||
1235 | ||||
1236 | if (++env->vmd_nvm == 0) { | |||
1237 | log_warnx("too many vms"); | |||
1238 | return -1; | |||
1239 | } | |||
1240 | if ((n2i = calloc(1, sizeof(struct name2id))) == NULL((void *)0)) { | |||
1241 | log_warnx("could not alloc vm name"); | |||
1242 | return -1; | |||
1243 | } | |||
1244 | n2i->id = env->vmd_nvm; | |||
1245 | n2i->uid = uid; | |||
1246 | if (strlcpy(n2i->name, name, sizeof(n2i->name)) >= sizeof(n2i->name)) { | |||
1247 | log_warnx("vm name too long"); | |||
1248 | free(n2i); | |||
1249 | return -1; | |||
1250 | } | |||
1251 | TAILQ_INSERT_TAIL(env->vmd_known, n2i, entry)do { (n2i)->entry.tqe_next = ((void *)0); (n2i)->entry. tqe_prev = (env->vmd_known)->tqh_last; *(env->vmd_known )->tqh_last = (n2i); (env->vmd_known)->tqh_last = & (n2i)->entry.tqe_next; } while (0); | |||
1252 | ||||
1253 | out: | |||
1254 | *id = n2i->id; | |||
1255 | return 0; | |||
1256 | } | |||
1257 | ||||
1258 | int | |||
1259 | vm_register(struct privsep *ps, struct vmop_create_params *vmc, | |||
1260 | struct vmd_vm **ret_vm, uint32_t id, uid_t uid) | |||
1261 | { | |||
1262 | struct vmd_vm *vm = NULL((void *)0), *vm_parent = NULL((void *)0); | |||
1263 | struct vm_create_params *vcp = &vmc->vmc_params; | |||
1264 | struct vmop_owner *vmo = NULL((void *)0); | |||
1265 | struct vmd_user *usr = NULL((void *)0); | |||
1266 | uint32_t nid, rng; | |||
1267 | unsigned int i, j; | |||
1268 | struct vmd_switch *sw; | |||
1269 | char *s; | |||
1270 | int ret = 0; | |||
1271 | ||||
1272 | /* Check if this is an instance of another VM */ | |||
1273 | if ((ret = vm_instance(ps, &vm_parent, vmc, uid)) != 0) { | |||
1274 | errno(*__errno()) = ret; /* XXX might set invalid errno */ | |||
1275 | return (-1); | |||
1276 | } | |||
1277 | ||||
1278 | errno(*__errno()) = 0; | |||
1279 | *ret_vm = NULL((void *)0); | |||
1280 | ||||
1281 | if ((vm = vm_getbyname(vcp->vcp_name)) != NULL((void *)0) || | |||
1282 | (vm = vm_getbyvmid(vcp->vcp_id)) != NULL((void *)0)) { | |||
1283 | if (vm_checkperm(vm, &vm->vm_params.vmc_owner, | |||
1284 | uid) != 0) { | |||
1285 | errno(*__errno()) = EPERM1; | |||
1286 | goto fail; | |||
1287 | } | |||
1288 | *ret_vm = vm; | |||
1289 | errno(*__errno()) = EALREADY37; | |||
1290 | goto fail; | |||
1291 | } | |||
1292 | ||||
1293 | if (vm_parent != NULL((void *)0)) | |||
1294 | vmo = &vm_parent->vm_params.vmc_insowner; | |||
1295 | ||||
1296 | /* non-root users can only start existing VMs or instances */ | |||
1297 | if (vm_checkperm(NULL((void *)0), vmo, uid) != 0) { | |||
1298 | log_warnx("permission denied"); | |||
1299 | errno(*__errno()) = EPERM1; | |||
1300 | goto fail; | |||
1301 | } | |||
1302 | if (vmc->vmc_flags == 0) { | |||
1303 | log_warnx("invalid configuration, no devices"); | |||
1304 | errno(*__errno()) = VMD_DISK_MISSING1002; | |||
1305 | goto fail; | |||
1306 | } | |||
1307 | if (vcp->vcp_ncpus == 0) | |||
1308 | vcp->vcp_ncpus = 1; | |||
1309 | if (vcp->vcp_memranges[0].vmr_size == 0) | |||
1310 | vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY512; | |||
1311 | if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM64) { | |||
1312 | log_warnx("invalid number of CPUs"); | |||
1313 | goto fail; | |||
1314 | } else if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM4) { | |||
1315 | log_warnx("invalid number of disks"); | |||
1316 | goto fail; | |||
1317 | } else if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM4) { | |||
1318 | log_warnx("invalid number of interfaces"); | |||
1319 | goto fail; | |||
1320 | } else if (strlen(vcp->vcp_kernel) == 0 && | |||
1321 | vcp->vcp_ndisks == 0 && strlen(vcp->vcp_cdrom) == 0) { | |||
1322 | log_warnx("no kernel or disk/cdrom specified"); | |||
1323 | goto fail; | |||
1324 | } else if (strlen(vcp->vcp_name) == 0) { | |||
1325 | log_warnx("invalid VM name"); | |||
1326 | goto fail; | |||
1327 | } else if (*vcp->vcp_name == '-' || *vcp->vcp_name == '.' || | |||
1328 | *vcp->vcp_name == '_') { | |||
1329 | log_warnx("invalid VM name"); | |||
1330 | goto fail; | |||
1331 | } else { | |||
1332 | for (s = vcp->vcp_name; *s != '\0'; ++s) { | |||
1333 | if (!(isalnum(*s) || *s == '.' || *s == '-' || | |||
1334 | *s == '_')) { | |||
1335 | log_warnx("invalid VM name"); | |||
1336 | goto fail; | |||
1337 | } | |||
1338 | } | |||
1339 | } | |||
1340 | ||||
1341 | /* track active users */ | |||
1342 | if (uid != 0 && env->vmd_users != NULL((void *)0) && | |||
1343 | (usr = user_get(uid)) == NULL((void *)0)) { | |||
1344 | log_warnx("could not add user"); | |||
1345 | goto fail; | |||
1346 | } | |||
1347 | ||||
1348 | if ((vm = calloc(1, sizeof(*vm))) == NULL((void *)0)) | |||
1349 | goto fail; | |||
1350 | ||||
1351 | memcpy(&vm->vm_params, vmc, sizeof(vm->vm_params)); | |||
1352 | vmc = &vm->vm_params; | |||
1353 | vcp = &vmc->vmc_params; | |||
1354 | vm->vm_pid = -1; | |||
1355 | vm->vm_tty = -1; | |||
1356 | vm->vm_receive_fd = -1; | |||
1357 | vm->vm_state &= ~VM_STATE_PAUSED0x10; | |||
1358 | vm->vm_user = usr; | |||
1359 | ||||
1360 | for (i = 0; i < VMM_MAX_DISKS_PER_VM4; i++) | |||
1361 | for (j = 0; j < VM_MAX_BASE_PER_DISK4; j++) | |||
1362 | vm->vm_disks[i][j] = -1; | |||
1363 | for (i = 0; i < VMM_MAX_NICS_PER_VM4; i++) | |||
1364 | vm->vm_ifs[i].vif_fd = -1; | |||
1365 | for (i = 0; i < vcp->vcp_nnics; i++) { | |||
1366 | if ((sw = switch_getbyname(vmc->vmc_ifswitch[i])) != NULL((void *)0)) { | |||
1367 | /* inherit per-interface flags from the switch */ | |||
1368 | vmc->vmc_ifflags[i] |= (sw->sw_flags & VMIFF_OPTMASK(0x02|0x04|0x08)); | |||
1369 | } | |||
1370 | ||||
1371 | /* | |||
1372 | * If the MAC address is zero, always randomize it in vmd(8) | |||
1373 | * because we cannot rely on the guest OS to do the right | |||
1374 | * thing like OpenBSD does. Based on ether_fakeaddr() | |||
1375 | * from the kernel, incremented by one to differentiate | |||
1376 | * the source. | |||
1377 | */ | |||
1378 | if (memcmp(zero_mac, &vcp->vcp_macs[i], ETHER_ADDR_LEN6) == 0) { | |||
1379 | rng = arc4random(); | |||
1380 | vcp->vcp_macs[i][0] = 0xfe; | |||
1381 | vcp->vcp_macs[i][1] = 0xe1; | |||
1382 | vcp->vcp_macs[i][2] = 0xba + 1; | |||
1383 | vcp->vcp_macs[i][3] = 0xd0 | ((i + 1) & 0xf); | |||
1384 | vcp->vcp_macs[i][4] = rng; | |||
1385 | vcp->vcp_macs[i][5] = rng >> 8; | |||
1386 | } | |||
1387 | } | |||
1388 | vm->vm_kernel = -1; | |||
1389 | vm->vm_cdrom = -1; | |||
1390 | vm->vm_iev.ibuf.fd = -1; | |||
1391 | ||||
1392 | /* | |||
1393 | * Assign a new internal Id if not specified and we succeed in | |||
1394 | * claiming a new Id. | |||
1395 | */ | |||
1396 | if (id != 0) | |||
1397 | vm->vm_vmid = id; | |||
1398 | else if (vm_claimid(vcp->vcp_name, uid, &nid) == -1) | |||
1399 | goto fail; | |||
1400 | else | |||
1401 | vm->vm_vmid = nid; | |||
1402 | ||||
1403 | log_debug("%s: registering vm %d", __func__, vm->vm_vmid); | |||
1404 | TAILQ_INSERT_TAIL(env->vmd_vms, vm, vm_entry)do { (vm)->vm_entry.tqe_next = ((void *)0); (vm)->vm_entry .tqe_prev = (env->vmd_vms)->tqh_last; *(env->vmd_vms )->tqh_last = (vm); (env->vmd_vms)->tqh_last = & (vm)->vm_entry.tqe_next; } while (0); | |||
1405 | ||||
1406 | *ret_vm = vm; | |||
1407 | return (0); | |||
1408 | fail: | |||
1409 | if (errno(*__errno()) == 0) | |||
1410 | errno(*__errno()) = EINVAL22; | |||
1411 | return (-1); | |||
1412 | } | |||
1413 | ||||
1414 | int | |||
1415 | vm_instance(struct privsep *ps, struct vmd_vm **vm_parent, | |||
1416 | struct vmop_create_params *vmc, uid_t uid) | |||
1417 | { | |||
1418 | char *name; | |||
1419 | struct vm_create_params *vcp = &vmc->vmc_params; | |||
1420 | struct vmop_create_params *vmcp; | |||
1421 | struct vm_create_params *vcpp; | |||
1422 | struct vmd_vm *vm = NULL((void *)0); | |||
1423 | unsigned int i, j; | |||
1424 | ||||
1425 | /* return without error if the parent is NULL (nothing to inherit) */ | |||
1426 | if ((vmc->vmc_flags & VMOP_CREATE_INSTANCE0x40) == 0 || | |||
1427 | vmc->vmc_instance[0] == '\0') | |||
1428 | return (0); | |||
1429 | ||||
1430 | if ((*vm_parent = vm_getbyname(vmc->vmc_instance)) == NULL((void *)0)) { | |||
1431 | return (VMD_PARENT_INVALID1007); | |||
1432 | } | |||
1433 | ||||
1434 | vmcp = &(*vm_parent)->vm_params; | |||
1435 | vcpp = &vmcp->vmc_params; | |||
1436 | ||||
1437 | /* Are we allowed to create an instance from this VM? */ | |||
1438 | if (vm_checkperm(NULL((void *)0), &vmcp->vmc_insowner, uid) != 0) { | |||
1439 | log_warnx("vm \"%s\" no permission to create vm instance", | |||
1440 | vcpp->vcp_name); | |||
1441 | return (ENAMETOOLONG63); | |||
1442 | } | |||
1443 | ||||
1444 | name = vcp->vcp_name; | |||
1445 | ||||
1446 | if ((vm = vm_getbyname(vcp->vcp_name)) != NULL((void *)0) || | |||
1447 | (vm = vm_getbyvmid(vcp->vcp_id)) != NULL((void *)0)) { | |||
1448 | return (EPROCLIM67); | |||
1449 | } | |||
1450 | ||||
1451 | /* CPU */ | |||
1452 | if (vcp->vcp_ncpus == 0) | |||
1453 | vcp->vcp_ncpus = vcpp->vcp_ncpus; | |||
1454 | if (vm_checkinsflag(vmcp, VMOP_CREATE_CPU0x01, uid) != 0 && | |||
1455 | vcp->vcp_ncpus != vcpp->vcp_ncpus) { | |||
1456 | log_warnx("vm \"%s\" no permission to set cpus", name); | |||
1457 | return (EPERM1); | |||
1458 | } | |||
1459 | ||||
1460 | /* memory */ | |||
1461 | if (vcp->vcp_memranges[0].vmr_size == 0) | |||
1462 | vcp->vcp_memranges[0].vmr_size = | |||
1463 | vcpp->vcp_memranges[0].vmr_size; | |||
1464 | if (vm_checkinsflag(vmcp, VMOP_CREATE_MEMORY0x04, uid) != 0 && | |||
1465 | vcp->vcp_memranges[0].vmr_size != | |||
1466 | vcpp->vcp_memranges[0].vmr_size) { | |||
1467 | log_warnx("vm \"%s\" no permission to set memory", name); | |||
1468 | return (EPERM1); | |||
1469 | } | |||
1470 | ||||
1471 | /* disks cannot be inherited */ | |||
1472 | if (vm_checkinsflag(vmcp, VMOP_CREATE_DISK0x10, uid) != 0 && | |||
1473 | vcp->vcp_ndisks) { | |||
1474 | log_warnx("vm \"%s\" no permission to set disks", name); | |||
1475 | return (EPERM1); | |||
1476 | } | |||
1477 | for (i = 0; i < vcp->vcp_ndisks; i++) { | |||
1478 | /* Check if this disk is already used in the parent */ | |||
1479 | for (j = 0; j < vcpp->vcp_ndisks; j++) { | |||
1480 | if (strcmp(vcp->vcp_disks[i], | |||
1481 | vcpp->vcp_disks[j]) == 0) { | |||
1482 | log_warnx("vm \"%s\" disk %s cannot be reused", | |||
1483 | name, vcp->vcp_disks[i]); | |||
1484 | return (EBUSY16); | |||
1485 | } | |||
1486 | } | |||
1487 | vmc->vmc_checkaccess |= VMOP_CREATE_DISK0x10; | |||
1488 | } | |||
1489 | ||||
1490 | /* interfaces */ | |||
1491 | if (vcp->vcp_nnics > 0 && | |||
1492 | vm_checkinsflag(vmcp, VMOP_CREATE_NETWORK0x08, uid) != 0 && | |||
1493 | vcp->vcp_nnics != vcpp->vcp_nnics) { | |||
1494 | log_warnx("vm \"%s\" no permission to set interfaces", name); | |||
1495 | return (EPERM1); | |||
1496 | } | |||
1497 | for (i = 0; i < vcpp->vcp_nnics; i++) { | |||
1498 | /* Interface got overwritten */ | |||
1499 | if (i < vcp->vcp_nnics) | |||
1500 | continue; | |||
1501 | ||||
1502 | /* Copy interface from parent */ | |||
1503 | vmc->vmc_ifflags[i] = vmcp->vmc_ifflags[i]; | |||
1504 | (void)strlcpy(vmc->vmc_ifnames[i], vmcp->vmc_ifnames[i], | |||
1505 | sizeof(vmc->vmc_ifnames[i])); | |||
1506 | (void)strlcpy(vmc->vmc_ifswitch[i], vmcp->vmc_ifswitch[i], | |||
1507 | sizeof(vmc->vmc_ifswitch[i])); | |||
1508 | (void)strlcpy(vmc->vmc_ifgroup[i], vmcp->vmc_ifgroup[i], | |||
1509 | sizeof(vmc->vmc_ifgroup[i])); | |||
1510 | memcpy(vcp->vcp_macs[i], vcpp->vcp_macs[i], | |||
1511 | sizeof(vcp->vcp_macs[i])); | |||
1512 | vmc->vmc_ifrdomain[i] = vmcp->vmc_ifrdomain[i]; | |||
1513 | vcp->vcp_nnics++; | |||
1514 | } | |||
1515 | for (i = 0; i < vcp->vcp_nnics; i++) { | |||
1516 | for (j = 0; j < vcpp->vcp_nnics; j++) { | |||
1517 | if (memcmp(zero_mac, vcp->vcp_macs[i], | |||
1518 | sizeof(vcp->vcp_macs[i])) != 0 && | |||
1519 | memcmp(vcpp->vcp_macs[i], vcp->vcp_macs[i], | |||
1520 | sizeof(vcp->vcp_macs[i])) != 0) { | |||
1521 | log_warnx("vm \"%s\" lladdr cannot be reused", | |||
1522 | name); | |||
1523 | return (EBUSY16); | |||
1524 | } | |||
1525 | if (strlen(vmc->vmc_ifnames[i]) && | |||
1526 | strcmp(vmc->vmc_ifnames[i], | |||
1527 | vmcp->vmc_ifnames[j]) == 0) { | |||
1528 | log_warnx("vm \"%s\" %s cannot be reused", | |||
1529 | vmc->vmc_ifnames[i], name); | |||
1530 | return (EBUSY16); | |||
1531 | } | |||
1532 | } | |||
1533 | } | |||
1534 | ||||
1535 | /* kernel */ | |||
1536 | if (strlen(vcp->vcp_kernel) > 0) { | |||
1537 | if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL0x02, uid) != 0) { | |||
1538 | log_warnx("vm \"%s\" no permission to set boot image", | |||
1539 | name); | |||
1540 | return (EPERM1); | |||
1541 | } | |||
1542 | vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL0x02; | |||
1543 | } else if (strlcpy(vcp->vcp_kernel, vcpp->vcp_kernel, | |||
1544 | sizeof(vcp->vcp_kernel)) >= sizeof(vcp->vcp_kernel)) { | |||
1545 | log_warnx("vm \"%s\" kernel name too long", name); | |||
1546 | return (EINVAL22); | |||
1547 | } | |||
1548 | ||||
1549 | /* cdrom */ | |||
1550 | if (strlen(vcp->vcp_cdrom) > 0) { | |||
1551 | if (vm_checkinsflag(vmcp, VMOP_CREATE_CDROM0x20, uid) != 0) { | |||
1552 | log_warnx("vm \"%s\" no permission to set cdrom", name); | |||
1553 | return (EPERM1); | |||
1554 | } | |||
1555 | vmc->vmc_checkaccess |= VMOP_CREATE_CDROM0x20; | |||
1556 | } else if (strlcpy(vcp->vcp_cdrom, vcpp->vcp_cdrom, | |||
1557 | sizeof(vcp->vcp_cdrom)) >= sizeof(vcp->vcp_cdrom)) { | |||
1558 | log_warnx("vm \"%s\" cdrom name too long", name); | |||
1559 | return (EINVAL22); | |||
1560 | } | |||
1561 | ||||
1562 | /* user */ | |||
1563 | if (vmc->vmc_owner.uid == 0) | |||
1564 | vmc->vmc_owner.uid = vmcp->vmc_owner.uid; | |||
1565 | else if (vmc->vmc_owner.uid != uid && | |||
1566 | vmc->vmc_owner.uid != vmcp->vmc_owner.uid) { | |||
1567 | log_warnx("vm \"%s\" user mismatch", name); | |||
1568 | return (EPERM1); | |||
1569 | } | |||
1570 | ||||
1571 | /* group */ | |||
1572 | if (vmc->vmc_owner.gid == 0) | |||
1573 | vmc->vmc_owner.gid = vmcp->vmc_owner.gid; | |||
1574 | else if (vmc->vmc_owner.gid != vmcp->vmc_owner.gid) { | |||
1575 | log_warnx("vm \"%s\" group mismatch", name); | |||
1576 | return (EPERM1); | |||
1577 | } | |||
1578 | ||||
1579 | /* child instances */ | |||
1580 | if (vmc->vmc_insflags) { | |||
1581 | log_warnx("vm \"%s\" cannot change instance permissions", name); | |||
1582 | return (EPERM1); | |||
1583 | } | |||
1584 | if (vmcp->vmc_insflags & VMOP_CREATE_INSTANCE0x40) { | |||
1585 | vmc->vmc_insowner.gid = vmcp->vmc_insowner.gid; | |||
1586 | vmc->vmc_insowner.uid = vmcp->vmc_insowner.gid; | |||
1587 | vmc->vmc_insflags = vmcp->vmc_insflags; | |||
1588 | } else { | |||
1589 | vmc->vmc_insowner.gid = 0; | |||
1590 | vmc->vmc_insowner.uid = 0; | |||
1591 | vmc->vmc_insflags = 0; | |||
1592 | } | |||
1593 | ||||
1594 | /* finished, remove instance flags */ | |||
1595 | vmc->vmc_flags &= ~VMOP_CREATE_INSTANCE0x40; | |||
1596 | ||||
1597 | return (0); | |||
1598 | } | |||
1599 | ||||
1600 | /* | |||
1601 | * vm_checkperm | |||
1602 | * | |||
1603 | * Checks if the user represented by the 'uid' parameter is allowed to | |||
1604 | * manipulate the VM described by the 'vm' parameter (or connect to said VM's | |||
1605 | * console.) | |||
1606 | * | |||
1607 | * Parameters: | |||
1608 | * vm: the VM whose permission is to be checked | |||
1609 | * vmo: the required uid/gid to be checked | |||
1610 | * uid: the user ID of the user making the request | |||
1611 | * | |||
1612 | * Return values: | |||
1613 | * 0: the permission should be granted | |||
1614 | * -1: the permission check failed (also returned if vm == null) | |||
1615 | */ | |||
1616 | int | |||
1617 | vm_checkperm(struct vmd_vm *vm, struct vmop_owner *vmo, uid_t uid) | |||
1618 | { | |||
1619 | struct group *gr; | |||
1620 | struct passwd *pw; | |||
1621 | char **grmem; | |||
1622 | ||||
1623 | /* root has no restrictions */ | |||
1624 | if (uid == 0) | |||
1625 | return (0); | |||
1626 | ||||
1627 | if (vmo == NULL((void *)0)) | |||
1628 | return (-1); | |||
1629 | ||||
1630 | /* check user */ | |||
1631 | if (vm == NULL((void *)0)) { | |||
1632 | if (vmo->uid == uid) | |||
1633 | return (0); | |||
1634 | } else { | |||
1635 | /* | |||
1636 | * check user of running vm (the owner of a running vm can | |||
1637 | * be different to (or more specific than) the configured owner. | |||
1638 | */ | |||
1639 | if (((vm->vm_state & VM_STATE_RUNNING0x01) && vm->vm_uid == uid) || | |||
1640 | (!(vm->vm_state & VM_STATE_RUNNING0x01) && vmo->uid == uid)) | |||
1641 | return (0); | |||
1642 | } | |||
1643 | ||||
1644 | /* check groups */ | |||
1645 | if (vmo->gid != -1) { | |||
1646 | if ((pw = getpwuid(uid)) == NULL((void *)0)) | |||
1647 | return (-1); | |||
1648 | if (pw->pw_gid == vmo->gid) | |||
1649 | return (0); | |||
1650 | if ((gr = getgrgid(vmo->gid)) != NULL((void *)0)) { | |||
1651 | for (grmem = gr->gr_mem; *grmem; grmem++) | |||
1652 | if (strcmp(*grmem, pw->pw_name) == 0) | |||
1653 | return (0); | |||
1654 | } | |||
1655 | } | |||
1656 | ||||
1657 | return (-1); | |||
1658 | } | |||
1659 | ||||
1660 | /* | |||
1661 | * vm_checkinsflag | |||
1662 | * | |||
1663 | * Checks wheter the non-root user is allowed to set an instance option. | |||
1664 | * | |||
1665 | * Parameters: | |||
1666 | * vmc: the VM create parameters | |||
1667 | * flag: the flag to be checked | |||
1668 | * uid: the user ID of the user making the request | |||
1669 | * | |||
1670 | * Return values: | |||
1671 | * 0: the permission should be granted | |||
1672 | * -1: the permission check failed (also returned if vm == null) | |||
1673 | */ | |||
1674 | int | |||
1675 | vm_checkinsflag(struct vmop_create_params *vmc, unsigned int flag, uid_t uid) | |||
1676 | { | |||
1677 | /* root has no restrictions */ | |||
1678 | if (uid == 0) | |||
1679 | return (0); | |||
1680 | ||||
1681 | if ((vmc->vmc_insflags & flag) == 0) | |||
1682 | return (-1); | |||
1683 | ||||
1684 | return (0); | |||
1685 | } | |||
1686 | ||||
/*
 * vm_checkaccess
 *
 * Checks if the user represented by the 'uid' parameter is allowed to
 * access the already opened file referenced by the 'fd' parameter.
 *
 * Parameters:
 *  fd: the file descriptor of the opened file
 *  uflag: when zero, the per-user permission checks are skipped
 *  uid: the user ID of the user making the request
 *  amode: the access flags of R_OK and W_OK
 *
 * Return values:
 *   0: the permission should be granted
 *  -1: the permission check failed (including fd == -1, a failing
 *      fstat(2), or a file that is not a regular file)
 */
int
vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode)
{
	struct stat	  sb;
	struct passwd	 *pwent;
	struct group	 *grent;
	char		**member;
	mode_t		  need;
	const int	  want_w = (amode & W_OK) != 0;
	const int	  want_r = (amode & R_OK) != 0;

	if (fd == -1)
		return (-1);

	/*
	 * File has to be accessible and a regular file; this is enforced
	 * before the root/uflag shortcut below.
	 */
	if (fstat(fd, &sb) == -1)
		return (-1);
	if (!S_ISREG(sb.st_mode))
		return (-1);

	/* root has no restrictions, and uflag == 0 disables the user checks */
	if (uid == 0 || uflag == 0)
		return (0);

	/* "other" permission bits cover every requested access */
	need = (want_w ? S_IWOTH : 0) | (want_r ? S_IROTH : 0);
	if ((sb.st_mode & need) == need)
		return (0);

	/* requester owns the file and the owner bits suffice */
	need = (want_w ? S_IWUSR : 0) | (want_r ? S_IRUSR : 0);
	if (uid == sb.st_uid && (sb.st_mode & need) == need)
		return (0);

	/* group bits must suffice before group membership is looked up */
	need = (want_w ? S_IWGRP : 0) | (want_r ? S_IRGRP : 0);
	if ((sb.st_mode & need) != need)
		return (-1);

	pwent = getpwuid(uid);
	if (pwent == NULL)
		return (-1);

	/* primary group matches the file's group */
	if (pwent->pw_gid == sb.st_gid)
		return (0);

	/* otherwise look for the login name in the file group's member list */
	grent = getgrgid(sb.st_gid);
	if (grent != NULL) {
		member = grent->gr_mem;
		while (*member != NULL) {
			if (strcmp(*member, pwent->pw_name) == 0)
				return (0);
			member++;
		}
	}

	return (-1);
}
1754 | ||||
1755 | int | |||
1756 | vm_opentty(struct vmd_vm *vm) | |||
1757 | { | |||
1758 | struct ptmget ptm; | |||
1759 | struct stat st; | |||
1760 | struct group *gr; | |||
1761 | uid_t uid; | |||
1762 | gid_t gid; | |||
1763 | mode_t mode; | |||
1764 | int on; | |||
1765 | ||||
1766 | /* | |||
1767 | * Open tty with pre-opened PTM fd | |||
1768 | */ | |||
1769 | if ((ioctl(env->vmd_ptmfd, PTMGET((unsigned long)0x40000000 | ((sizeof(struct ptmget) & 0x1fff ) << 16) | ((('t')) << 8) | ((1))), &ptm) == -1)) | |||
1770 | return (-1); | |||
1771 | ||||
1772 | /* | |||
1773 | * We use user ioctl(2) mode to pass break commands. | |||
1774 | */ | |||
1775 | on = 1; | |||
1776 | if (ioctl(ptm.cfd, TIOCUCNTL((unsigned long)0x80000000 | ((sizeof(int) & 0x1fff) << 16) | ((('t')) << 8) | ((102))), &on) == -1) | |||
1777 | fatal("could not enable user ioctl mode"); | |||
1778 | ||||
1779 | vm->vm_tty = ptm.cfd; | |||
1780 | close(ptm.sfd); | |||
1781 | if ((vm->vm_ttyname = strdup(ptm.sn)) == NULL((void *)0)) | |||
1782 | goto fail; | |||
1783 | ||||
1784 | uid = vm->vm_uid; | |||
1785 | gid = vm->vm_params.vmc_owner.gid; | |||
1786 | ||||
1787 | if (vm->vm_params.vmc_owner.gid != -1) { | |||
1788 | mode = 0660; | |||
1789 | } else if ((gr = getgrnam("tty")) != NULL((void *)0)) { | |||
1790 | gid = gr->gr_gid; | |||
1791 | mode = 0620; | |||
1792 | } else { | |||
1793 | mode = 0600; | |||
1794 | gid = 0; | |||
1795 | } | |||
1796 | ||||
1797 | log_debug("%s: vm %s tty %s uid %d gid %d mode %o", | |||
1798 | __func__, vm->vm_params.vmc_params.vcp_name, | |||
1799 | vm->vm_ttyname, uid, gid, mode); | |||
1800 | ||||
1801 | /* | |||
1802 | * Change ownership and mode of the tty as required. | |||
1803 | * Loosely based on the implementation of sshpty.c | |||
1804 | */ | |||
1805 | if (stat(vm->vm_ttyname, &st) == -1) | |||
1806 | goto fail; | |||
1807 | ||||
1808 | if (st.st_uid != uid || st.st_gid != gid) { | |||
1809 | if (chown(vm->vm_ttyname, uid, gid) == -1) { | |||
1810 | log_warn("chown %s %d %d failed, uid %d", | |||
1811 | vm->vm_ttyname, uid, gid, getuid()); | |||
1812 | ||||
1813 | /* Ignore failure on read-only filesystems */ | |||
1814 | if (!((errno(*__errno()) == EROFS30) && | |||
1815 | (st.st_uid == uid || st.st_uid == 0))) | |||
1816 | goto fail; | |||
1817 | } | |||
1818 | } | |||
1819 | ||||
1820 | if ((st.st_mode & (S_IRWXU0000700|S_IRWXG0000070|S_IRWXO0000007)) != mode) { | |||
1821 | if (chmod(vm->vm_ttyname, mode) == -1) { | |||
1822 | log_warn("chmod %s %o failed, uid %d", | |||
1823 | vm->vm_ttyname, mode, getuid()); | |||
1824 | ||||
1825 | /* Ignore failure on read-only filesystems */ | |||
1826 | if (!((errno(*__errno()) == EROFS30) && | |||
1827 | (st.st_uid == uid || st.st_uid == 0))) | |||
1828 | goto fail; | |||
1829 | } | |||
1830 | } | |||
1831 | ||||
1832 | return (0); | |||
1833 | fail: | |||
1834 | vm_closetty(vm); | |||
1835 | return (-1); | |||
1836 | } | |||
1837 | ||||
1838 | void | |||
1839 | vm_closetty(struct vmd_vm *vm) | |||
1840 | { | |||
1841 | if (vm->vm_tty != -1) { | |||
1842 | /* Release and close the tty */ | |||
1843 | if (fchown(vm->vm_tty, 0, 0) == -1) | |||
1844 | log_warn("chown %s 0 0 failed", vm->vm_ttyname); | |||
1845 | if (fchmod(vm->vm_tty, 0666) == -1) | |||
1846 | log_warn("chmod %s 0666 failed", vm->vm_ttyname); | |||
1847 | close(vm->vm_tty); | |||
1848 | vm->vm_tty = -1; | |||
1849 | } | |||
1850 | free(vm->vm_ttyname); | |||
1851 | vm->vm_ttyname = NULL((void *)0); | |||
1852 | } | |||
1853 | ||||
1854 | void | |||
1855 | switch_remove(struct vmd_switch *vsw) | |||
1856 | { | |||
1857 | if (vsw == NULL((void *)0)) | |||
1858 | return; | |||
1859 | ||||
1860 | TAILQ_REMOVE(env->vmd_switches, vsw, sw_entry)do { if (((vsw)->sw_entry.tqe_next) != ((void *)0)) (vsw)-> sw_entry.tqe_next->sw_entry.tqe_prev = (vsw)->sw_entry. tqe_prev; else (env->vmd_switches)->tqh_last = (vsw)-> sw_entry.tqe_prev; *(vsw)->sw_entry.tqe_prev = (vsw)->sw_entry .tqe_next; ; ; } while (0); | |||
1861 | ||||
1862 | free(vsw->sw_group); | |||
1863 | free(vsw->sw_name); | |||
1864 | free(vsw); | |||
1865 | } | |||
1866 | ||||
1867 | struct vmd_switch * | |||
1868 | switch_getbyname(const char *name) | |||
1869 | { | |||
1870 | struct vmd_switch *vsw; | |||
1871 | ||||
1872 | if (name == NULL((void *)0)) | |||
1873 | return (NULL((void *)0)); | |||
1874 | TAILQ_FOREACH(vsw, env->vmd_switches, sw_entry)for((vsw) = ((env->vmd_switches)->tqh_first); (vsw) != ( (void *)0); (vsw) = ((vsw)->sw_entry.tqe_next)) { | |||
1875 | if (strcmp(vsw->sw_name, name) == 0) | |||
1876 | return (vsw); | |||
1877 | } | |||
1878 | ||||
1879 | return (NULL((void *)0)); | |||
1880 | } | |||
1881 | ||||
1882 | struct vmd_user * | |||
1883 | user_get(uid_t uid) | |||
1884 | { | |||
1885 | struct vmd_user *usr; | |||
1886 | ||||
1887 | if (uid == 0) | |||
1888 | return (NULL((void *)0)); | |||
1889 | ||||
1890 | /* first try to find an existing user */ | |||
1891 | TAILQ_FOREACH(usr, env->vmd_users, usr_entry)for((usr) = ((env->vmd_users)->tqh_first); (usr) != ((void *)0); (usr) = ((usr)->usr_entry.tqe_next)) { | |||
1892 | if (usr->usr_id.uid == uid) | |||
1893 | goto done; | |||
1894 | } | |||
1895 | ||||
1896 | if ((usr = calloc(1, sizeof(*usr))) == NULL((void *)0)) { | |||
1897 | log_warn("could not allocate user"); | |||
1898 | return (NULL((void *)0)); | |||
1899 | } | |||
1900 | ||||
1901 | usr->usr_id.uid = uid; | |||
1902 | usr->usr_id.gid = -1; | |||
1903 | TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry)do { (usr)->usr_entry.tqe_next = ((void *)0); (usr)->usr_entry .tqe_prev = (env->vmd_users)->tqh_last; *(env->vmd_users )->tqh_last = (usr); (env->vmd_users)->tqh_last = & (usr)->usr_entry.tqe_next; } while (0); | |||
1904 | ||||
1905 | done: | |||
1906 | DPRINTF("%s: uid %d #%d +",do {} while(0) | |||
1907 | __func__, usr->usr_id.uid, usr->usr_refcnt + 1)do {} while(0); | |||
1908 | usr->usr_refcnt++; | |||
1909 | ||||
1910 | return (usr); | |||
1911 | } | |||
1912 | ||||
1913 | void | |||
1914 | user_put(struct vmd_user *usr) | |||
1915 | { | |||
1916 | if (usr == NULL((void *)0)) | |||
1917 | return; | |||
1918 | ||||
1919 | DPRINTF("%s: uid %d #%d -",do {} while(0) | |||
1920 | __func__, usr->usr_id.uid, usr->usr_refcnt - 1)do {} while(0); | |||
1921 | ||||
1922 | if (--usr->usr_refcnt > 0) | |||
1923 | return; | |||
1924 | ||||
1925 | TAILQ_REMOVE(env->vmd_users, usr, usr_entry)do { if (((usr)->usr_entry.tqe_next) != ((void *)0)) (usr) ->usr_entry.tqe_next->usr_entry.tqe_prev = (usr)->usr_entry .tqe_prev; else (env->vmd_users)->tqh_last = (usr)-> usr_entry.tqe_prev; *(usr)->usr_entry.tqe_prev = (usr)-> usr_entry.tqe_next; ; ; } while (0); | |||
1926 | free(usr); | |||
1927 | } | |||
1928 | ||||
1929 | void | |||
1930 | user_inc(struct vm_create_params *vcp, struct vmd_user *usr, int inc) | |||
1931 | { | |||
1932 | char mem[FMT_SCALED_STRSIZE7]; | |||
1933 | ||||
1934 | if (usr == NULL((void *)0)) | |||
1935 | return; | |||
1936 | ||||
1937 | /* increment or decrement counters */ | |||
1938 | inc = inc ? 1 : -1; | |||
1939 | ||||
1940 | usr->usr_maxcpu += vcp->vcp_ncpus * inc; | |||
1941 | usr->usr_maxmem += vcp->vcp_memranges[0].vmr_size * inc; | |||
1942 | usr->usr_maxifs += vcp->vcp_nnics * inc; | |||
1943 | ||||
1944 | if (log_getverbose() > 1) { | |||
1945 | (void)fmt_scaled(usr->usr_maxmem * 1024 * 1024, mem); | |||
1946 | log_debug("%s: %c uid %d ref %d cpu %llu mem %s ifs %llu", | |||
1947 | __func__, inc == 1 ? '+' : '-', | |||
1948 | usr->usr_id.uid, usr->usr_refcnt, | |||
1949 | usr->usr_maxcpu, mem, usr->usr_maxifs); | |||
1950 | } | |||
1951 | } | |||
1952 | ||||
1953 | int | |||
1954 | user_checklimit(struct vmd_user *usr, struct vm_create_params *vcp) | |||
1955 | { | |||
1956 | const char *limit = ""; | |||
1957 | ||||
1958 | /* XXX make the limits configurable */ | |||
1959 | if (usr->usr_maxcpu > VM_DEFAULT_USER_MAXCPU4) { | |||
1960 | limit = "cpu "; | |||
1961 | goto fail; | |||
1962 | } | |||
1963 | if (usr->usr_maxmem > VM_DEFAULT_USER_MAXMEM2048) { | |||
1964 | limit = "memory "; | |||
1965 | goto fail; | |||
1966 | } | |||
1967 | if (usr->usr_maxifs > VM_DEFAULT_USER_MAXIFS8) { | |||
1968 | limit = "interface "; | |||
1969 | goto fail; | |||
1970 | } | |||
1971 | ||||
1972 | return (0); | |||
1973 | ||||
1974 | fail: | |||
1975 | log_warnx("%s: user %d %slimit reached", vcp->vcp_name, | |||
1976 | usr->usr_id.uid, limit); | |||
1977 | return (-1); | |||
1978 | } | |||
1979 | ||||
/*
 * get_string
 *
 * Copies the leading run of printable bytes (as judged by isprint(3)) of
 * a buffer into a newly allocated, NUL-terminated string.
 *
 * Parameters:
 *  ptr: the buffer to read from
 *  len: the maximum number of bytes to examine
 *
 * Return values:
 *  the result of strndup(3) over the printable prefix; the caller owns
 *  the memory and releases it with free(3)
 */
char *
get_string(uint8_t *ptr, size_t len)
{
	size_t	 n = 0;

	/* stop at the first non-printable byte or at the end of the buffer */
	while (n < len && isprint(ptr[n]))
		n++;

	return strndup((const char *)ptr, n);
}
1991 | ||||
/*
 * prefixlen2mask
 *
 * Converts an IPv4 prefix length into a netmask in network byte order.
 * A length of 0 yields 0; lengths above 32 are treated as 32.
 */
uint32_t
prefixlen2mask(uint8_t prefixlen)
{
	unsigned int	 bits = prefixlen;

	/* handled separately: a shift by the full width is not allowed */
	if (bits == 0)
		return (0);

	if (bits > 32)
		bits = 32;

	return (htonl(0xffffffff << (32 - bits)));
}
2003 | ||||
/*
 * prefixlen2mask6
 *
 * Converts an IPv6 prefix length into a netmask and stores it in 'mask'.
 * Lengths above 128 are treated as 128.
 *
 * Parameters:
 *  prefixlen: number of leading one bits
 *  mask: destination for the resulting 16-byte mask
 */
void
prefixlen2mask6(uint8_t prefixlen, struct in6_addr *mask)
{
	struct in6_addr	 out;
	unsigned int	 nbits, whole, rest;

	nbits = prefixlen > 128 ? 128 : prefixlen;
	whole = nbits / 8;	/* bytes that are entirely ones */
	rest = nbits % 8;	/* leading one bits in the following byte */

	memset(&out, 0, sizeof(out));
	memset(out.s6_addr, 0xff, whole);
	if (rest != 0) {
		/* the store keeps only the low byte of the shifted pattern */
		out.s6_addr[whole] = 0xff00 >> rest;
	}

	memcpy(mask, &out, sizeof(out));
}
2022 | ||||
2023 | void | |||
2024 | getmonotime(struct timeval *tv) | |||
2025 | { | |||
2026 | struct timespec ts; | |||
2027 | ||||
2028 | if (clock_gettime(CLOCK_MONOTONIC3, &ts)) | |||
2029 | fatal("clock_gettime"); | |||
2030 | ||||
2031 | TIMESPEC_TO_TIMEVAL(tv, &ts)do { (tv)->tv_sec = (&ts)->tv_sec; (tv)->tv_usec = (&ts)->tv_nsec / 1000; } while (0); | |||
2032 | } |