Bug Summary

File: src/usr.sbin/vmd/vm.c
Warning: line 2187, column 16
The result of the left shift is undefined due to shifting by '39', which is greater or equal to the width of type 'int'
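Background on the diagnostic: shifting by an amount greater than or equal to the width of the left operand's promoted type is undefined behavior in C (C11 6.5.7p3), and on amd64 'int' is 32 bits wide, so a shift count of 39 is out of range. A minimal sketch of the pattern and the usual remedy, assuming <stdint.h> is available; the variable names below are illustrative only and are not taken from vm.c:

    unsigned int shift = 39;
    uint64_t bad  = 1 << shift;            /* undefined: the literal 1 has type int (32 bits) */
    uint64_t good = (uint64_t)1 << shift;  /* defined: widen the left operand before shifting */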

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name vm.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.sbin/vmd/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/usr.sbin/vmd -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.sbin/vmd/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/usr.sbin/vmd/vm.c
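The line above is the analyzer (cc1) invocation recorded for this translation unit. A report of this kind is normally regenerated by wrapping the build with the analyzer driver, for example by running something like "scan-build -o /home/ben/Projects/vmm/scan-build make" from /usr/src/usr.sbin/vmd; that wrapper command is an assumption for illustration, only the cc1 line above appears in the report itself.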
1/* $OpenBSD: vm.c,v 1.67 2021/12/30 08:12:23 claudio Exp $ */
2
3/*
4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h> /* PAGE_SIZE */
20#include <sys/types.h>
21#include <sys/ioctl.h>
22#include <sys/queue.h>
23#include <sys/wait.h>
24#include <sys/uio.h>
25#include <sys/stat.h>
26#include <sys/socket.h>
27#include <sys/time.h>
28#include <sys/mman.h>
29
30#include <dev/ic/i8253reg.h>
31#include <dev/isa/isareg.h>
32#include <dev/pci/pcireg.h>
33
34#include <machine/psl.h>
35#include <machine/pte.h>
36#include <machine/specialreg.h>
37#include <machine/vmmvar.h>
38
39#include <net/if.h>
40
41#include <errno.h>
42#include <event.h>
43#include <fcntl.h>
44#include <imsg.h>
45#include <limits.h>
46#include <poll.h>
47#include <pthread.h>
48#include <stddef.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <string.h>
52#include <unistd.h>
53#include <util.h>
54
55#include "atomicio.h"
56#include "fw_cfg.h"
57#include "i8253.h"
58#include "i8259.h"
59#include "loadfile.h"
60#include "mc146818.h"
61#include "ns8250.h"
62#include "pci.h"
63#include "virtio.h"
64#include "vmd.h"
65#include "vmm.h"
66
67io_fn_t ioports_map[MAX_PORTS65536];
68
69int run_vm(int, int[][VM_MAX_BASE_PER_DISK4], int *,
70 struct vmop_create_params *, struct vcpu_reg_state *);
71void vm_dispatch_vmm(int, short, void *);
72void *event_thread(void *);
73void *vcpu_run_loop(void *);
74int vcpu_exit(struct vm_run_params *);
75int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
76void create_memory_map(struct vm_create_params *);
77int alloc_guest_mem(struct vm_create_params *);
78int vmm_create_vm(struct vm_create_params *);
79void init_emulated_hw(struct vmop_create_params *, int,
80 int[][VM_MAX_BASE_PER_DISK4], int *);
81void restore_emulated_hw(struct vm_create_params *, int, int *,
82 int[][VM_MAX_BASE_PER_DISK4],int);
83void vcpu_exit_inout(struct vm_run_params *);
84int vcpu_exit_eptviolation(struct vm_run_params *);
85uint8_t vcpu_exit_pci(struct vm_run_params *);
86int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
87int loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
88int send_vm(int, struct vm_create_params *);
89int dump_send_header(int);
90int dump_vmr(int , struct vm_mem_range *);
91int dump_mem(int, struct vm_create_params *);
92void restore_vmr(int, struct vm_mem_range *);
93void restore_mem(int, struct vm_create_params *);
94int restore_vm_params(int, struct vm_create_params *);
95void pause_vm(struct vm_create_params *);
96void unpause_vm(struct vm_create_params *);
97
98int translate_gva(struct vm_exit*, uint64_t, uint64_t *, int);
99
100static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
101 size_t);
102
103int con_fd;
104struct vmd_vm *current_vm;
105
106extern struct vmd *env;
107
108extern char *__progname;
109
110pthread_mutex_t threadmutex;
111pthread_cond_t threadcond;
112
113pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM64];
114pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM64];
115pthread_barrier_t vm_pause_barrier;
116pthread_cond_t vcpu_unpause_cond[VMM_MAX_VCPUS_PER_VM64];
117pthread_mutex_t vcpu_unpause_mtx[VMM_MAX_VCPUS_PER_VM64];
118uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM64];
119uint8_t vcpu_done[VMM_MAX_VCPUS_PER_VM64];
120
121/*
122 * Represents a standard register set for an OS to be booted
123 * as a flat 64 bit address space.
124 *
125 * NOT set here are:
126 * RIP
127 * RSP
128 * GDTR BASE
129 *
130 * Specific bootloaders should clone this structure and override
131 * those fields as needed.
132 *
133 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
134 * features of the CPU in use.
135 */
136static const struct vcpu_reg_state vcpu_init_flat64 = {
137 .vrs_gprs[VCPU_REGS_RFLAGS17] = 0x2,
138 .vrs_gprs[VCPU_REGS_RIP16] = 0x0,
139 .vrs_gprs[VCPU_REGS_RSP14] = 0x0,
140 .vrs_crs[VCPU_REGS_CR00] = CR0_ET0x00000010 | CR0_PE0x00000001 | CR0_PG0x80000000,
141 .vrs_crs[VCPU_REGS_CR32] = PML4_PAGE0x11000,
142 .vrs_crs[VCPU_REGS_CR43] = CR4_PAE0x00000020 | CR4_PSE0x00000010,
143 .vrs_crs[VCPU_REGS_PDPTE06] = 0ULL,
144 .vrs_crs[VCPU_REGS_PDPTE17] = 0ULL,
145 .vrs_crs[VCPU_REGS_PDPTE28] = 0ULL,
146 .vrs_crs[VCPU_REGS_PDPTE39] = 0ULL,
147 .vrs_sregs[VCPU_REGS_CS0] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
148 .vrs_sregs[VCPU_REGS_DS1] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
149 .vrs_sregs[VCPU_REGS_ES2] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
150 .vrs_sregs[VCPU_REGS_FS3] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
151 .vrs_sregs[VCPU_REGS_GS4] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
152 .vrs_sregs[VCPU_REGS_SS5] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
153 .vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
154 .vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
155 .vrs_sregs[VCPU_REGS_LDTR6] = { 0x0, 0xFFFF, 0x0082, 0x0},
156 .vrs_sregs[VCPU_REGS_TR7] = { 0x0, 0xFFFF, 0x008B, 0x0},
157 .vrs_msrs[VCPU_REGS_EFER0] = EFER_LME0x00000100 | EFER_LMA0x00000400,
158 .vrs_drs[VCPU_REGS_DR00] = 0x0,
159 .vrs_drs[VCPU_REGS_DR11] = 0x0,
160 .vrs_drs[VCPU_REGS_DR22] = 0x0,
161 .vrs_drs[VCPU_REGS_DR33] = 0x0,
162 .vrs_drs[VCPU_REGS_DR64] = 0xFFFF0FF0,
163 .vrs_drs[VCPU_REGS_DR75] = 0x400,
164 .vrs_msrs[VCPU_REGS_STAR1] = 0ULL,
165 .vrs_msrs[VCPU_REGS_LSTAR2] = 0ULL,
166 .vrs_msrs[VCPU_REGS_CSTAR3] = 0ULL,
167 .vrs_msrs[VCPU_REGS_SFMASK4] = 0ULL,
168 .vrs_msrs[VCPU_REGS_KGSBASE5] = 0ULL,
169 .vrs_msrs[VCPU_REGS_MISC_ENABLE6] = 0ULL,
170 .vrs_crs[VCPU_REGS_XCR05] = XCR0_X870x00000001
171};
172
173/*
174 * Represents a standard register set for a BIOS to be booted
175 * as a flat 16 bit address space.
176 */
177static const struct vcpu_reg_state vcpu_init_flat16 = {
178 .vrs_gprs[VCPU_REGS_RFLAGS17] = 0x2,
179 .vrs_gprs[VCPU_REGS_RIP16] = 0xFFF0,
180 .vrs_gprs[VCPU_REGS_RSP14] = 0x0,
181 .vrs_crs[VCPU_REGS_CR00] = 0x60000010,
182 .vrs_crs[VCPU_REGS_CR32] = 0,
183 .vrs_sregs[VCPU_REGS_CS0] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
184 .vrs_sregs[VCPU_REGS_DS1] = { 0x0, 0xFFFF, 0x8093, 0x0},
185 .vrs_sregs[VCPU_REGS_ES2] = { 0x0, 0xFFFF, 0x8093, 0x0},
186 .vrs_sregs[VCPU_REGS_FS3] = { 0x0, 0xFFFF, 0x8093, 0x0},
187 .vrs_sregs[VCPU_REGS_GS4] = { 0x0, 0xFFFF, 0x8093, 0x0},
188 .vrs_sregs[VCPU_REGS_SS5] = { 0x0, 0xFFFF, 0x8093, 0x0},
189 .vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
190 .vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
191 .vrs_sregs[VCPU_REGS_LDTR6] = { 0x0, 0xFFFF, 0x0082, 0x0},
192 .vrs_sregs[VCPU_REGS_TR7] = { 0x0, 0xFFFF, 0x008B, 0x0},
193 .vrs_msrs[VCPU_REGS_EFER0] = 0ULL,
194 .vrs_drs[VCPU_REGS_DR00] = 0x0,
195 .vrs_drs[VCPU_REGS_DR11] = 0x0,
196 .vrs_drs[VCPU_REGS_DR22] = 0x0,
197 .vrs_drs[VCPU_REGS_DR33] = 0x0,
198 .vrs_drs[VCPU_REGS_DR64] = 0xFFFF0FF0,
199 .vrs_drs[VCPU_REGS_DR75] = 0x400,
200 .vrs_msrs[VCPU_REGS_STAR1] = 0ULL,
201 .vrs_msrs[VCPU_REGS_LSTAR2] = 0ULL,
202 .vrs_msrs[VCPU_REGS_CSTAR3] = 0ULL,
203 .vrs_msrs[VCPU_REGS_SFMASK4] = 0ULL,
204 .vrs_msrs[VCPU_REGS_KGSBASE5] = 0ULL,
205 .vrs_crs[VCPU_REGS_XCR05] = XCR0_X870x00000001
206};
207
208/*
209 * loadfile_bios
210 *
211 * Alternatively to loadfile_elf, this function loads a non-ELF BIOS image
212 * directly into memory.
213 *
214 * Parameters:
215 * fp: file of a kernel file to load
216 * size: uncompressed size of the image
217 * (out) vrs: register state to set on init for this kernel
218 *
219 * Return values:
220 * 0 if successful
221 * various error codes returned from read(2) or loadelf functions
222 */
223int
224loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_state *vrs)
225{
226 off_t off;
227
228 /* Set up a "flat 16 bit" register state for BIOS */
229 memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));
230
231 /* Seek to the beginning of the BIOS image */
232 if (gzseek(fp, 0, SEEK_SET0) == -1)
233 return (-1);
234
235 /* The BIOS image must end at 1M */
236 if ((off = 1048576 - size) < 0)
237 return (-1);
238
239 /* Read BIOS image into memory */
240 if (mread(fp, off, size) != (size_t)size) {
241 errno(*__errno()) = EIO5;
242 return (-1);
243 }
244
245 log_debug("%s: loaded BIOS image", __func__);
246
247 return (0);
248}
249
250/*
251 * start_vm
252 *
253 * After forking a new VM process, starts the new VM with the creation
254 * parameters supplied (in the incoming vm->vm_params field). This
255 * function performs a basic sanity check on the incoming parameters
256 * and then performs the following steps to complete the creation of the VM:
257 *
258 * 1. validates and creates the new VM
259 * 2. opens the imsg control channel to the parent and drops more privilege
260 * 3. drops additional privileges by calling pledge(2)
261 * 4. loads the kernel from the disk image or file descriptor
262 * 5. runs the VM's VCPU loops.
263 *
264 * Parameters:
265 * vm: The VM data structure that includes the VM create parameters.
266 * fd: The imsg socket that is connected to the parent process.
267 *
268 * Return values:
269 * 0: success
270 * !0 : failure - typically an errno indicating the source of the failure
271 */
272int
273start_vm(struct vmd_vm *vm, int fd)
274{
275 struct vmop_create_params *vmc = &vm->vm_params;
276 struct vm_create_params *vcp = &vmc->vmc_params;
277 struct vcpu_reg_state vrs;
278 int nicfds[VMM_MAX_NICS_PER_VM4];
279 int ret;
280 gzFile fp;
281 size_t i;
282 struct vm_rwregs_params vrp;
283 struct stat sb;
284
285 /* Child */
286 setproctitle("%s", vcp->vcp_name);
287 log_procinit(vcp->vcp_name);
288
289 if (!(vm->vm_state & VM_STATE_RECEIVED0x08))
290 create_memory_map(vcp);
291
292 ret = alloc_guest_mem(vcp);
293
294 if (ret) {
295 errno(*__errno()) = ret;
296 fatal("could not allocate guest memory - exiting");
297 }
298
299 ret = vmm_create_vm(vcp);
300 current_vm = vm;
301
302 /* send back the kernel-generated vm id (0 on error) */
303 if (write(fd, &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
304 sizeof(vcp->vcp_id))
305 fatal("write vcp id");
306
307 if (ret) {
308 errno(*__errno()) = ret;
309 fatal("create vmm ioctl failed - exiting");
310 }
311
312 /*
313 * pledge in the vm processes:
314 * stdio - for malloc and basic I/O including events.
315 * recvfd - for send/recv.
316 * vmm - for the vmm ioctls and operations.
317 */
318 if (pledge("stdio vmm recvfd", NULL((void*)0)) == -1)
319 fatal("pledge");
320
321 if (vm->vm_state & VM_STATE_RECEIVED0x08) {
322 ret = read(vm->vm_receive_fd, &vrp, sizeof(vrp));
323 if (ret != sizeof(vrp)) {
324 fatal("received incomplete vrp - exiting");
325 }
326 vrs = vrp.vrwp_regs;
327 } else {
328 /*
329 * Set up default "flat 64 bit" register state - RIP,
330 * RSP, and GDT info will be set in bootloader
331 */
332 memcpy(&vrs, &vcpu_init_flat64, sizeof(vrs));
333
334 /* Find and open kernel image */
335 if ((fp = gzdopen(vm->vm_kernel, "r")) == NULL((void*)0))
336 fatalx("failed to open kernel - exiting");
337
338 /* Load kernel image */
339 ret = loadfile_elf(fp, vcp, &vrs, vmc->vmc_bootdevice);
340
341 /*
342 * Try BIOS as a fallback (only if it was provided as an image
343 * with vm->vm_kernel and the file is not compressed)
344 */
345 if (ret && errno(*__errno()) == ENOEXEC8 && vm->vm_kernel != -1 &&
346 gzdirect(fp) && (ret = fstat(vm->vm_kernel, &sb)) == 0)
347 ret = loadfile_bios(fp, sb.st_size, &vrs);
348
349 if (ret)
350 fatal("failed to load kernel or BIOS - exiting");
351
352 gzclose(fp);
353 }
354
355 if (vm->vm_kernel != -1)
356 close(vm->vm_kernel);
357
358 con_fd = vm->vm_tty;
359 if (fcntl(con_fd, F_SETFL4, O_NONBLOCK0x0004) == -1)
360 fatal("failed to set nonblocking mode on console");
361
362 for (i = 0; i < VMM_MAX_NICS_PER_VM4; i++)
363 nicfds[i] = vm->vm_ifs[i].vif_fd;
364
365 event_init();
366
367 if (vm->vm_state & VM_STATE_RECEIVED0x08) {
368 restore_emulated_hw(vcp, vm->vm_receive_fd, nicfds,
369 vm->vm_disks, vm->vm_cdrom);
370 restore_mem(vm->vm_receive_fd, vcp);
371 if (restore_vm_params(vm->vm_receive_fd, vcp))
372 fatal("restore vm params failed");
373 unpause_vm(vcp);
374 }
375
376 if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
377 fatal("setup vm pipe");
378
379 /* Execute the vcpu run loop(s) for this VM */
380 ret = run_vm(vm->vm_cdrom, vm->vm_disks, nicfds, &vm->vm_params, &vrs);
381
382 /* Ensure that any in-flight data is written back */
383 virtio_shutdown(vm);
384
385 return (ret);
386}
387
388/*
389 * vm_dispatch_vmm
390 *
391 * imsg callback for messages that are received from the vmm parent process.
392 */
393void
394vm_dispatch_vmm(int fd, short event, void *arg)
395{
396 struct vmd_vm *vm = arg;
397 struct vmop_result vmr;
398 struct vmop_addr_result var;
399 struct imsgev *iev = &vm->vm_iev;
400 struct imsgbuf *ibuf = &iev->ibuf;
401 struct imsg imsg;
402 ssize_t n;
403 int verbose;
404
405 if (event & EV_READ0x02) {
406 if ((n = imsg_read(ibuf)) == -1 && errno(*__errno()) != EAGAIN35)
407 fatal("%s: imsg_read", __func__);
408 if (n == 0)
409 _exit(0);
410 }
411
412 if (event & EV_WRITE0x04) {
413 if ((n = msgbuf_write(&ibuf->w)) == -1 && errno(*__errno()) != EAGAIN35)
414 fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd);
415 if (n == 0)
416 _exit(0);
417 }
418
419 for (;;) {
420 if ((n = imsg_get(ibuf, &imsg)) == -1)
421 fatal("%s: imsg_get", __func__);
422 if (n == 0)
423 break;
424
425#if DEBUG > 1
426 log_debug("%s: got imsg %d from %s",
427 __func__, imsg.hdr.type,
428 vm->vm_params.vmc_params.vcp_name);
429#endif
430
431 switch (imsg.hdr.type) {
432 case IMSG_CTL_VERBOSE:
433 IMSG_SIZE_CHECK(&imsg, &verbose);
434 memcpy(&verbose, imsg.data, sizeof(verbose));
435 log_setverbose(verbose);
436 break;
437 case IMSG_VMDOP_VM_SHUTDOWN:
438 if (vmmci_ctl(VMMCI_SHUTDOWN) == -1)
439 _exit(0);
440 break;
441 case IMSG_VMDOP_VM_REBOOT:
442 if (vmmci_ctl(VMMCI_REBOOT) == -1)
443 _exit(0);
444 break;
445 case IMSG_VMDOP_PAUSE_VM:
446 vmr.vmr_result = 0;
447 vmr.vmr_id = vm->vm_vmid;
448 pause_vm(&vm->vm_params.vmc_params);
449 imsg_compose_event(&vm->vm_iev,
450 IMSG_VMDOP_PAUSE_VM_RESPONSE,
451 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
452 sizeof(vmr));
453 break;
454 case IMSG_VMDOP_UNPAUSE_VM:
455 vmr.vmr_result = 0;
456 vmr.vmr_id = vm->vm_vmid;
457 unpause_vm(&vm->vm_params.vmc_params);
458 imsg_compose_event(&vm->vm_iev,
459 IMSG_VMDOP_UNPAUSE_VM_RESPONSE,
460 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
461 sizeof(vmr));
462 break;
463 case IMSG_VMDOP_SEND_VM_REQUEST:
464 vmr.vmr_id = vm->vm_vmid;
465 vmr.vmr_result = send_vm(imsg.fd,
466 &vm->vm_params.vmc_params);
467 imsg_compose_event(&vm->vm_iev,
468 IMSG_VMDOP_SEND_VM_RESPONSE,
469 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
470 sizeof(vmr));
471 if (!vmr.vmr_result) {
472 imsg_flush(&current_vm->vm_iev.ibuf);
473 _exit(0);
474 }
475 break;
476 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
477 IMSG_SIZE_CHECK(&imsg, &var);
478 memcpy(&var, imsg.data, sizeof(var));
479
480 log_debug("%s: received tap addr %s for nic %d",
481 vm->vm_params.vmc_params.vcp_name,
482 ether_ntoa((void *)var.var_addr), var.var_nic_idx);
483
484 vionet_set_hostmac(vm, var.var_nic_idx, var.var_addr);
485 break;
486 default:
487 fatalx("%s: got invalid imsg %d from %s",
488 __func__, imsg.hdr.type,
489 vm->vm_params.vmc_params.vcp_name);
490 }
491 imsg_free(&imsg);
492 }
493 imsg_event_add(iev);
494}
495
496/*
497 * vm_shutdown
498 *
499 * Tell the vmm parent process to shutdown or reboot the VM and exit.
500 */
501__dead__attribute__((__noreturn__)) void
502vm_shutdown(unsigned int cmd)
503{
504 switch (cmd) {
505 case VMMCI_NONE:
506 case VMMCI_SHUTDOWN:
507 (void)imsg_compose_event(&current_vm->vm_iev,
508 IMSG_VMDOP_VM_SHUTDOWN, 0, 0, -1, NULL((void*)0), 0);
509 break;
510 case VMMCI_REBOOT:
511 (void)imsg_compose_event(&current_vm->vm_iev,
512 IMSG_VMDOP_VM_REBOOT, 0, 0, -1, NULL((void*)0), 0);
513 break;
514 default:
515 fatalx("invalid vm ctl command: %d", cmd);
516 }
517 imsg_flush(&current_vm->vm_iev.ibuf);
518
519 _exit(0);
520}
521
522int
523send_vm(int fd, struct vm_create_params *vcp)
524{
525 struct vm_rwregs_params vrp;
526 struct vm_rwvmparams_params vpp;
527 struct vmop_create_params *vmc;
528 struct vm_terminate_params vtp;
529 unsigned int flags = 0;
530 unsigned int i;
531 int ret = 0;
532 size_t sz;
533
534 if (dump_send_header(fd)) {
535 log_info("%s: failed to send vm dump header", __func__);
536 goto err;
537 }
538
539 pause_vm(vcp);
540
541 vmc = calloc(1, sizeof(struct vmop_create_params));
542 if (vmc == NULL((void*)0)) {
543 log_warn("%s: calloc error geting vmc", __func__);
544 ret = -1;
545 goto err;
546 }
547
548 flags |= VMOP_CREATE_MEMORY0x04;
549 memcpy(&vmc->vmc_params, &current_vm->vm_params, sizeof(struct
550 vmop_create_params));
551 vmc->vmc_flags = flags;
552 vrp.vrwp_vm_id = vcp->vcp_id;
553 vrp.vrwp_mask = VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10);
554 vpp.vpp_mask = VM_RWVMPARAMS_ALL(0x1 | 0x2);
555 vpp.vpp_vm_id = vcp->vcp_id;
556
557 sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fd, vmc,sizeof(struct vmop_create_params));
558 if (sz != sizeof(struct vmop_create_params)) {
559 ret = -1;
560 goto err;
561 }
562
563 for (i = 0; i < vcp->vcp_ncpus; i++) {
564 vrp.vrwp_vcpu_id = i;
565 if ((ret = ioctl(env->vmd_fd, VMM_IOC_READREGS, &vrp))) {
566 log_warn("%s: readregs failed", __func__);
567 goto err;
568 }
569
570 sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fd, &vrp,
571 sizeof(struct vm_rwregs_params));
572 if (sz != sizeof(struct vm_rwregs_params)) {
573 log_warn("%s: dumping registers failed", __func__);
574 ret = -1;
575 goto err;
576 }
577 }
578
579 if ((ret = i8253_dump(fd)))
580 goto err;
581 if ((ret = i8259_dump(fd)))
582 goto err;
583 if ((ret = ns8250_dump(fd)))
584 goto err;
585 if ((ret = mc146818_dump(fd)))
586 goto err;
587 if ((ret = fw_cfg_dump(fd)))
588 goto err;
589 if ((ret = pci_dump(fd)))
590 goto err;
591 if ((ret = virtio_dump(fd)))
592 goto err;
593 if ((ret = dump_mem(fd, vcp)))
594 goto err;
595
596 for (i = 0; i < vcp->vcp_ncpus; i++) {
597 vpp.vpp_vcpu_id = i;
598 if ((ret = ioctl(env->vmd_fd, VMM_IOC_READVMPARAMS, &vpp))) {
599 log_warn("%s: readvmparams failed", __func__);
600 goto err;
601 }
602
603 sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fd, &vpp,
604 sizeof(struct vm_rwvmparams_params));
605 if (sz != sizeof(struct vm_rwvmparams_params)) {
606 log_warn("%s: dumping vm params failed", __func__);
607 ret = -1;
608 goto err;
609 }
610 }
611
612 vtp.vtp_vm_id = vcp->vcp_id;
613 if (ioctl(env->vmd_fd, VMM_IOC_TERM, &vtp) == -1) {
614 log_warnx("%s: term IOC error: %d, %d", __func__,
615 errno(*__errno()), ENOENT2);
616 }
617err:
618 close(fd);
619 if (ret)
620 unpause_vm(vcp);
621 return ret;
622}
623
624int
625dump_send_header(int fd) {
626 struct vm_dump_header vmh;
627 int i;
628
629 memcpy(&vmh.vmh_signature, VM_DUMP_SIGNATURE"OpenBSDVMM58",
630 sizeof(vmh.vmh_signature));
631
632 vmh.vmh_cpuids[0].code = 0x00;
633 vmh.vmh_cpuids[0].leaf = 0x00;
634
635 vmh.vmh_cpuids[1].code = 0x01;
636 vmh.vmh_cpuids[1].leaf = 0x00;
637
638 vmh.vmh_cpuids[2].code = 0x07;
639 vmh.vmh_cpuids[2].leaf = 0x00;
640
641 vmh.vmh_cpuids[3].code = 0x0d;
642 vmh.vmh_cpuids[3].leaf = 0x00;
643
644 vmh.vmh_cpuids[4].code = 0x80000001;
645 vmh.vmh_cpuids[4].leaf = 0x00;
646
647 vmh.vmh_version = VM_DUMP_VERSION7;
648
649 for (i=0; i < VM_DUMP_HEADER_CPUID_COUNT5; i++) {
650 CPUID_LEAF(vmh.vmh_cpuids[i].code,
651 vmh.vmh_cpuids[i].leaf,
652 vmh.vmh_cpuids[i].a,
653 vmh.vmh_cpuids[i].b,
654 vmh.vmh_cpuids[i].c,
655 vmh.vmh_cpuids[i].d);
656 }
657
658 if (atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fd, &vmh, sizeof(vmh)) != sizeof(vmh))
659 return (-1);
660
661 return (0);
662}
663
664int
665dump_mem(int fd, struct vm_create_params *vcp)
666{
667 unsigned int i;
668 int ret;
669 struct vm_mem_range *vmr;
670
671 for (i = 0; i < vcp->vcp_nmemranges; i++) {
672 vmr = &vcp->vcp_memranges[i];
673 ret = dump_vmr(fd, vmr);
674 if (ret)
675 return ret;
676 }
677 return (0);
678}
679
680int
681restore_vm_params(int fd, struct vm_create_params *vcp) {
682 unsigned int i;
683 struct vm_rwvmparams_params vpp;
684
685 for (i = 0; i < vcp->vcp_ncpus; i++) {
686 if (atomicio(read, fd, &vpp, sizeof(vpp)) != sizeof(vpp)) {
687 log_warn("%s: error restoring vm params", __func__);
688 return (-1);
689 }
690 vpp.vpp_vm_id = vcp->vcp_id;
691 vpp.vpp_vcpu_id = i;
692 if (ioctl(env->vmd_fd, VMM_IOC_WRITEVMPARAMS, &vpp) < 0) {
693 log_debug("%s: writing vm params failed", __func__);
694 return (-1);
695 }
696 }
697 return (0);
698}
699
700void
701restore_mem(int fd, struct vm_create_params *vcp)
702{
703 unsigned int i;
704 struct vm_mem_range *vmr;
705
706 for (i = 0; i < vcp->vcp_nmemranges; i++) {
707 vmr = &vcp->vcp_memranges[i];
708 restore_vmr(fd, vmr);
709 }
710}
711
712int
713dump_vmr(int fd, struct vm_mem_range *vmr)
714{
715 size_t rem = vmr->vmr_size, read=0;
716 char buf[PAGE_SIZE(1 << 12)];
717
718 while (rem > 0) {
719 if (read_mem(vmr->vmr_gpa + read, buf, PAGE_SIZE(1 << 12))) {
720 log_warn("failed to read vmr");
721 return (-1);
722 }
723 if (atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, fd, buf, sizeof(buf)) != sizeof(buf)) {
724 log_warn("failed to dump vmr");
725 return (-1);
726 }
727 rem = rem - PAGE_SIZE(1 << 12);
728 read = read + PAGE_SIZE(1 << 12);
729 }
730 return (0);
731}
732
733void
734restore_vmr(int fd, struct vm_mem_range *vmr)
735{
736 size_t rem = vmr->vmr_size, wrote=0;
737 char buf[PAGE_SIZE(1 << 12)];
738
739 while (rem > 0) {
740 if (atomicio(read, fd, buf, sizeof(buf)) != sizeof(buf))
741 fatal("failed to restore vmr");
742 if (write_mem(vmr->vmr_gpa + wrote, buf, PAGE_SIZE(1 << 12)))
743 fatal("failed to write vmr");
744 rem = rem - PAGE_SIZE(1 << 12);
745 wrote = wrote + PAGE_SIZE(1 << 12);
746 }
747}
748
749void
750pause_vm(struct vm_create_params *vcp)
751{
752 unsigned int n;
753 int ret;
754 if (current_vm->vm_state & VM_STATE_PAUSED0x10)
755 return;
756
757 current_vm->vm_state |= VM_STATE_PAUSED0x10;
758
759 ret = pthread_barrier_init(&vm_pause_barrier, NULL((void*)0), vcp->vcp_ncpus + 1);
760 if (ret) {
761 log_warnx("%s: cannot initialize pause barrier (%d)",
762 __progname, ret);
763 return;
764 }
765
766 for (n = 0; n < vcp->vcp_ncpus; n++) {
767 ret = pthread_cond_broadcast(&vcpu_run_cond[n]);
768 if (ret) {
769 log_warnx("%s: can't broadcast vcpu run cond (%d)",
770 __func__, (int)ret);
771 return;
772 }
773 }
774 ret = pthread_barrier_wait(&vm_pause_barrier);
775 if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD-1) {
776 log_warnx("%s: could not wait on pause barrier (%d)",
777 __func__, (int)ret);
778 return;
779 }
780
781 ret = pthread_barrier_destroy(&vm_pause_barrier);
782 if (ret) {
783 log_warnx("%s: could not destroy pause barrier (%d)",
784 __progname, ret);
785 return;
786 }
787
788 i8253_stop();
789 mc146818_stop();
790 ns8250_stop();
791 virtio_stop(vcp);
792}
793
794void
795unpause_vm(struct vm_create_params *vcp)
796{
797 unsigned int n;
798 int ret;
799 if (!(current_vm->vm_state & VM_STATE_PAUSED0x10))
800 return;
801
802 current_vm->vm_state &= ~VM_STATE_PAUSED0x10;
803 for (n = 0; n < vcp->vcp_ncpus; n++) {
804 ret = pthread_cond_broadcast(&vcpu_unpause_cond[n]);
805 if (ret) {
806 log_warnx("%s: can't broadcast vcpu unpause cond (%d)",
807 __func__, (int)ret);
808 return;
809 }
810 }
811
812 i8253_start();
813 mc146818_start();
814 ns8250_start();
815 virtio_start(vcp);
816}
817
818/*
819 * vcpu_reset
820 *
821 * Requests vmm(4) to reset the VCPUs in the indicated VM to
822 * the register state provided
823 *
824 * Parameters
825 * vmid: VM ID to reset
826 * vcpu_id: VCPU ID to reset
827 * vrs: the register state to initialize
828 *
829 * Return values:
830 * 0: success
831 * !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
832 * valid)
833 */
834int
835vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
836{
837 struct vm_resetcpu_params vrp;
838
839 memset(&vrp, 0, sizeof(vrp));
840 vrp.vrp_vm_id = vmid;
841 vrp.vrp_vcpu_id = vcpu_id;
842 memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));
843
844 log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);
845
846 if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) == -1)
847 return (errno(*__errno()));
848
849 return (0);
850}
851
852/*
853 * create_memory_map
854 *
855 * Sets up the guest physical memory ranges that the VM can access.
856 *
857 * Parameters:
858 * vcp: VM create parameters describing the VM whose memory map
859 * is being created
860 *
861 * Return values:
862 * nothing
863 */
864void
865create_memory_map(struct vm_create_params *vcp)
866{
867 size_t len, mem_bytes, mem_mb;
868
869 mem_mb = vcp->vcp_memranges[0].vmr_size;
870 vcp->vcp_nmemranges = 0;
871 if (mem_mb < 1 || mem_mb > VMM_MAX_VM_MEM_SIZE32768)
872 return;
873
874 mem_bytes = mem_mb * 1024 * 1024;
875
876 /* First memory region: 0 - LOWMEM_KB (DOS low mem) */
877 len = LOWMEM_KB640 * 1024;
878 vcp->vcp_memranges[0].vmr_gpa = 0x0;
879 vcp->vcp_memranges[0].vmr_size = len;
880 mem_bytes -= len;
881
882 /*
883 * Second memory region: LOWMEM_KB - 1MB.
884 *
885 * N.B. - Normally ROMs or parts of video RAM are mapped here.
886 * We have to add this region, because some systems
887 * unconditionally write to 0xb8000 (VGA RAM), and
888 * we need to make sure that vmm(4) permits accesses
889 * to it. So allocate guest memory for it.
890 */
891 len = 0x100000 - LOWMEM_KB640 * 1024;
892 vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB640 * 1024;
893 vcp->vcp_memranges[1].vmr_size = len;
894 mem_bytes -= len;
895
896 /* Make sure that we do not place physical memory into MMIO ranges. */
897 if (mem_bytes > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL - 0x100000)
898 len = VMM_PCI_MMIO_BAR_BASE0xF0000000ULL - 0x100000;
899 else
900 len = mem_bytes;
901
902 /* Third memory region: 1MB - (1MB + len) */
903 vcp->vcp_memranges[2].vmr_gpa = 0x100000;
904 vcp->vcp_memranges[2].vmr_size = len;
905 mem_bytes -= len;
906
907 if (mem_bytes > 0) {
908 /* Fourth memory region for the remaining memory (if any) */
909 vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL + 1;
910 vcp->vcp_memranges[3].vmr_size = mem_bytes;
911 vcp->vcp_nmemranges = 4;
912 } else
913 vcp->vcp_nmemranges = 3;
914}
915
916/*
917 * alloc_guest_mem
918 *
919 * Allocates memory for the guest.
920 * Instead of doing a single allocation with one mmap(), we allocate memory
921 * separately for every range for the following reasons:
922 * - ASLR for the individual ranges
923 * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
924 * map the single mmap'd userspace memory to the individual guest physical
925 * memory ranges, the underlying amap of the single mmap'd range would have
926 * to allocate per-page reference counters. The reason is that the
927 * individual guest physical ranges would reference the single mmap'd region
928 * only partially. However, if every guest physical range has its own
929 * corresponding mmap'd userspace allocation, there are no partial
930 * references: every guest physical range fully references an mmap'd
931 * range => no per-page reference counters have to be allocated.
932 *
933 * Return values:
934 * 0: success
935 * !0: failure - errno indicating the source of the failure
936 */
937int
938alloc_guest_mem(struct vm_create_params *vcp)
939{
940 void *p;
941 int ret;
942 size_t i, j;
943 struct vm_mem_range *vmr;
944
945 for (i = 0; i < vcp->vcp_nmemranges; i++) {
946 vmr = &vcp->vcp_memranges[i];
947 p = mmap(NULL((void*)0), vmr->vmr_size, PROT_READ0x01 | PROT_WRITE0x02,
948 MAP_PRIVATE0x0002 | MAP_ANON0x1000, -1, 0);
949 if (p == MAP_FAILED((void *)-1)) {
950 ret = errno(*__errno());
951 for (j = 0; j < i; j++) {
952 vmr = &vcp->vcp_memranges[j];
953 munmap((void *)vmr->vmr_va, vmr->vmr_size);
954 }
955
956 return (ret);
957 }
958
959 vmr->vmr_va = (vaddr_t)p;
960 }
961
962 return (0);
963}
964
965/*
966 * vmm_create_vm
967 *
968 * Requests vmm(4) to create a new VM using the supplied creation
969 * parameters. This operation results in the creation of the in-kernel
970 * structures for the VM, but does not start the VM's vcpu(s).
971 *
972 * Parameters:
973 * vcp: vm_create_params struct containing the VM's desired creation
974 * configuration
975 *
976 * Return values:
977 * 0: success
978 * !0 : ioctl to vmm(4) failed
979 */
980int
981vmm_create_vm(struct vm_create_params *vcp)
982{
983 /* Sanity check arguments */
984 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM64)
985 return (EINVAL22);
986
987 if (vcp->vcp_nmemranges == 0 ||
988 vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES16)
989 return (EINVAL22);
990
991 if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM4)
992 return (EINVAL22);
993
994 if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM4)
995 return (EINVAL22);
996
997 if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) == -1)
998 return (errno(*__errno()));
999
1000 return (0);
1001}
1002
1003/*
1004 * init_emulated_hw
1005 *
1006 * Initializes the userspace hardware emulation
1007 */
1008void
1009init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
1010 int child_disks[][VM_MAX_BASE_PER_DISK4], int *child_taps)
1011{
1012 struct vm_create_params *vcp = &vmc->vmc_params;
1013 int i;
1014 uint64_t memlo, memhi;
1015
1016 /* Calculate memory size for NVRAM registers */
1017 memlo = memhi = 0;
1018 if (vcp->vcp_nmemranges > 2)
1019 memlo = vcp->vcp_memranges[2].vmr_size - 15 * 0x100000;
1020
1021 if (vcp->vcp_nmemranges > 3)
1022 memhi = vcp->vcp_memranges[3].vmr_size;
1023
1024 /* Reset the IO port map */
1025 memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS65536);
1026
1027 /* Init i8253 PIT */
1028 i8253_init(vcp->vcp_id);
1029 ioports_map[TIMER_CTRL0x43] = vcpu_exit_i8253;
1030 ioports_map[TIMER_BASE0x40 + TIMER_CNTR00] = vcpu_exit_i8253;
1031 ioports_map[TIMER_BASE0x40 + TIMER_CNTR11] = vcpu_exit_i8253;
1032 ioports_map[TIMER_BASE0x40 + TIMER_CNTR22] = vcpu_exit_i8253;
1033 ioports_map[PCKBC_AUX0x61] = vcpu_exit_i8253_misc;
1034
1035 /* Init mc146818 RTC */
1036 mc146818_init(vcp->vcp_id, memlo, memhi);
1037 ioports_map[IO_RTC0x070] = vcpu_exit_mc146818;
1038 ioports_map[IO_RTC0x070 + 1] = vcpu_exit_mc146818;
1039
1040 /* Init master and slave PICs */
1041 i8259_init();
1042 ioports_map[IO_ICU10x020] = vcpu_exit_i8259;
1043 ioports_map[IO_ICU10x020 + 1] = vcpu_exit_i8259;
1044 ioports_map[IO_ICU20x0A0] = vcpu_exit_i8259;
1045 ioports_map[IO_ICU20x0A0 + 1] = vcpu_exit_i8259;
1046 ioports_map[ELCR00x4D0] = vcpu_exit_elcr;
1047 ioports_map[ELCR10x4D1] = vcpu_exit_elcr;
1048
1049 /* Init ns8250 UART */
1050 ns8250_init(con_fd, vcp->vcp_id);
1051 for (i = COM1_DATA0x3f8 +0; i <= COM1_SCR0x3f8 +7; i++)
1052 ioports_map[i] = vcpu_exit_com;
1053
1054 /* Init QEMU fw_cfg interface */
1055 fw_cfg_init(vmc);
1056 ioports_map[FW_CFG_IO_SELECT0x510] = vcpu_exit_fw_cfg;
1057 ioports_map[FW_CFG_IO_DATA0x511] = vcpu_exit_fw_cfg;
1058 ioports_map[FW_CFG_IO_DMA_ADDR_HIGH0x514] = vcpu_exit_fw_cfg_dma;
1059 ioports_map[FW_CFG_IO_DMA_ADDR_LOW0x518] = vcpu_exit_fw_cfg_dma;
1060
1061 /* Initialize PCI */
1062 for (i = VMM_PCI_IO_BAR_BASE0x1000; i <= VMM_PCI_IO_BAR_END0xFFFF; i++)
1063 ioports_map[i] = vcpu_exit_pci;
1064
1065 ioports_map[PCI_MODE1_ADDRESS_REG0x0cf8] = vcpu_exit_pci;
1066 ioports_map[PCI_MODE1_DATA_REG0x0cfc] = vcpu_exit_pci;
1067 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 1] = vcpu_exit_pci;
1068 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 2] = vcpu_exit_pci;
1069 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 3] = vcpu_exit_pci;
1070 pci_init();
1071
1072 /* Initialize virtio devices */
1073 virtio_init(current_vm, child_cdrom, child_disks, child_taps);
1074}
1075/*
1076 * restore_emulated_hw
1077 *
1078 * Restores the userspace hardware emulation from fd
1079 */
1080void
1081restore_emulated_hw(struct vm_create_params *vcp, int fd,
1082 int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK4], int child_cdrom)
1083{
1084 /* struct vm_create_params *vcp = &vmc->vmc_params; */
1085 int i;
1086 memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS65536);
1087
1088 /* Init i8253 PIT */
1089 i8253_restore(fd, vcp->vcp_id);
1090 ioports_map[TIMER_CTRL0x43] = vcpu_exit_i8253;
1091 ioports_map[TIMER_BASE0x40 + TIMER_CNTR00] = vcpu_exit_i8253;
1092 ioports_map[TIMER_BASE0x40 + TIMER_CNTR11] = vcpu_exit_i8253;
1093 ioports_map[TIMER_BASE0x40 + TIMER_CNTR22] = vcpu_exit_i8253;
1094
1095 /* Init master and slave PICs */
1096 i8259_restore(fd);
1097 ioports_map[IO_ICU10x020] = vcpu_exit_i8259;
1098 ioports_map[IO_ICU10x020 + 1] = vcpu_exit_i8259;
1099 ioports_map[IO_ICU20x0A0] = vcpu_exit_i8259;
1100 ioports_map[IO_ICU20x0A0 + 1] = vcpu_exit_i8259;
1101
1102 /* Init ns8250 UART */
1103 ns8250_restore(fd, con_fd, vcp->vcp_id);
1104 for (i = COM1_DATA0x3f8 +0; i <= COM1_SCR0x3f8 +7; i++)
1105 ioports_map[i] = vcpu_exit_com;
1106
1107 /* Init mc146818 RTC */
1108 mc146818_restore(fd, vcp->vcp_id);
1109 ioports_map[IO_RTC0x070] = vcpu_exit_mc146818;
1110 ioports_map[IO_RTC0x070 + 1] = vcpu_exit_mc146818;
1111
1112 /* Init QEMU fw_cfg interface */
1113 fw_cfg_restore(fd);
1114 ioports_map[FW_CFG_IO_SELECT0x510] = vcpu_exit_fw_cfg;
1115 ioports_map[FW_CFG_IO_DATA0x511] = vcpu_exit_fw_cfg;
1116 ioports_map[FW_CFG_IO_DMA_ADDR_HIGH0x514] = vcpu_exit_fw_cfg_dma;
1117 ioports_map[FW_CFG_IO_DMA_ADDR_LOW0x518] = vcpu_exit_fw_cfg_dma;
1118
1119 /* Initialize PCI */
1120 for (i = VMM_PCI_IO_BAR_BASE0x1000; i <= VMM_PCI_IO_BAR_END0xFFFF; i++)
1121 ioports_map[i] = vcpu_exit_pci;
1122
1123 ioports_map[PCI_MODE1_ADDRESS_REG0x0cf8] = vcpu_exit_pci;
1124 ioports_map[PCI_MODE1_DATA_REG0x0cfc] = vcpu_exit_pci;
1125 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 1] = vcpu_exit_pci;
1126 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 2] = vcpu_exit_pci;
1127 ioports_map[PCI_MODE1_DATA_REG0x0cfc + 3] = vcpu_exit_pci;
1128 pci_restore(fd);
1129 virtio_restore(fd, current_vm, child_cdrom, child_disks, child_taps);
1130}
1131
1132/*
1133 * run_vm
1134 *
1135 * Runs the VM whose creation parameters are specified in vcp
1136 *
1137 * Parameters:
1138 * child_cdrom: previously-opened child ISO disk file descriptor
1139 * child_disks: previously-opened child VM disk file descriptors
1140 * child_taps: previously-opened child tap file descriptors
1141 * vmc: vmop_create_params struct containing the VM's desired creation
1142 * configuration
1143 * vrs: VCPU register state to initialize
1144 *
1145 * Return values:
1146 * 0: the VM exited normally
1147 * !0 : the VM exited abnormally or failed to start
1148 */
1149int
1150run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_PER_DISK4],
1151 int *child_taps, struct vmop_create_params *vmc,
1152 struct vcpu_reg_state *vrs)
1153{
1154 struct vm_create_params *vcp = &vmc->vmc_params;
1155 struct vm_rwregs_params vregsp;
1156 uint8_t evdone = 0;
1157 size_t i;
1158 int ret;
1159 pthread_t *tid, evtid;
1160 struct vm_run_params **vrp;
1161 void *exit_status;
1162
1163 if (vcp == NULL((void*)0))
1164 return (EINVAL22);
1165
1166 if (child_cdrom == -1 && strlen(vcp->vcp_cdrom))
1167 return (EINVAL22);
1168
1169 if (child_disks == NULL((void*)0) && vcp->vcp_ndisks != 0)
1170 return (EINVAL22);
1171
1172 if (child_taps == NULL((void*)0) && vcp->vcp_nnics != 0)
1173 return (EINVAL22);
1174
1175 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM64)
1176 return (EINVAL22);
1177
1178 if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM4)
1179 return (EINVAL22);
1180
1181 if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM4)
1182 return (EINVAL22);
1183
1184 if (vcp->vcp_nmemranges == 0 ||
1185 vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES16)
1186 return (EINVAL22);
1187
1188 tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
1189 vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
1190 if (tid == NULL((void*)0) || vrp == NULL((void*)0)) {
1191 log_warn("%s: memory allocation error - exiting.",
1192 __progname);
1193 return (ENOMEM12);
1194 }
1195
1196 log_debug("%s: initializing hardware for vm %s", __func__,
1197 vcp->vcp_name);
1198
1199 if (!(current_vm->vm_state & VM_STATE_RECEIVED0x08))
1200 init_emulated_hw(vmc, child_cdrom, child_disks, child_taps);
1201
1202 ret = pthread_mutex_init(&threadmutex, NULL((void*)0));
1203 if (ret) {
1204 log_warn("%s: could not initialize thread state mutex",
1205 __func__);
1206 return (ret);
1207 }
1208 ret = pthread_cond_init(&threadcond, NULL((void*)0));
1209 if (ret) {
1210 log_warn("%s: could not initialize thread state "
1211 "condition variable", __func__);
1212 return (ret);
1213 }
1214
1215 mutex_lock(&threadmutex);
1216
1217 log_debug("%s: starting vcpu threads for vm %s", __func__,
1218 vcp->vcp_name);
1219
1220 /*
1221 * Create and launch one thread for each VCPU. These threads may
1222 * migrate between PCPUs over time; the need to reload CPU state
1223 * in such situations is detected and performed by vmm(4) in the
1224 * kernel.
1225 */
1226 for (i = 0 ; i < vcp->vcp_ncpus; i++) {
1227 vrp[i] = malloc(sizeof(struct vm_run_params));
1228 if (vrp[i] == NULL((void*)0)) {
1229 log_warn("%s: memory allocation error - "
1230 "exiting.", __progname);
1231 /* caller will exit, so skip freeing */
1232 return (ENOMEM12);
1233 }
1234 vrp[i]->vrp_exit = malloc(sizeof(struct vm_exit));
1235 if (vrp[i]->vrp_exit == NULL((void*)0)) {
1236 log_warn("%s: memory allocation error - "
1237 "exiting.", __progname);
1238 /* caller will exit, so skip freeing */
1239 return (ENOMEM12);
1240 }
1241 vrp[i]->vrp_vm_id = vcp->vcp_id;
1242 vrp[i]->vrp_vcpu_id = i;
1243
1244 if (vcpu_reset(vcp->vcp_id, i, vrs)) {
1245 log_warnx("%s: cannot reset VCPU %zu - exiting.",
1246 __progname, i);
1247 return (EIO5);
1248 }
1249
1250 /* once more because reset_cpu changes regs */
1251 if (current_vm->vm_state & VM_STATE_RECEIVED0x08) {
1252 vregsp.vrwp_vm_id = vcp->vcp_id;
1253 vregsp.vrwp_vcpu_id = i;
1254 vregsp.vrwp_regs = *vrs;
1255 vregsp.vrwp_mask = VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10);
1256 if ((ret = ioctl(env->vmd_fd, VMM_IOC_WRITEREGS,
1257 &vregsp)) == -1) {
1258 log_warn("%s: writeregs failed", __func__);
1259 return (ret);
1260 }
1261 }
1262
1263 ret = pthread_cond_init(&vcpu_run_cond[i], NULL((void*)0));
1264 if (ret) {
1265 log_warnx("%s: cannot initialize cond var (%d)",
1266 __progname, ret);
1267 return (ret);
1268 }
1269
1270 ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL((void*)0));
1271 if (ret) {
1272 log_warnx("%s: cannot initialize mtx (%d)",
1273 __progname, ret);
1274 return (ret);
1275 }
1276
1277 ret = pthread_cond_init(&vcpu_unpause_cond[i], NULL((void*)0));
1278 if (ret) {
1279 log_warnx("%s: cannot initialize unpause var (%d)",
1280 __progname, ret);
1281 return (ret);
1282 }
1283
1284 ret = pthread_mutex_init(&vcpu_unpause_mtx[i], NULL((void*)0));
1285 if (ret) {
1286 log_warnx("%s: cannot initialize unpause mtx (%d)",
1287 __progname, ret);
1288 return (ret);
1289 }
1290
1291 vcpu_hlt[i] = 0;
1292
1293 /* Start each VCPU run thread at vcpu_run_loop */
1294 ret = pthread_create(&tid[i], NULL((void*)0), vcpu_run_loop, vrp[i]);
1295 if (ret) {
1296 /* caller will _exit after this return */
1297 ret = errno(*__errno());
1298 log_warn("%s: could not create vcpu thread %zu",
1299 __func__, i);
1300 return (ret);
1301 }
1302 }
1303
1304 log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
1305 ret = pthread_create(&evtid, NULL((void*)0), event_thread, &evdone);
1306 if (ret) {
1307 errno(*__errno()) = ret;
1308 log_warn("%s: could not create event thread", __func__);
1309 return (ret);
1310 }
1311
1312 for (;;) {
1313 ret = pthread_cond_wait(&threadcond, &threadmutex);
1314 if (ret) {
1315 log_warn("%s: waiting on thread state condition "
1316 "variable failed", __func__);
1317 return (ret);
1318 }
1319
1320 /*
1321 * Did a VCPU thread exit with an error? => return the first one
1322 */
1323 for (i = 0; i < vcp->vcp_ncpus; i++) {
1324 if (vcpu_done[i] == 0)
1325 continue;
1326
1327 if (pthread_join(tid[i], &exit_status)) {
1328 log_warn("%s: failed to join thread %zd - "
1329 "exiting", __progname, i);
1330 return (EIO5);
1331 }
1332
1333 ret = (intptr_t)exit_status;
1334 }
1335
1336 /* Did the event thread exit? => return with an error */
1337 if (evdone) {
1338 if (pthread_join(evtid, &exit_status)) {
1339 log_warn("%s: failed to join event thread - "
1340 "exiting", __progname);
1341 return (EIO5);
1342 }
1343
1344 log_warnx("%s: vm %d event thread exited "
1345 "unexpectedly", __progname, vcp->vcp_id);
1346 return (EIO5);
1347 }
1348
1349 /* Did all VCPU threads exit successfully? => return */
1350 for (i = 0; i < vcp->vcp_ncpus; i++) {
1351 if (vcpu_done[i] == 0)
1352 break;
1353 }
1354 if (i == vcp->vcp_ncpus)
1355 return (ret);
1356
1357 /* Some more threads to wait for, start over */
1358 }
1359
1360 return (ret);
1361}
1362
1363void *
1364event_thread(void *arg)
1365{
1366 uint8_t *donep = arg;
1367 intptr_t ret;
1368
1369 ret = event_dispatch();
1370
1371 mutex_lock(&threadmutex);
1372 *donep = 1;
1373 pthread_cond_signal(&threadcond);
1374 mutex_unlock(&threadmutex);
1375
1376 return (void *)ret;
1377 }
1378
1379/*
1380 * vcpu_run_loop
1381 *
1382 * Runs a single VCPU until vmm(4) requires help handling an exit,
1383 * or the VM terminates.
1384 *
1385 * Parameters:
1386 * arg: vcpu_run_params for the VCPU being run by this thread
1387 *
1388 * Return values:
1389 * NULL: the VCPU shutdown properly
1390 * !NULL: error processing VCPU run, or the VCPU shutdown abnormally
1391 */
1392void *
1393vcpu_run_loop(void *arg)
1394{
1395 struct vm_run_params *vrp = (struct vm_run_params *)arg;
1396 intptr_t ret = 0;
1397 int irq;
1398 uint32_t n;
1399
1400 vrp->vrp_continue = 0;
1401 n = vrp->vrp_vcpu_id;
1402
1403 for (;;) {
1404 ret = pthread_mutex_lock(&vcpu_run_mtx[n]);
1405
1406 if (ret) {
1407 log_warnx("%s: can't lock vcpu run mtx (%d)",
1408 __func__, (int)ret);
1409 return ((void *)ret);
1410 }
1411
1412 /* If we are halted and need to pause, pause */
1413 if (vcpu_hlt[n] && (current_vm->vm_state & VM_STATE_PAUSED0x10)) {
1414 ret = pthread_barrier_wait(&vm_pause_barrier);
1415 if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD-1) {
1416 log_warnx("%s: could not wait on pause barrier (%d)",
1417 __func__, (int)ret);
1418 return ((void *)ret);
1419 }
1420
1421 ret = pthread_mutex_lock(&vcpu_unpause_mtx[n]);
1422 if (ret) {
1423 log_warnx("%s: can't lock vcpu unpause mtx (%d)",
1424 __func__, (int)ret);
1425 return ((void *)ret);
1426 }
1427
1428 ret = pthread_cond_wait(&vcpu_unpause_cond[n],
1429 &vcpu_unpause_mtx[n]);
1430 if (ret) {
1431 log_warnx(
1432 "%s: can't wait on unpause cond (%d)",
1433 __func__, (int)ret);
1434 break;
1435 }
1436 ret = pthread_mutex_unlock(&vcpu_unpause_mtx[n]);
1437 if (ret) {
1438 log_warnx("%s: can't unlock unpause mtx (%d)",
1439 __func__, (int)ret);
1440 break;
1441 }
1442 }
1443
1444 /* If we are halted and not paused, wait */
1445 if (vcpu_hlt[n]) {
1446 ret = pthread_cond_wait(&vcpu_run_cond[n],
1447 &vcpu_run_mtx[n]);
1448
1449 if (ret) {
1450 log_warnx(
1451 "%s: can't wait on cond (%d)",
1452 __func__, (int)ret);
1453 (void)pthread_mutex_unlock(
1454 &vcpu_run_mtx[n]);
1455 break;
1456 }
1457 }
1458
1459 ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);
1460
1461 if (ret) {
1462 log_warnx("%s: can't unlock mutex on cond (%d)",
1463 __func__, (int)ret);
1464 break;
1465 }
1466
1467 if (vrp->vrp_irqready && i8259_is_pending()) {
1468 irq = i8259_ack();
1469 vrp->vrp_irq = irq;
1470 } else
1471 vrp->vrp_irq = 0xFFFF;
1472
1473 /* Still more pending? */
1474 if (i8259_is_pending()) {
1475 /*
1476 * XXX can probably avoid ioctls here by providing intr
1477 * in vrp
1478 */
1479 if (vcpu_pic_intr(vrp->vrp_vm_id,
1480 vrp->vrp_vcpu_id, 1)) {
1481 fatal("can't set INTR");
1482 }
1483 } else {
1484 if (vcpu_pic_intr(vrp->vrp_vm_id,
1485 vrp->vrp_vcpu_id, 0)) {
1486 fatal("can't clear INTR");
1487 }
1488 }
1489
1490 if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) == -1) {
1491 /* If run ioctl failed, exit */
1492 ret = errno(*__errno());
1493 log_warn("%s: vm %d / vcpu %d run ioctl failed",
1494 __func__, vrp->vrp_vm_id, n);
1495 break;
1496 }
1497
1498 /* If the VM is terminating, exit normally */
1499 if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED0xFFFE) {
1500 ret = (intptr_t)NULL((void*)0);
1501 break;
1502 }
1503
1504 if (vrp->vrp_exit_reason != VM_EXIT_NONE0xFFFF) {
1505 /*
1506 * vmm(4) needs help handling an exit, handle in
1507 * vcpu_exit.
1508 */
1509 ret = vcpu_exit(vrp);
1510 if (ret)
1511 break;
1512 }
1513 }
1514
1515 mutex_lock(&threadmutex);
1516 vcpu_done[n] = 1;
1517 pthread_cond_signal(&threadcond);
1518 mutex_unlock(&threadmutex);
1519
1520 return ((void *)ret);
1521}
1522
1523int
1524vcpu_pic_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
1525{
1526 struct vm_intr_params vip;
1527
1528 memset(&vip, 0, sizeof(vip));
1529
1530 vip.vip_vm_id = vm_id;
1531 vip.vip_vcpu_id = vcpu_id; /* XXX always 0? */
1532 vip.vip_intr = intr;
1533
1534 if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) == -1)
1535 return (errno(*__errno()));
1536
1537 return (0);
1538}
1539
1540/*
1541 * vcpu_exit_pci
1542 *
1543 * Handle all I/O to the emulated PCI subsystem.
1544 *
1545 * Parameters:
1546 * vrp: vcpu run parameters containing guest state for this exit
1547 *
1548 * Return value:
1549 * Interrupt to inject to the guest VM, or 0xFF if no interrupt should
1550 * be injected.
1551 */
1552uint8_t
1553vcpu_exit_pci(struct vm_run_params *vrp)
1554{
1555 struct vm_exit *vei = vrp->vrp_exit;
1556 uint8_t intr;
1557
1558 intr = 0xFF;
1559
1560 switch (vei->vei.vei_port) {
1561 case PCI_MODE1_ADDRESS_REG0x0cf8:
1562 pci_handle_address_reg(vrp);
1563 break;
1564 case PCI_MODE1_DATA_REG0x0cfc:
1565 case PCI_MODE1_DATA_REG0x0cfc + 1:
1566 case PCI_MODE1_DATA_REG0x0cfc + 2:
1567 case PCI_MODE1_DATA_REG0x0cfc + 3:
1568 pci_handle_data_reg(vrp);
1569 break;
1570 case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF:
1571 intr = pci_handle_io(vrp);
1572 break;
1573 default:
1574 log_warnx("%s: unknown PCI register 0x%llx",
1575 __progname, (uint64_t)vei->vei.vei_port);
1576 break;
1577 }
1578
1579 return (intr);
1580}
1581
1582/*
1583 * vcpu_exit_inout
1584 *
1585 * Handle all I/O exits that need to be emulated in vmd. This includes the
1586 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
1587 *
1588 * Parameters:
1589 * vrp: vcpu run parameters containing guest state for this exit
1590 */
1591void
1592vcpu_exit_inout(struct vm_run_params *vrp)
1593{
1594 struct vm_exit *vei = vrp->vrp_exit;
1595 uint8_t intr = 0xFF;
1596
1597 if (ioports_map[vei->vei.vei_port] != NULL((void*)0))
1598 intr = ioports_map[vei->vei.vei_port](vrp);
1599 else if (vei->vei.vei_dir == VEI_DIR_IN)
1600 set_return_data(vei, 0xFFFFFFFF);
1601
1602 if (intr != 0xFF)
1603 vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
1604}
1605
1606/*
1607 * vcpu_exit_eptviolation
1608 *
1609 * handle an EPT Violation
1610 *
1611 * Parameters:
1612 * vrp: vcpu run parameters containing guest state for this exit
1613 *
1614 * Return values:
1615 * 0: no action required
1616 * EAGAIN: a protection fault occurred, kill the vm.
1617 */
1618int
1619vcpu_exit_eptviolation(struct vm_run_params *vrp)
1620{
1621 struct vm_exit *ve = vrp->vrp_exit;
1622
1623 /*
1624 * The vcpu may be exiting to vmd to handle a pending interrupt
1625 * but last exit type may have been VMX_EXIT_EPT_VIOLATION,
1626 * check the fault_type to ensure we really are processing
1627 * a VMX_EXIT_EPT_VIOLATION.
1628 */
1629 if (ve->vee.vee_fault_type == VEE_FAULT_PROTECT) {
1630 log_debug("%s: EPT Violation: rip=0x%llx",
1631 __progname, vrp->vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP16]);
1632 return (EAGAIN35);
1633 }
1634
1635 return (0);
1636}
1637
1638/*
1639 * vcpu_exit
1640 *
1641 * Handle a vcpu exit. This function is called when it is determined that
1642 * vmm(4) requires the assistance of vmd to support a particular guest
1643 * exit type (eg, accessing an I/O port or device). Guest state is contained
1644 * in 'vrp', and will be resent to vmm(4) on exit completion.
1645 *
1646 * Upon conclusion of handling the exit, the function determines if any
1647 * interrupts should be injected into the guest, and asserts the proper
1648 * IRQ line whose interrupt should be vectored.
1649 *
1650 * Parameters:
1651 * vrp: vcpu run parameters containing guest state for this exit
1652 *
1653 * Return values:
1654 * 0: the exit was handled successfully
1655 * 1: an error occurred (eg, unknown exit reason passed in 'vrp')
1656 */
1657int
1658vcpu_exit(struct vm_run_params *vrp)
1659{
1660 int ret;
1661
1662 switch (vrp->vrp_exit_reason) {
1663 case VMX_EXIT_INT_WINDOW7:
1664 case SVM_VMEXIT_VINTR0x64:
1665 case VMX_EXIT_CPUID10:
1666 case VMX_EXIT_EXTINT1:
1667 case SVM_VMEXIT_INTR0x60:
1668 case SVM_VMEXIT_NPF0x400:
1669 case SVM_VMEXIT_MSR0x7C:
1670 case SVM_VMEXIT_CPUID0x72:
1671 /*
1672 * We may be exiting to vmd to handle a pending interrupt but
1673 * at the same time the last exit type may have been one of
1674 * these. In this case, there's nothing extra to be done
1675 * here (and falling through to the default case below results
1676 * in more vmd log spam).
1677 */
1678 break;
1679 case VMX_EXIT_EPT_VIOLATION48:
1680 ret = vcpu_exit_eptviolation(vrp);
1681 if (ret)
1682 return (ret);
1683
1684 break;
1685 case VMX_EXIT_IO30:
1686 case SVM_VMEXIT_IOIO0x7B:
1687 vcpu_exit_inout(vrp);
1688 break;
1689 case VMX_EXIT_HLT12:
1690 case SVM_VMEXIT_HLT0x78:
1691 ret = pthread_mutex_lock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
1692 if (ret) {
1693 log_warnx("%s: can't lock vcpu mutex (%d)",
1694 __func__, ret);
1695 return (ret);
1696 }
1697 vcpu_hlt[vrp->vrp_vcpu_id] = 1;
1698 ret = pthread_mutex_unlock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
1699 if (ret) {
1700 log_warnx("%s: can't unlock vcpu mutex (%d)",
1701 __func__, ret);
1702 return (ret);
1703 }
1704 break;
1705 case VMX_EXIT_TRIPLE_FAULT2:
1706 case SVM_VMEXIT_SHUTDOWN0x7F:
1707 /* reset VM */
1708 return (EAGAIN35);
1709 default:
1710 log_debug("%s: unknown exit reason 0x%x",
1711 __progname, vrp->vrp_exit_reason);
1712 }
1713
1714 vrp->vrp_continue = 1;
1715
1716 return (0);
1717}
1718
1719/*
1720 * find_gpa_range
1721 *
1722 * Search for a contiguous guest physical mem range.
1723 *
1724 * Parameters:
1725 * vcp: VM create parameters that contain the memory map to search in
1726 * gpa: the starting guest physical address
1727 * len: the length of the memory range
1728 *
1729 * Return values:
1730 * NULL: on failure if there is no memory range as described by the parameters
1731 * Pointer to vm_mem_range that contains the start of the range otherwise.
1732 */
1733static struct vm_mem_range *
1734find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
1735{
1736 size_t i, n;
1737 struct vm_mem_range *vmr;
1738
1739 /* Find the first vm_mem_range that contains gpa */
1740 for (i = 0; i < vcp->vcp_nmemranges; i++) {
1741 vmr = &vcp->vcp_memranges[i];
1742 if (vmr->vmr_gpa + vmr->vmr_size >= gpa)
1743 break;
1744 }
1745
1746 /* No range found. */
1747 if (i == vcp->vcp_nmemranges)
1748 return (NULL((void*)0));
1749
1750 /*
1751 * vmr may cover the range [gpa, gpa + len) only partly. Make
1752 * sure that the following vm_mem_ranges are contiguous and
1753 * cover the rest.
1754 */
1755 n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
1756 if (len < n)
1757 len = 0;
1758 else
1759 len -= n;
1760 gpa = vmr->vmr_gpa + vmr->vmr_size;
1761 for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
1762 vmr = &vcp->vcp_memranges[i];
1763 if (gpa != vmr->vmr_gpa)
1764 return (NULL((void*)0));
1765 if (len <= vmr->vmr_size)
1766 len = 0;
1767 else
1768 len -= vmr->vmr_size;
1769
1770 gpa = vmr->vmr_gpa + vmr->vmr_size;
1771 }
1772
1773 if (len != 0)
1774 return (NULL((void*)0));
1775
1776 return (vmr);
1777}
1778
1779/*
1780 * write_mem
1781 *
1782 * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
1783 *
1784 * Parameters:
1785 * dst: the destination paddr_t in the guest VM
1786 * buf: data to copy (or NULL to zero the data)
1787 * len: number of bytes to copy
1788 *
1789 * Return values:
1790 * 0: success
1791 * EINVAL: if the guest physical memory range [dst, dst + len) does not
1792 * exist in the guest.
1793 */
1794int
1795write_mem(paddr_t dst, const void *buf, size_t len)
1796{
1797 const char *from = buf;
1798 char *to;
1799 size_t n, off;
1800 struct vm_mem_range *vmr;
1801
1802 vmr = find_gpa_range(&current_vm->vm_params.vmc_params, dst, len);
1803 if (vmr == NULL) {
1804 errno = EINVAL;
1805 log_warn("%s: failed - invalid memory range dst = 0x%lx, "
1806 "len = 0x%zx", __func__, dst, len);
1807 return (EINVAL);
1808 }
1809
1810 off = dst - vmr->vmr_gpa;
1811 while (len != 0) {
1812 n = vmr->vmr_size - off;
1813 if (len < n)
1814 n = len;
1815
1816 to = (char *)vmr->vmr_va + off;
1817 if (buf == NULL)
1818 memset(to, 0, n);
1819 else {
1820 memcpy(to, from, n);
1821 from += n;
1822 }
1823 len -= n;
1824 off = 0;
1825 vmr++;
1826 }
1827
1828 return (0);
1829}
1830
1831/*
1832 * read_mem
1833 *
1834 * Reads memory at guest paddr 'src' into 'buf'.
1835 *
1836 * Parameters:
1837 * src: the source paddr_t in the guest VM to read from.
1838 * buf: destination (local) buffer
1839 * len: number of bytes to read
1840 *
1841 * Return values:
1842 * 0: success
1843 * EINVAL: if the guest physical memory range [src, src + len) does not
1844 * exist in the guest.
1845 */
1846int
1847read_mem(paddr_t src, void *buf, size_t len)
1848{
1849 char *from, *to = buf;
1850 size_t n, off;
1851 struct vm_mem_range *vmr;
1852
1853 vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
1854 if (vmr == NULL) {
1855 errno = EINVAL;
1856 log_warn("%s: failed - invalid memory range src = 0x%lx, "
1857 "len = 0x%zx", __func__, src, len);
1858 return (EINVAL);
1859 }
1860
1861 off = src - vmr->vmr_gpa;
1862 while (len != 0) {
1863 n = vmr->vmr_size - off;
1864 if (len < n)
1865 n = len;
1866
1867 from = (char *)vmr->vmr_va + off;
1868 memcpy(to, from, n);
1869
1870 to += n;
1871 len -= n;
1872 off = 0;
1873 vmr++;
1874 }
1875
1876 return (0);
1877}
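
Taken together, write_mem() and read_mem() give the rest of vmd byte-granular access to guest physical memory, chunked across adjacent vm_mem_ranges as needed. A brief usage sketch follows; the address 0x1000 and the buffer size are invented for illustration, and only the behaviour documented above is relied on (including NULL as a zero-fill source for write_mem()):

	/* Hypothetical example: zero one guest page, then read back 16 bytes. */
	uint8_t buf[16];

	if (write_mem(0x1000, NULL, 4096))	/* NULL buf zero-fills the range */
		log_warnx("zeroing guest page failed");
	if (read_mem(0x1000, buf, sizeof(buf)))
		log_warnx("reading guest page failed");
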
1878
1879/*
1880 * vcpu_assert_pic_irq
1881 *
1882 * Injects the specified IRQ on the supplied vcpu/vm
1883 *
1884 * Parameters:
1885 * vm_id: VM ID to inject to
1886 * vcpu_id: VCPU ID to inject to
1887 * irq: IRQ to inject
1888 */
1889void
1890vcpu_assert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
1891{
1892 int ret;
1893
1894 i8259_assert_irq(irq);
1895
1896 if (i8259_is_pending()) {
1897 if (vcpu_pic_intr(vm_id, vcpu_id, 1))
1898 fatalx("%s: can't assert INTR", __func__);
1899
1900 ret = pthread_mutex_lock(&vcpu_run_mtx[vcpu_id]);
1901 if (ret)
1902 fatalx("%s: can't lock vcpu mtx (%d)", __func__, ret);
1903
1904 vcpu_hlt[vcpu_id] = 0;
1905 ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
1906 if (ret)
1907 fatalx("%s: can't signal (%d)", __func__, ret);
1908 ret = pthread_mutex_unlock(&vcpu_run_mtx[vcpu_id]);
1909 if (ret)
1910 fatalx("%s: can't unlock vcpu mtx (%d)", __func__, ret);
1911 }
1912}
1913
1914/*
1915 * vcpu_deassert_pic_irq
1916 *
1917 * Clears the specified IRQ on the supplied vcpu/vm
1918 *
1919 * Parameters:
1920 * vm_id: VM ID to clear in
1921 * vcpu_id: VCPU ID to clear in
1922 * irq: IRQ to clear
1923 */
1924void
1925vcpu_deassert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
1926{
1927 i8259_deassert_irq(irq);
1928
1929 if (!i8259_is_pending()) {
1930 if (vcpu_pic_intr(vm_id, vcpu_id, 0))
1931 fatalx("%s: can't deassert INTR for vm_id %d, "
1932 "vcpu_id %d", __func__, vm_id, vcpu_id);
1933 }
1934}
1935
1936/*
1937 * fd_hasdata
1938 *
1939 * Determines if data can be read from a file descriptor.
1940 *
1941 * Parameters:
1942 * fd: the fd to check
1943 *
1944 * Return values:
1945 * 1 if data can be read from the fd, or 0 otherwise.
1946 */
1947int
1948fd_hasdata(int fd)
1949{
1950 struct pollfd pfd[1];
1951 int nready, hasdata = 0;
1952
1953 pfd[0].fd = fd;
1954 pfd[0].events = POLLIN;
1955 nready = poll(pfd, 1, 0);
1956 if (nready == -1)
1957 log_warn("checking file descriptor for data failed");
1958 else if (nready == 1 && pfd[0].revents & POLLIN)
1959 hasdata = 1;
1960 return (hasdata);
1961}
1962
1963/*
1964 * mutex_lock
1965 *
1966 * Wrapper function for pthread_mutex_lock that does error checking and that
1967 * exits on failure
1968 */
1969void
1970mutex_lock(pthread_mutex_t *m)
1971{
1972 int ret;
1973
1974 ret = pthread_mutex_lock(m);
1975 if (ret) {
1976 errno = ret;
1977 fatal("could not acquire mutex");
1978 }
1979}
1980
1981/*
1982 * mutex_unlock
1983 *
1984 * Wrapper function for pthread_mutex_unlock that does error checking and that
1985 * exits on failure
1986 */
1987void
1988mutex_unlock(pthread_mutex_t *m)
1989{
1990 int ret;
1991
1992 ret = pthread_mutex_unlock(m);
1993 if (ret) {
1994 errno = ret;
1995 fatal("could not release mutex");
1996 }
1997}
1998
1999/*
2000 * set_return_data
2001 *
2002 * Utility function for manipulating register data in vm exit info structs. This
2003 * function ensures that the data is copied to the vei->vei.vei_data field with
2004 * the proper size for the operation being performed.
2005 *
2006 * Parameters:
2007 * vei: exit information
2008 * data: return data
2009 */
2010void
2011set_return_data(struct vm_exit *vei, uint32_t data)
2012{
2013 switch (vei->vei.vei_size) {
2014 case 1:
2015 vei->vei.vei_data &= ~0xFF;
2016 vei->vei.vei_data |= (uint8_t)data;
2017 break;
2018 case 2:
2019 vei->vei.vei_data &= ~0xFFFF;
2020 vei->vei.vei_data |= (uint16_t)data;
2021 break;
2022 case 4:
2023 vei->vei.vei_data = data;
2024 break;
2025 }
2026}
2027
2028/*
2029 * get_input_data
2030 *
2031 * Utility function for manipulating register data in vm exit info
2032 * structs. This function ensures that the data is copied from the
2033 * vei->vei.vei_data field with the proper size for the operation being
2034 * performed.
2035 *
2036 * Parameters:
2037 * vei: exit information
2038 * data: location to store the result
2039 */
2040void
2041get_input_data(struct vm_exit *vei, uint32_t *data)
2042{
2043 switch (vei->vei.vei_size) {
2044 case 1:
2045 *data &= 0xFFFFFF00;
2046 *data |= (uint8_t)vei->vei.vei_data;
2047 break;
2048 case 2:
2049 *data &= 0xFFFF0000;
2050 *data |= (uint16_t)vei->vei.vei_data;
2051 break;
2052 case 4:
2053 *data = vei->vei.vei_data;
2054 break;
2055 default:
2056 log_warnx("%s: invalid i/o size %d", __func__,
2057 vei->vei.vei_size);
2058 }
2059
2060}
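
For context, these two helpers are meant to be called from port I/O emulation code: set_return_data() when the guest reads a port, get_input_data() when it writes one. The sketch below is hypothetical (the handler names and the 0xA5 status value are invented) and relies only on the vei_size/vei_data handling shown above:

	/* Hypothetical IN handler: return a fixed status byte, width-adjusted. */
	static void
	example_port_read(struct vm_exit *vei)
	{
		set_return_data(vei, 0xA5);
	}

	/* Hypothetical OUT handler: fetch the written value at its true width. */
	static void
	example_port_write(struct vm_exit *vei)
	{
		uint32_t data = 0;

		get_input_data(vei, &data);
		log_debug("guest wrote 0x%x", data);
	}
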
2061
2062/*
2063 * translate_gva
2064 *
2065 * Translates a guest virtual address to a guest physical address by walking
2066 * the currently active page table (if needed).
2067 *
2068 * Note - this function can possibly alter the supplied VCPU state.
2069 * Specifically, it may inject exceptions depending on the current VCPU
2070 * configuration, and may alter %cr2 on #PF. Consequently, this function
2071 * should only be used as part of instruction emulation.
2072 *
2073 * Parameters:
2074 * exit: The VCPU this translation should be performed for (guest MMU settings
2075 * are gathered from this VCPU)
2076 * va: virtual address to translate
2077 * pa: pointer to paddr_t variable that will receive the translated physical
2078 * address. 'pa' is unchanged on error.
2079 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
2080 * the address should be translated
2081 *
2082 * Return values:
2083 * 0: the address was successfully translated - 'pa' contains the physical
2084 * address currently mapped by 'va'.
2085 * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
2086 * and %cr2 set in the vcpu structure.
2087 * EINVAL: an error occurred reading paging table structures
2088 */
2089int
2090translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
2091{
2092 int level, shift, pdidx;
2093 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
2094 uint64_t shift_width, pte_size;
2095 struct vcpu_reg_state *vrs;
2096
2097 vrs = &exit->vrs;
2098
2099 if (!pa)
[1] Assuming 'pa' is non-null
[2] Taking false branch
2100 return (EINVAL);
2101
2102 if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
[3] Assuming the condition is false
[4] Taking false branch
2103 log_debug("%s: unpaged, va=pa=0x%llx", __func__, va);
2104 *pa = va;
2105 return (0);
2106 }
2107
2108 pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3];
2109
2110 log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__,
2111 vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]);
2112
2113 if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
[5] Assuming the condition is true
[6] Taking true branch
2114 if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
[7] Assuming the condition is true
[8] Taking true branch
2115 pte_size = sizeof(uint64_t);
2116 shift_width = 9;
2117
2118 if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
[9] Assuming the condition is true
[10] Taking true branch
2119 /* 4 level paging */
2120 level = 4;
2121 mask = L4_MASK;
2122 shift = L4_SHIFT;
[11] The value 39 is assigned to 'shift'
2123 } else {
2124 /* 32 bit with PAE paging */
2125 level = 3;
2126 mask = L3_MASK;
2127 shift = L3_SHIFT;
2128 }
2129 } else {
2130 /* 32 bit paging */
2131 level = 2;
2132 shift_width = 10;
2133 mask = 0xFFC00000;
2134 shift = 22;
2135 pte_size = sizeof(uint32_t);
2136 }
2137 } else
2138 return (EINVAL);
2139
2140 /* XXX: Check for R bit in segment selector and set A bit */
2141
2142 for (;level > 0; level--) {
[12] Loop condition is true. Entering loop body
2143 pdidx = (va & mask) >> shift;
2144 pte_paddr = (pt_paddr) + (pdidx * pte_size);
2145
2146 log_debug("%s: read pte level %d @ GPA 0x%llx", __func__,
2147 level, pte_paddr);
2148 if (read_mem(pte_paddr, &pte, pte_size)) {
[13] Assuming the condition is false
[14] Taking false branch
2149 log_warn("%s: failed to read pte", __func__);
2150 return (EFAULT);
2151 }
2152
2153 log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr,
2154 pte);
2155
2156 /* XXX: Set CR2 */
2157 if (!(pte & PG_V))
[15] Assuming the condition is false
[16] Taking false branch
2158 return (EFAULT);
2159
2160 /* XXX: Check for SMAP */
2161 if ((mode == PROT_WRITE) && !(pte & PG_RW))
[17] Assuming 'mode' is not equal to PROT_WRITE
2162 return (EPERM);
2163
2164 if ((exit->cpl > 0) && !(pte & PG_u))
[18] Assuming field 'cpl' is <= 0
2165 return (EPERM);
2166
2167 pte = pte | PG_U;
2168 if (mode == PROT_WRITE)
[18.1] 'mode' is not equal to PROT_WRITE
[19] Taking false branch
2169 pte = pte | PG_M;
2170 if (write_mem(pte_paddr, &pte, pte_size)) {
[20] Assuming the condition is false
[21] Taking false branch
2171 log_warn("%s: failed to write back flags to pte",
2172 __func__);
2173 return (EIO);
2174 }
2175
2176 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
2177 if (pte & PG_PS)
[22] Assuming the condition is true
[23] Taking true branch
2178 break;
[24] Execution continues on line 2187
2179
2180 if (level > 1) {
2181 pt_paddr = pte & PG_FRAME;
2182 shift -= shift_width;
2183 mask = mask >> shift_width;
2184 }
2185 }
2186
2187 low_mask = (1 << shift) - 1;
[25] The result of the left shift is undefined due to shifting by '39', which is greater or equal to the width of type 'int'
2188 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
2189 *pa = (pte & high_mask) | (va & low_mask);
2190
2191 log_debug("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, va, *pa);
2192
2193 return (0);
2194}
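
The warning at line 2187 arises because the page-table walk can leave shift at L4_SHIFT (39) when a PG_PS entry terminates the loop on its first iteration, and (1 << shift) is then evaluated on a 32-bit int, which is undefined for shift counts of 32 or more. A minimal sketch of one possible fix, assuming the surrounding variables keep their declared uint64_t types (this is an illustration only, not necessarily the change adopted upstream):

	/*
	 * Sketch of a possible fix: do the shift on a 64-bit operand so
	 * that shift values up to L4_SHIFT (39) remain well defined.
	 */
	low_mask = ((uint64_t)1 << shift) - 1;
	high_mask = (((uint64_t)1 << ((pte_size * 8) - 1)) - 1) ^ low_mask;
	*pa = (pte & high_mask) | (va & low_mask);

On real hardware the PS bit is not valid in a top-level entry, so the flagged path may not be reachable in practice, but computing the mask in 64 bits removes the undefined behaviour either way.
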
2195
2196/*
2197 * vm_pipe_init
2198 *
2199 * Initialize a vm_dev_pipe, setting up its file descriptors and its
2200 * event structure with the given callback.
2201 *
2202 * Parameters:
2203 * p: pointer to vm_dev_pipe struct to initialize
2204 * cb: callback to use for READ events on the read end of the pipe
2205 */
2206void
2207vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *))
2208{
2209 int ret;
2210 int fds[2];
2211
2212 memset(p, 0, sizeof(struct vm_dev_pipe));
2213
2214 ret = pipe(fds);
2215 if (ret)
2216 fatal("failed to create vm_dev_pipe pipe");
2217
2218 p->read = fds[0];
2219 p->write = fds[1];
2220
2221 event_set(&p->read_ev, p->read, EV_READ | EV_PERSIST, cb, NULL);
2222}
2223
2224/*
2225 * vm_pipe_send
2226 *
2227 * Send a message to an emulated device via the provided vm_dev_pipe.
2228 *
2229 * Parameters:
2230 * p: pointer to initialized vm_dev_pipe
2231 * msg: message to send in the channel
2232 */
2233void
2234vm_pipe_send(struct vm_dev_pipe *p, enum pipe_msg_type msg)
2235{
2236 size_t n;
2237 n = write(p->write, &msg, sizeof(msg));
2238 if (n != sizeof(msg))
2239 fatal("failed to write to device pipe");
2240}
2241
2242/*
2243 * vm_pipe_recv
2244 *
2245 * Receive a message for an emulated device via the provided vm_dev_pipe.
2246 * Returns the message value; exits fatally if the read fails.
2247 *
2248 * Parameters:
2249 * p: pointer to initialized vm_dev_pipe
2250 *
2251 * Return values:
2252 * a value of enum pipe_msg_type or fatal exit on read(2) error
2253 */
2254enum pipe_msg_type
2255vm_pipe_recv(struct vm_dev_pipe *p)
2256{
2257 size_t n;
2258 enum pipe_msg_type msg;
2259 n = read(p->read, &msg, sizeof(msg));
2260 if (n != sizeof(msg))
2261 fatal("failed to read from device pipe");
2262
2263 return msg;
2264}
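
The three vm_pipe_* functions implement a simple protocol: a fixed-size enum value is written whole to one end of a pipe(2) and read back whole at the other, with any short transfer treated as fatal. The standalone sketch below reproduces that pattern outside of vmd; the message names and values are invented for the example.

	#include <err.h>
	#include <stdio.h>
	#include <unistd.h>

	enum demo_msg { DEMO_PING, DEMO_PAUSE };	/* invented message values */

	int
	main(void)
	{
		int fds[2];
		enum demo_msg out = DEMO_PAUSE, in;

		if (pipe(fds) == -1)
			err(1, "pipe");

		/* Sender side: write the whole enum, as vm_pipe_send() does. */
		if (write(fds[1], &out, sizeof(out)) != sizeof(out))
			errx(1, "short write");

		/* Receiver side: read it back whole, as vm_pipe_recv() does. */
		if (read(fds[0], &in, sizeof(in)) != sizeof(in))
			errx(1, "short read");

		printf("received message %d\n", in);
		return 0;
	}
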