File: src/usr.sbin/vmd/virtio.c
Warning: line 308, column 4: Value stored to 'cmd' is never read
/*	$OpenBSD: virtio.c,v 1.110 2023/11/03 11:16:43 dv Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE */
#include <sys/socket.h>
#include <sys/wait.h>

#include <machine/vmmvar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pv/virtioreg.h>
#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/pv/vioscsireg.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <poll.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "pci.h"
#include "vioscsi.h"
#include "virtio.h"
#include "vmd.h"
#include "vmm.h"

extern struct vmd *env;
extern char *__progname;

struct viornd_dev viornd;
struct vioscsi_dev *vioscsi;
struct vmmci_dev vmmci;

/* Devices emulated in subprocesses are inserted into this list. */
SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs;

#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */

#define VIRTIO_NET_F_MAC	(1<<5)

#define VMMCI_F_TIMESYNC	(1<<0)
#define VMMCI_F_ACK		(1<<1)
#define VMMCI_F_SYNCRTC		(1<<2)

#define RXQ	0
#define TXQ	1
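
/*
 * The vionet code below uses these as fixed virtqueue indexes: vq[RXQ]
 * carries frames toward the guest, vq[TXQ] carries frames out of it.
 */
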
static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
static void virtio_dispatch_dev(int, short, void *);
static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);

const char *
virtio_reg_name(uint8_t reg)
{
	switch (reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
	case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		return "device config 0";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		return "device config 1";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
	default: return "unknown";
	}
}

uint32_t
vring_size(uint32_t vq_size)
{
	uint32_t allocsize1, allocsize2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (2 + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
	    + sizeof(struct vring_used_elem) * vq_size);

	return allocsize1 + allocsize2;
}
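
/*
 * Worked example for vring_size() above (illustrative): with vq_size = 64
 * and the legacy 4096-byte VIRTQUEUE_ALIGN, the descriptor table
 * (64 * 16 bytes) plus the avail ring ((2 + 64) * 2 bytes) rounds up to
 * 4096, and the used ring (2 * 2 bytes + 64 * 8 bytes) rounds up to another
 * 4096, i.e. vring_size(VIORND_QUEUE_SIZE) == 8192 bytes of contiguous
 * guest memory.
 */
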
/* Update queue select */
void
viornd_update_qs(void)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0) {
		viornd.cfg.queue_size = 0;
		return;
	}

	vq_info = &viornd.vq[viornd.cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
	viornd.cfg.queue_size = vq_info->qs;
}

/* Update queue address */
void
viornd_update_qa(void)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	vq_info = &viornd.vq[viornd.cfg.queue_select];
	vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
	if (hva == NULL)
		fatalx("viornd_update_qa");
	vq_info->q_hva = hva;
}

int
viornd_notifyq(void)
{
	size_t sz;
	int dxx, ret;
	uint16_t aidx, uidx;
	char *vr, *rnd_data;
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;

	ret = 0;

	/* Invalid queue? */
	if (viornd.cfg.queue_notify > 0)
		return (0);

	vq_info = &viornd.vq[viornd.cfg.queue_notify];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	aidx = avail->idx & VIORND_QUEUE_MASK;
	uidx = used->idx & VIORND_QUEUE_MASK;

	dxx = avail->ring[aidx] & VIORND_QUEUE_MASK;

	sz = desc[dxx].len;
	if (sz > MAXPHYS)
		fatalx("viornd descriptor size too large (%zu)", sz);

	rnd_data = malloc(sz);

	if (rnd_data != NULL) {
		arc4random_buf(rnd_data, sz);
		if (write_mem(desc[dxx].addr, rnd_data, sz)) {
			log_warnx("viornd: can't write random data @ "
			    "0x%llx",
			    desc[dxx].addr);
		} else {
			/* ret == 1 -> interrupt needed */
			/* XXX check VIRTIO_F_NO_INTR */
			ret = 1;
			viornd.cfg.isr_status = 1;
			used->ring[uidx].id = dxx;
			used->ring[uidx].len = sz;
			__sync_synchronize();
			used->idx++;
		}
		free(rnd_data);
	} else
		fatal("memory allocation error for viornd data");

	return (ret);
}
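
/*
 * Note on viornd_notifyq() above: it consumes exactly one avail-ring entry
 * per queue notification and fills the single descriptor it references with
 * random bytes; descriptor chains (vring_desc.next) are not walked.
 */
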
int
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			viornd.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			viornd.cfg.queue_pfn = *data;
			viornd_update_qa();
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			viornd.cfg.queue_select = *data;
			viornd_update_qs();
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			viornd.cfg.queue_notify = *data;
			if (viornd_notifyq())
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			viornd.cfg.device_status = *data;
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = viornd.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = viornd.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = viornd.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = viornd.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = viornd.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = viornd.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = viornd.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = viornd.cfg.isr_status;
			viornd.cfg.isr_status = 0;
			vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq);
			break;
		}
	}
	return (0);
}

int
vmmci_ctl(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	if ((vmmci.cfg.device_status &
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
		return (-1);

	if (cmd == vmmci.cmd)
		return (0);

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
	case VMMCI_REBOOT:
		/* Update command */
		vmmci.cmd = cmd;

		/*
		 * vmm VMs do not support powerdown, send a reboot request
		 * instead and turn it off after the triple fault.
		 */
		if (cmd == VMMCI_SHUTDOWN)
			cmd = VMMCI_REBOOT; /* line 308: "Value stored to 'cmd' is never read" */
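		/*
		 * Why this is flagged: the local 'cmd' is not read again
		 * before vmmci_ctl() returns, and the guest retrieves the
		 * pending command from vmmci.cmd (already set above) via the
		 * config register, so the overwrite has no observable
		 * effect.
		 */
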
		/* Trigger interrupt */
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);

		/* Add ACK timeout */
		tv.tv_sec = VMMCI_TIMEOUT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SYNCRTC:
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
			/* RTC updated, request guest VM resync of its RTC */
			vmmci.cmd = cmd;

			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
		} else {
			log_debug("%s: RTC sync skipped (guest does not "
			    "support RTC sync)\n", __func__);
		}
		break;
	default:
		fatalx("invalid vmmci command: %d", cmd);
	}

	return (0);
}

void
vmmci_ack(unsigned int cmd)
{
	struct timeval tv = { 0, 0 };

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
		/*
		 * The shutdown was requested by the VM if we don't have
		 * a pending shutdown request. In this case add a short
		 * timeout to give the VM a chance to reboot before the
		 * timer is expired.
		 */
		if (vmmci.cmd == 0) {
			log_debug("%s: vm %u requested shutdown", __func__,
			    vmmci.vm_id);
			tv.tv_sec = VMMCI_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
			return;
		}
		/* FALLTHROUGH */
	case VMMCI_REBOOT:
		/*
		 * If the VM acknowledged our shutdown request, give it
		 * enough time to shutdown or reboot gracefully. This
		 * might take a considerable amount of time (running
		 * rc.shutdown on the VM), so increase the timeout before
		 * killing it forcefully.
		 */
		if (cmd == vmmci.cmd &&
		    evtimer_pending(&vmmci.timeout, NULL)) {
			log_debug("%s: vm %u acknowledged shutdown request",
			    __func__, vmmci.vm_id);
			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
			evtimer_add(&vmmci.timeout, &tv);
		}
		break;
	case VMMCI_SYNCRTC:
		log_debug("%s: vm %u acknowledged RTC sync request",
		    __func__, vmmci.vm_id);
		vmmci.cmd = VMMCI_NONE;
		break;
	default:
		log_warnx("%s: illegal request %u", __func__, cmd);
		break;
	}
}
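
/*
 * vmmci handshake, in short: vmmci_ctl() latches a command in vmmci.cmd and
 * raises a config-change interrupt; the guest driver reads the command back
 * from config space and acknowledges it by writing it to the config
 * register, which arrives in vmmci_ack() via vmmci_io(); if no timely
 * acknowledgment arrives, vmmci_timeout() below forces the shutdown or
 * reboot.
 */
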
void
vmmci_timeout(int fd, short type, void *arg)
{
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
}

int
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			vmmci.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			vmmci.cfg.queue_pfn = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			vmmci.cfg.queue_select = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			vmmci.cfg.queue_notify = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			vmmci.cfg.device_status = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			vmmci_ack(*data);
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			*data = vmmci.cmd;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			/* Update time once when reading the first register */
			gettimeofday(&vmmci.time, NULL);
			*data = (uint64_t)vmmci.time.tv_sec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			*data = (uint64_t)vmmci.time.tv_sec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
			*data = (uint64_t)vmmci.time.tv_usec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
			*data = (uint64_t)vmmci.time.tv_usec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = vmmci.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = vmmci.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = vmmci.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = vmmci.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = vmmci.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = vmmci.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = vmmci.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = vmmci.cfg.isr_status;
			vmmci.cfg.isr_status = 0;
			vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
			break;
		}
	}
	return (0);
}
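
/*
 * Note on the vmmci timesync registers above: the 64-bit tv_sec and tv_usec
 * values are each exposed to the guest as a pair of 32-bit config registers,
 * and gettimeofday() runs only on the read of the first register, so a guest
 * reading the whole set sees one consistent snapshot.
 */
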
int
virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
{
	switch (type) {
	case VMDF_RAW:
		return 0;
	case VMDF_QCOW2:
		return virtio_qcow2_get_base(fd, path, npath, dpath);
	}
	log_warnx("%s: invalid disk format", __func__);
	return -1;
}

void
virtio_init(struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct virtio_dev *dev;
	uint8_t id;
	uint8_t i, j;

	/* Virtio entropy device */
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
	    PCI_SUBCLASS_SYSTEM_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
		log_warnx("%s: can't add PCI virtio rng device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't add bar for virtio rng device",
		    __progname);
		return;
	}

	memset(&viornd, 0, sizeof(viornd));
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
	    VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
	viornd.pci_id = id;
	viornd.irq = pci_get_dev_irq(id);
	viornd.vm_id = vcp->vcp_id;

	SLIST_INIT(&virtio_devs);

	if (vmc->vmc_nnics > 0) {
		for (i = 0; i < vmc->vmc_nnics; i++) {
			dev = calloc(1, sizeof(struct virtio_dev));
			if (dev == NULL) {
				log_warn("%s: calloc failure allocating vionet",
				    __progname);
				return;
			}
			/* Virtio network */
			dev->dev_type = VMD_DEVTYPE_NET;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
			    PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio net device",
				    __progname);
				return;
			}
			dev->pci_id = id;
			dev->sync_fd = -1;
			dev->async_fd = -1;
			dev->vm_id = vcp->vcp_id;
			dev->vm_vmid = vm->vm_vmid;
			dev->irq = pci_get_dev_irq(id);

			/* The vionet pci bar function is called by the vcpu. */
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
			    dev)) {
				log_warnx("%s: can't add bar for virtio net "
				    "device", __progname);
				return;
			}

			dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE;
			dev->vionet.vq[RXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			dev->vionet.vq[RXQ].last_avail = 0;
			dev->vionet.vq[RXQ].notified_avail = 0;

			dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE;
			dev->vionet.vq[TXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			dev->vionet.vq[TXQ].last_avail = 0;
			dev->vionet.vq[TXQ].notified_avail = 0;

			dev->vionet.data_fd = child_taps[i];

			/* MAC address has been assigned by the parent */
			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
			dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC;

			dev->vionet.lockedmac =
			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
			dev->vionet.local =
			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
			if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
				dev->vionet.pxeboot = 1;
			memcpy(&dev->vionet.local_prefix,
			    &env->vmd_cfg.cfg_localprefix,
			    sizeof(dev->vionet.local_prefix));
			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
			    __func__, vcp->vcp_name, i,
			    ether_ntoa((void *)dev->vionet.mac),
			    dev->vionet.lockedmac ? ", locked" : "",
			    dev->vionet.local ? ", local" : "",
			    dev->vionet.pxeboot ? ", pxeboot" : "");

			/* Add the vionet to our device list. */
			dev->vionet.idx = i;
			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
		}
	}

	if (vmc->vmc_ndisks > 0) {
		for (i = 0; i < vmc->vmc_ndisks; i++) {
			dev = calloc(1, sizeof(struct virtio_dev));
			if (dev == NULL) {
				log_warn("%s: calloc failure allocating vioblk",
				    __progname);
				return;
			}

			/* One vioblk device for each disk defined in vcp */
			dev->dev_type = VMD_DEVTYPE_DISK;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
			    PCI_CLASS_MASS_STORAGE,
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio block "
				    "device", __progname);
				return;
			}
			dev->pci_id = id;
			dev->sync_fd = -1;
			dev->async_fd = -1;
			dev->vm_id = vcp->vcp_id;
			dev->vm_vmid = vm->vm_vmid;
			dev->irq = pci_get_dev_irq(id);

			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
			    &dev->vioblk)) {
				log_warnx("%s: can't add bar for virtio block "
				    "device", __progname);
				return;
			}
			dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE;
			dev->vioblk.vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
			dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
			dev->vioblk.vq[0].last_avail = 0;
			dev->vioblk.cfg.device_feature =
			    VIRTIO_BLK_F_SEG_MAX;
			dev->vioblk.seg_max = VIOBLK_SEG_MAX;

			/*
			 * Initialize disk fds to an invalid fd (-1), then
			 * set any child disk fds.
			 */
			memset(&dev->vioblk.disk_fd, -1,
			    sizeof(dev->vioblk.disk_fd));
			dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
			for (j = 0; j < dev->vioblk.ndisk_fd; j++)
				dev->vioblk.disk_fd[j] = child_disks[i][j];

			dev->vioblk.idx = i;
			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
		}
	}

	/*
	 * Launch virtio devices that support subprocess execution.
	 */
	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (virtio_dev_launch(vm, dev) != 0)
			fatalx("failed to launch virtio device");
	}

	/* vioscsi cdrom */
	if (strlen(vmc->vmc_cdrom)) {
		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
		if (vioscsi == NULL) {
			log_warn("%s: calloc failure allocating vioscsi",
			    __progname);
			return;
		}

		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
		    PCI_CLASS_MASS_STORAGE,
		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
		    PCI_VENDOR_OPENBSD,
		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
			log_warnx("%s: can't add PCI vioscsi device",
			    __progname);
			return;
		}

		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
			log_warnx("%s: can't add bar for vioscsi device",
			    __progname);
			return;
		}

		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_availoffset =
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
			vioscsi->vq[i].last_avail = 0;
		}
		if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom,
		    1) == -1) {
			log_warnx("%s: unable to determine iso format",
			    __func__);
			return;
		}
		vioscsi->locked = 0;
		vioscsi->lba = 0;
		vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM;
		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
		vioscsi->pci_id = id;
		vioscsi->vm_id = vcp->vcp_id;
		vioscsi->irq = pci_get_dev_irq(id);
	}

	/* virtio control device */
	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_OPENBSD_CONTROL,
	    PCI_CLASS_COMMUNICATIONS,
	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
		log_warnx("%s: can't add PCI vmm control device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
		log_warnx("%s: can't add bar for vmm control device",
		    __progname);
		return;
	}

	memset(&vmmci, 0, sizeof(vmmci));
	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
	    VMMCI_F_SYNCRTC;
	vmmci.vm_id = vcp->vcp_id;
	vmmci.irq = pci_get_dev_irq(id);
	vmmci.pci_id = id;

	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
}
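
/*
 * Note on virtio_init() above: the pci_add_device() calls run in a fixed
 * order (entropy device, vionet devices, vioblk devices, the optional
 * vioscsi cdrom, then the vmmci control device), which gives the guest a
 * stable PCI device ordering across boots of the same configuration.
 */
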
/*
 * vionet_set_hostmac
 *
 * Sets the hardware address for the host-side tap(4) on a vionet_dev.
 *
 * This should only be called from the event-loop thread.
 *
 * vm: pointer to the current vmd_vm instance
 * idx: index into the array of vionet_dev's for the target vionet_dev
 * addr: ethernet address to set
 */
void
vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct virtio_dev *dev;
	struct vionet_dev *vionet = NULL;
	int ret;

	if (idx > vmc->vmc_nnics)
		fatalx("%s: invalid vionet index: %u", __func__, idx);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type == VMD_DEVTYPE_NET
		    && dev->vionet.idx == idx) {
			vionet = &dev->vionet;
			break;
		}
	}
	if (vionet == NULL)
		fatalx("%s: dev == NULL, idx = %u", __func__, idx);

	/* Set the local vm process copy. */
	memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac));

	/* Send the information to the device process. */
	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1,
	    vionet->hostmac, sizeof(vionet->hostmac));
	if (ret == -1) {
		log_warnx("%s: failed to queue hostmac to vionet dev %u",
		    __func__, idx);
		return;
	}
}

void
virtio_shutdown(struct vmd_vm *vm)
{
	int ret, status;
	pid_t pid = 0;
	struct virtio_dev *dev, *tmp;
	struct viodev_msg msg;
	struct imsgbuf *ibuf;

	/* Ensure that our disks are synced. */
	if (vioscsi != NULL)
		vioscsi->file.close(vioscsi->file.p, 0);

	/*
	 * Broadcast shutdown to child devices. We need to do this
	 * synchronously as we have already stopped the async event thread.
	 */
	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		memset(&msg, 0, sizeof(msg));
		msg.type = VIODEV_MSG_SHUTDOWN;
		ibuf = &dev->sync_iev.ibuf;
		ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1,
		    &msg, sizeof(msg));
		if (ret == -1)
			fatalx("%s: failed to send shutdown to device",
			    __func__);
		if (imsg_flush(ibuf) == -1)
			fatalx("%s: imsg_flush", __func__);
	}

	/*
	 * Wait for all children to shutdown using a simple approach of
	 * iterating over known child devices and waiting for them to die.
	 */
	SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) {
		log_debug("%s: waiting on device pid %d", __func__,
		    dev->dev_pid);
		do {
			pid = waitpid(dev->dev_pid, &status, WNOHANG);
		} while (pid == 0 || (pid == -1 && errno == EINTR));
		if (pid == dev->dev_pid)
			log_debug("%s: device for pid %d is stopped",
			    __func__, pid);
		else
			log_warnx("%s: unexpected pid %d", __func__, pid);
		free(dev);
	}
}

int
vmmci_restore(int fd, uint32_t vm_id)
{
	log_debug("%s: receiving vmmci", __func__);
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error reading vmmci from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}
	vmmci.vm_id = vm_id;
	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
	memset(&vmmci.timeout, 0, sizeof(struct event));
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	return (0);
}

int
viornd_restore(int fd, struct vmd_vm *vm)
{
	void *hva = NULL;

	log_debug("%s: receiving viornd", __func__);
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error reading viornd from fd", __func__);
		return (-1);
	}
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't set bar fn for virtio rng device",
		    __progname);
		return (-1);
	}
	viornd.vm_id = vm->vm_params.vmc_params.vcp_id;
	viornd.irq = pci_get_dev_irq(viornd.pci_id);

	hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE));
	if (hva == NULL)
		fatal("failed to restore viornd virtqueue");
	viornd.vq[0].q_hva = hva;

	return (0);
}

int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct virtio_dev *dev;
	uint8_t i;

	if (vmc->vmc_nnics == 0)
		return (0);

	for (i = 0; i < vmc->vmc_nnics; i++) {
		dev = calloc(1, sizeof(struct virtio_dev));
		if (dev == NULL) {
			log_warn("%s: calloc failure allocating vionet",
			    __progname);
			return (-1);
		}

		log_debug("%s: receiving virtio network device", __func__);
		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
		    != sizeof(struct virtio_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		if (dev->dev_type != VMD_DEVTYPE_NET) {
			log_warnx("%s: invalid device type", __func__);
			return (-1);
		}

		dev->sync_fd = -1;
		dev->async_fd = -1;
		dev->vm_id = vcp->vcp_id;
		dev->vm_vmid = vm->vm_vmid;
		dev->irq = pci_get_dev_irq(dev->pci_id);

		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
			log_warnx("%s: can't set bar fn for virtio net "
			    "device", __progname);
			return (-1);
		}

		dev->vionet.data_fd = child_taps[i];
		dev->vionet.idx = i;

		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
	}

	return (0);
}

int
vioblk_restore(int fd, struct vmd_vm *vm,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct virtio_dev *dev;
	uint8_t i, j;

	if (vmc->vmc_ndisks == 0)
		return (0);

	for (i = 0; i < vmc->vmc_ndisks; i++) {
		dev = calloc(1, sizeof(struct virtio_dev));
		if (dev == NULL) {
			log_warn("%s: calloc failure allocating vioblks",
			    __progname);
			return (-1);
		}

		log_debug("%s: receiving vioblk", __func__);
		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
		    != sizeof(struct virtio_dev)) {
			log_warnx("%s: error reading vioblk from fd", __func__);
			return (-1);
		}
		if (dev->dev_type != VMD_DEVTYPE_DISK) {
			log_warnx("%s: invalid device type", __func__);
			return (-1);
		}

		dev->sync_fd = -1;
		dev->async_fd = -1;

		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		dev->vm_id = vmc->vmc_params.vcp_id;
		dev->irq = pci_get_dev_irq(dev->pci_id);

		memset(&dev->vioblk.disk_fd, -1, sizeof(dev->vioblk.disk_fd));
		dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
		for (j = 0; j < dev->vioblk.ndisk_fd; j++)
			dev->vioblk.disk_fd[j] = child_disks[i][j];

		dev->vioblk.idx = i;
		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
	}
	return (0);
}

int
vioscsi_restore(int fd, struct vmd_vm *vm, int child_cdrom)
{
	void *hva = NULL;
	unsigned int i;

	if (!strlen(vm->vm_params.vmc_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}

	vioscsi->vm_id = vm->vm_params.vmc_params.vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	/* vioscsi uses 3 virtqueues. */
	for (i = 0; i < 3; i++) {
		hva = hvaddr_mem(vioscsi->vq[i].q_gpa,
		    vring_size(VIOSCSI_QUEUE_SIZE));
		if (hva == NULL)
			fatal("failed to restore vioscsi virtqueue");
		vioscsi->vq[i].q_hva = hva;
	}

	return (0);
}

int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct virtio_dev *dev;
	int ret;

	SLIST_INIT(&virtio_devs);

	if ((ret = viornd_restore(fd, vm)) == -1)
		return (ret);

	if ((ret = vioblk_restore(fd, vm, child_disks)) == -1)
		return (ret);

	if ((ret = vioscsi_restore(fd, vm, child_cdrom)) == -1)
		return (ret);

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return (ret);

	if ((ret = vmmci_restore(fd, vm->vm_params.vmc_params.vcp_id)) == -1)
		return (ret);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (virtio_dev_launch(vm, dev) != 0)
			fatalx("%s: failed to restore virtio dev", __func__);
	}

	return (0);
}
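
/*
 * Note: virtio_restore() above reads the device states back in the same
 * order that virtio_dump() below writes them (viornd, vioblk, vioscsi,
 * vionet, vmmci), since everything is streamed sequentially over one fd.
 */
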
int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);

	viornd.vq[0].q_hva = NULL;

	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);

	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}
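
/*
 * vionet_dump() and vioblk_dump() below share one pattern: ask each child
 * device process for its state with a synchronous VIODEV_MSG_DUMP, read the
 * struct virtio_dev back over the sync channel, scrub host-only state
 * (mapped rings, fds, imsg event buffers), and write the result to the
 * dump fd.
 */
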
int
vionet_dump(int fd)
{
	struct virtio_dev *dev, temp;
	struct viodev_msg msg;
	struct imsg imsg;
	struct imsgbuf *ibuf = NULL;
	size_t sz;
	int ret;

	log_debug("%s: dumping vionet", __func__);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type != VMD_DEVTYPE_NET)
			continue;

		memset(&msg, 0, sizeof(msg));
		memset(&imsg, 0, sizeof(imsg));

		ibuf = &dev->sync_iev.ibuf;
		msg.type = VIODEV_MSG_DUMP;

		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed requesting dump of vionet[%d]",
			    __func__, dev->vionet.idx);
			return (-1);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush", __func__);
			return (-1);
		}

		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
		if (sz != sizeof(temp)) {
			log_warnx("%s: failed to dump vionet[%d]", __func__,
			    dev->vionet.idx);
			return (-1);
		}

		/* Clear volatile state. Will reinitialize on restore. */
		temp.vionet.vq[RXQ].q_hva = NULL;
		temp.vionet.vq[TXQ].q_hva = NULL;
		temp.async_fd = -1;
		temp.sync_fd = -1;
		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));

		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
			log_warnx("%s: error writing vionet to fd", __func__);
			return (-1);
		}
	}

	return (0);
}

int
vioblk_dump(int fd)
{
	struct virtio_dev *dev, temp;
	struct viodev_msg msg;
	struct imsg imsg;
	struct imsgbuf *ibuf = NULL;
	size_t sz;
	int ret;

	log_debug("%s: dumping vioblk", __func__);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type != VMD_DEVTYPE_DISK)
			continue;

		memset(&msg, 0, sizeof(msg));
		memset(&imsg, 0, sizeof(imsg));

		ibuf = &dev->sync_iev.ibuf;
		msg.type = VIODEV_MSG_DUMP;

		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed requesting dump of vioblk[%d]",
			    __func__, dev->vioblk.idx);
			return (-1);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush", __func__);
			return (-1);
		}

		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
		if (sz != sizeof(temp)) {
			log_warnx("%s: failed to dump vioblk[%d]", __func__,
			    dev->vioblk.idx);
			return (-1);
		}

		/* Clear volatile state. Will reinitialize on restore. */
		temp.vioblk.vq[0].q_hva = NULL;
		temp.async_fd = -1;
		temp.sync_fd = -1;
		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));

		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
			log_warnx("%s: error writing vioblk to fd", __func__);
			return (-1);
		}
	}

	return (0);
}

int
vioscsi_dump(int fd)
{
	unsigned int i;

	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);

	for (i = 0; i < 3; i++)
		vioscsi->vq[i].q_hva = NULL;

	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}

int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data,
    uint16_t datalen)
{
	struct virtio_dev *dev;
	int ret;

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data,
		    datalen);
		if (ret == -1) {
			log_warnx("%s: failed to broadcast imsg type %u",
			    __func__, type);
		}
	}
}

void
virtio_stop(struct vmd_vm *vm)
{
	return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0);
}

void
virtio_start(struct vmd_vm *vm)
{
	return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0);
}

/*
 * Fork+exec a child virtio device. Returns 0 on success.
 */
static int
virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev)
{
	char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX], t[2];
	pid_t dev_pid;
	int data_fds[VM_MAX_BASE_PER_DISK], sync_fds[2], async_fds[2], ret = 0;
	size_t i, data_fds_sz, sz = 0;
	struct viodev_msg msg;
	struct imsg imsg;
	struct imsgev *iev = &dev->sync_iev;

	switch (dev->dev_type) {
	case VMD_DEVTYPE_NET:
		data_fds[0] = dev->vionet.data_fd;
		data_fds_sz = 1;
		log_debug("%s: launching vionet%d",
		    vm->vm_params.vmc_params.vcp_name, dev->vionet.idx);
		break;
	case VMD_DEVTYPE_DISK:
		memcpy(&data_fds, dev->vioblk.disk_fd, sizeof(data_fds));
		data_fds_sz = dev->vioblk.ndisk_fd;
		log_debug("%s: launching vioblk%d",
		    vm->vm_params.vmc_params.vcp_name, dev->vioblk.idx);
		break;
		/* NOTREACHED */
	default:
		log_warn("%s: invalid device type", __func__);
		return (EINVAL);
	}

	/* We need two channels: one synchronous (IO reads) and one async. */
	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, sync_fds) == -1) {
		log_warn("failed to create socketpair");
		return (errno);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, async_fds) == -1) {
		log_warn("failed to create async socketpair");
		return (errno);
	}

	/* Keep communication channels open after exec. */
	if (fcntl(sync_fds[1], F_SETFD, 0)) {
		ret = errno;
		log_warn("%s: fcntl", __func__);
		goto err;
	}
	if (fcntl(async_fds[1], F_SETFD, 0)) {
		ret = errno;
		log_warn("%s: fcntl", __func__);
		goto err;
	}

	/* Fork... */
	dev_pid = fork();
	if (dev_pid == -1) {
		ret = errno;
		log_warn("%s: fork failed", __func__);
		goto err;
	}

	if (dev_pid > 0) {
		/* Parent */
		close_fd(sync_fds[1]);
		close_fd(async_fds[1]);

		/* Save the child's pid to help with cleanup. */
		dev->dev_pid = dev_pid;

		/* Set the channel fds to the child's before sending. */
		dev->sync_fd = sync_fds[1];
		dev->async_fd = async_fds[1];

		/* Close data fds. Only the child device needs them now. */
		for (i = 0; i < data_fds_sz; i++)
			close_fd(data_fds[i]);

		/* Set our synchronous channel to non-blocking. */
		if (fcntl(sync_fds[0], F_SETFL, O_NONBLOCK) == -1) {
			ret = errno;
			log_warn("%s: fcntl", __func__);
			goto err;
		}

		/* 1. Send over our configured device. */
		log_debug("%s: sending '%c' type device struct", __func__,
		    dev->dev_type);
		sz = atomicio(vwrite, sync_fds[0], dev, sizeof(*dev));
		if (sz != sizeof(*dev)) {
			log_warnx("%s: failed to send device", __func__);
			ret = EIO;
			goto err;
		}

		/* 2. Send over details on the VM (including memory fds). */
		log_debug("%s: sending vm message for '%s'", __func__,
		    vm->vm_params.vmc_params.vcp_name);
		sz = atomicio(vwrite, sync_fds[0], vm, sizeof(*vm));
		if (sz != sizeof(*vm)) {
			log_warnx("%s: failed to send vm details", __func__);
			ret = EIO;
			goto err;
		}

		/*
		 * Initialize our imsg channel to the child device. The initial
		 * communication will be synchronous. We expect the child to
		 * report itself "ready" to confirm the launch was a success.
		 */
		imsg_init(&iev->ibuf, sync_fds[0]);
		do
			ret = imsg_read(&iev->ibuf);
		while (ret == -1 && errno == EAGAIN);
		if (ret == 0 || ret == -1) {
			log_warnx("%s: failed to receive ready message from "
			    "'%c' type device", __func__, dev->dev_type);
			ret = EIO;
			goto err;
		}
		ret = 0;

		log_debug("%s: receiving reply", __func__);
		if (imsg_get(&iev->ibuf, &imsg) < 1) {
			log_warnx("%s: imsg_get", __func__);
			ret = EIO;
			goto err;
		}
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		if (msg.type != VIODEV_MSG_READY) {
			log_warnx("%s: expected ready message, got type %d",
			    __func__, msg.type);
			ret = EINVAL;
			goto err;
		}
		log_debug("%s: device reports ready via sync channel",
		    __func__);

		/*
		 * Wire in the async event handling, but after reverting back
		 * to the parent's fd's.
		 */
		dev->sync_fd = sync_fds[0];
		dev->async_fd = async_fds[0];
		vm_device_pipe(dev, virtio_dispatch_dev);
	} else {
		/* Child */
		close_fd(async_fds[0]);
		close_fd(sync_fds[0]);

		/* Keep data file descriptors open after exec. */
		for (i = 0; i < data_fds_sz; i++) {
			log_debug("%s: marking fd %d !close-on-exec", __func__,
			    data_fds[i]);
			if (fcntl(data_fds[i], F_SETFD, 0)) {
				ret = errno;
				log_warn("%s: fcntl", __func__);
				goto err;
			}
		}

		memset(&nargv, 0, sizeof(nargv));
		memset(num, 0, sizeof(num));
		snprintf(num, sizeof(num), "%d", sync_fds[1]);
		memset(vmm_fd, 0, sizeof(vmm_fd));
		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
		memset(vm_name, 0, sizeof(vm_name));
		snprintf(vm_name, sizeof(vm_name), "%s",
		    vm->vm_params.vmc_params.vcp_name);

		t[0] = dev->dev_type;
		t[1] = '\0';

		nargv[0] = env->argv0;
		nargv[1] = "-X";
		nargv[2] = num;
		nargv[3] = "-t";
		nargv[4] = t;
		nargv[5] = "-i";
		nargv[6] = vmm_fd;
		nargv[7] = "-p";
		nargv[8] = vm_name;
		nargv[9] = "-n";
		nargv[10] = NULL;

		if (env->vmd_verbose == 1) {
			nargv[10] = VMD_VERBOSE_1;
			nargv[11] = NULL;
		} else if (env->vmd_verbose > 1) {
			nargv[10] = VMD_VERBOSE_2;
			nargv[11] = NULL;
		}
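		/*
		 * For illustration, the argv assembled above re-execs the
		 * running vmd binary as:
		 *
		 *   <argv0> -X <sync fd> -t <n|d> -i <vmm fd> -p <vm name>
		 *       -n [-v | -vv]
		 *
		 * with control resuming in vmd.c:main() in the child, per
		 * the comment before execvp() below.
		 */
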
		/* Control resumes in vmd.c:main(). */
		execvp(nargv[0], nargv);

		ret = errno;
		log_warn("%s: failed to exec device", __func__);
		_exit(ret);
		/* NOTREACHED */
	}

	return (ret);

err:
	close_fd(sync_fds[0]);
	close_fd(sync_fds[1]);
	close_fd(async_fds[0]);
	close_fd(async_fds[1]);
	return (ret);
}

/*
 * Initialize an async imsg channel for a virtio device.
 */
int
vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *))
{
	struct imsgev *iev = &dev->async_iev;
	int fd = dev->async_fd;

	log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
	    dev->dev_type, fd);

	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
		log_warn("failed to set nonblocking mode on vm device pipe");
		return (-1);
	}

	imsg_init(&iev->ibuf, fd);
	iev->handler = cb;
	iev->data = dev;
	iev->events = EV_READ;
	imsg_event_add(iev);

	return (0);
}

void
virtio_dispatch_dev(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	ssize_t n = 0;

	if (event & EV_READ) {
		if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
			fatal("%s: imsg_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN)
			fatal("%s: msgbuf_write", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_WRITE)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_DEVOP_MSG:
			IMSG_SIZE_CHECK(&imsg, &msg);
			memcpy(&msg, imsg.data, sizeof(msg));
			handle_dev_msg(&msg, dev);
			break;
		default:
			log_warnx("%s: got non devop imsg %d", __func__,
			    imsg.hdr.type);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}


static int
handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev)
{
	uint32_t vm_id = gdev->vm_id;
	int irq = gdev->irq;

	switch (msg->type) {
	case VIODEV_MSG_KICK:
		if (msg->state == INTR_STATE_ASSERT)
			vcpu_assert_pic_irq(vm_id, msg->vcpu, irq);
		else if (msg->state == INTR_STATE_DEASSERT)
			vcpu_deassert_pic_irq(vm_id, msg->vcpu, irq);
		break;
	case VIODEV_MSG_READY:
		log_debug("%s: device reports ready", __func__);
		break;
	case VIODEV_MSG_ERROR:
		log_warnx("%s: device reported error", __func__);
		break;
	case VIODEV_MSG_INVALID:
	case VIODEV_MSG_IO_READ:
	case VIODEV_MSG_IO_WRITE:
		/* FALLTHROUGH */
	default:
		log_warnx("%s: unsupported device message type %d", __func__,
		    msg->type);
		return (1);
	}

	return (0);
}

/*
 * Called by the VM process while processing IO from the VCPU thread.
 *
 * N.b. Since the VCPU thread calls this function, we cannot mutate the event
 * system. All ipc messages must be sent manually and cannot be queued for
 * the event loop to push them. (We need to perform a synchronous read, so
 * this isn't really a big deal.)
 */
int
virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct virtio_dev *dev = (struct virtio_dev *)cookie;
	struct imsgbuf *ibuf = &dev->sync_iev.ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	ssize_t n;
	int ret = 0;

	memset(&msg, 0, sizeof(msg));
	msg.reg = reg;
	msg.io_sz = sz;

	if (dir == 0) {
		msg.type = VIODEV_MSG_IO_WRITE;
		msg.data = *data;
		msg.data_valid = 1;
	} else
		msg.type = VIODEV_MSG_IO_READ;

	if (msg.type == VIODEV_MSG_IO_WRITE) {
		/*
		 * Write request. No reply expected.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warn("%s: failed to send async io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush (write)", __func__);
			return (-1);
		}
	} else {
		/*
		 * Read request. Requires waiting for a reply.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed to send sync io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush (read)", __func__);
			return (-1);
		}

		/* Read our reply. */
		do
			n = imsg_read(ibuf);
		while (n == -1 && errno == EAGAIN);
		if (n == 0 || n == -1) {
			log_warn("%s: imsg_read (n=%ld)", __func__, n);
			return (-1);
		}
		if ((n = imsg_get(ibuf, &imsg)) == -1) {
			log_warn("%s: imsg_get (n=%ld)", __func__, n);
			return (-1);
		}
		if (n == 0) {
			log_warnx("%s: invalid imsg", __func__);
			return (-1);
		}

		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
#if DEBUG
			log_debug("%s: got sync read response (reg=%s)",
			    __func__, virtio_reg_name(msg.reg));
#endif /* DEBUG */
			*data = msg.data;
			/*
			 * It's possible we're asked to {de,}assert after the
			 * device performs a register read.
			 */
			if (msg.state == INTR_STATE_ASSERT)
				vcpu_assert_pic_irq(dev->vm_id, msg.vcpu,
				    msg.irq);
			else if (msg.state == INTR_STATE_DEASSERT)
				vcpu_deassert_pic_irq(dev->vm_id, msg.vcpu,
				    msg.irq);
		} else {
			log_warnx("%s: expected IO_READ, got %d", __func__,
			    msg.type);
			return (-1);
		}
	}

	return (0);
}

void
virtio_assert_pic_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_ASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}

void
virtio_deassert_pic_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_DEASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}