File: src/usr.sbin/vmd/virtio.c
Warning: line 545, column 5: Potential leak of memory pointed to by 'dev'
1 | /*	$OpenBSD: virtio.c,v 1.110 2023/11/03 11:16:43 dv Exp $	*/
2 |
3 | /*
4 |  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5 |  *
6 |  * Permission to use, copy, modify, and distribute this software for any
7 |  * purpose with or without fee is hereby granted, provided that the above
8 |  * copyright notice and this permission notice appear in all copies.
9 |  *
10 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 |  */
18 |
19 | #include <sys/param.h>	/* PAGE_SIZE */
20 | #include <sys/socket.h>
21 | #include <sys/wait.h>
22 |
23 | #include <machine/vmmvar.h>
24 | #include <dev/pci/pcireg.h>
25 | #include <dev/pci/pcidevs.h>
26 | #include <dev/pv/virtioreg.h>
27 | #include <dev/pci/virtio_pcireg.h>
28 | #include <dev/pv/vioblkreg.h>
29 | #include <dev/pv/vioscsireg.h>
30 |
31 | #include <net/if.h>
32 | #include <netinet/in.h>
33 | #include <netinet/if_ether.h>
34 | #include <netinet/ip.h>
35 |
36 | #include <errno.h>
37 | #include <event.h>
38 | #include <fcntl.h>
39 | #include <poll.h>
40 | #include <stddef.h>
41 | #include <stdlib.h>
42 | #include <string.h>
43 | #include <unistd.h>
44 |
45 | #include "atomicio.h"
46 | #include "pci.h"
47 | #include "vioscsi.h"
48 | #include "virtio.h"
49 | #include "vmd.h"
50 | #include "vmm.h"
51 |
52 | extern struct vmd *env;
53 | extern char *__progname;
54 |
55 | struct viornd_dev viornd;
56 | struct vioscsi_dev *vioscsi;
57 | struct vmmci_dev vmmci;
58 |
59 | /* Devices emulated in subprocesses are inserted into this list. */
60 | SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs;
61 |
62 | #define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */
63 |
64 | #define VIRTIO_NET_F_MAC	(1<<5)
65 |
66 | #define VMMCI_F_TIMESYNC	(1<<0)
67 | #define VMMCI_F_ACK		(1<<1)
68 | #define VMMCI_F_SYNCRTC		(1<<2)
69 |
70 | #define RXQ	0
71 | #define TXQ	1
72 |
73 | static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
74 | static void virtio_dispatch_dev(int, short, void *);
75 | static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);
76 |
77 | const char *
78 | virtio_reg_name(uint8_t reg)
79 | {
80 | 	switch (reg) {
81 | 	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
82 | 	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
83 | 	case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
84 | 	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
85 | 	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
86 | 	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
87 | 	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
88 | 	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
89 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
90 | 		return "device config 0";
91 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
92 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
93 | 		return "device config 1";
94 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
95 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
96 | 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
97 | 	default: return "unknown";
98 | 	}
99 | }
100 |
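| /*
|  * Worked example (an editorial sketch, assuming the standard legacy
|  * virtio structs: 16-byte vring_desc, 8-byte vring_used_elem, and the
|  * 4096-byte VIRTQUEUE_ALIGN used in this file): for a 64-entry queue,
|  * the descriptor table is 64 * 16 = 1024 bytes and the avail ring is
|  * (2 + 64) * 2 = 132 bytes, padded together to 4096; the used ring is
|  * 4 + 64 * 8 = 516 bytes, padded to another 4096. So vring_size(64)
|  * returns 8192.
|  */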
101 | uint32_t
102 | vring_size(uint32_t vq_size)
103 | {
104 | 	uint32_t allocsize1, allocsize2;
105 |
106 | 	/* allocsize1: descriptor table + avail ring + pad */
107 | 	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
108 | 	    + sizeof(uint16_t) * (2 + vq_size));
109 | 	/* allocsize2: used ring + pad */
110 | 	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
111 | 	    + sizeof(struct vring_used_elem) * vq_size);
112 |
113 | 	return allocsize1 + allocsize2;
114 | }
115 |
116 | /* Update queue select */
117 | void
118 | viornd_update_qs(void)
119 | {
120 | 	struct virtio_vq_info *vq_info;
121 |
122 | 	/* Invalid queue? */
123 | 	if (viornd.cfg.queue_select > 0) {
124 | 		viornd.cfg.queue_size = 0;
125 | 		return;
126 | 	}
127 |
128 | 	vq_info = &viornd.vq[viornd.cfg.queue_select];
129 |
130 | 	/* Update queue pfn/size based on queue select */
131 | 	viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
132 | 	viornd.cfg.queue_size = vq_info->qs;
133 | }
134 |
135 | /* Update queue address */
136 | void
137 | viornd_update_qa(void)
138 | {
139 | 	struct virtio_vq_info *vq_info;
140 | 	void *hva = NULL;
141 |
142 | 	/* Invalid queue? */
143 | 	if (viornd.cfg.queue_select > 0)
144 | 		return;
145 |
146 | 	vq_info = &viornd.vq[viornd.cfg.queue_select];
147 | 	vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;
148 |
149 | 	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
150 | 	if (hva == NULL)
151 | 		fatalx("viornd_update_qa");
152 | 	vq_info->q_hva = hva;
153 | }
154 |
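| /*
|  * Handle a guest notify on the entropy queue: take the next available
|  * descriptor, fill the guest buffer it points at with arc4random_buf()
|  * data, then post the descriptor on the used ring and request an
|  * interrupt.
|  */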
155 | int
156 | viornd_notifyq(void)
157 | {
158 | 	size_t sz;
159 | 	int dxx, ret;
160 | 	uint16_t aidx, uidx;
161 | 	char *vr, *rnd_data;
162 | 	struct vring_desc *desc;
163 | 	struct vring_avail *avail;
164 | 	struct vring_used *used;
165 | 	struct virtio_vq_info *vq_info;
166 |
167 | 	ret = 0;
168 |
169 | 	/* Invalid queue? */
170 | 	if (viornd.cfg.queue_notify > 0)
171 | 		return (0);
172 |
173 | 	vq_info = &viornd.vq[viornd.cfg.queue_notify];
174 | 	vr = vq_info->q_hva;
175 | 	if (vr == NULL)
176 | 		fatalx("%s: null vring", __func__);
177 |
178 | 	desc = (struct vring_desc *)(vr);
179 | 	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
180 | 	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
181 |
182 | 	aidx = avail->idx & VIORND_QUEUE_MASK;
183 | 	uidx = used->idx & VIORND_QUEUE_MASK;
184 |
185 | 	dxx = avail->ring[aidx] & VIORND_QUEUE_MASK;
186 |
187 | 	sz = desc[dxx].len;
188 | 	if (sz > MAXPHYS)
189 | 		fatalx("viornd descriptor size too large (%zu)", sz);
190 |
191 | 	rnd_data = malloc(sz);
192 |
193 | 	if (rnd_data != NULL) {
194 | 		arc4random_buf(rnd_data, sz);
195 | 		if (write_mem(desc[dxx].addr, rnd_data, sz)) {
196 | 			log_warnx("viornd: can't write random data @ "
197 | 			    "0x%llx",
198 | 			    desc[dxx].addr);
199 | 		} else {
200 | 			/* ret == 1 -> interrupt needed */
201 | 			/* XXX check VIRTIO_F_NO_INTR */
202 | 			ret = 1;
203 | 			viornd.cfg.isr_status = 1;
204 | 			used->ring[uidx].id = dxx;
205 | 			used->ring[uidx].len = sz;
206 | 			__sync_synchronize();
207 | 			used->idx++;
208 | 		}
209 | 		free(rnd_data);
210 | 	} else
211 | 		fatal("memory allocation error for viornd data");
212 |
213 | 	return (ret);
214 | }
215 |
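| /*
|  * PCI BAR i/o handler for the entropy device. dir == 0 is a guest
|  * write, anything else a read; reading ISR status is destructive and
|  * deasserts the device's interrupt line.
|  */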
216 | int
217 | virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
218 |     void *unused, uint8_t sz)
219 | {
220 | 	*intr = 0xFF;
221 |
222 | 	if (dir == 0) {
223 | 		switch (reg) {
224 | 		case VIRTIO_CONFIG_DEVICE_FEATURES:
225 | 		case VIRTIO_CONFIG_QUEUE_SIZE:
226 | 		case VIRTIO_CONFIG_ISR_STATUS:
227 | 			log_warnx("%s: illegal write %x to %s",
228 | 			    __progname, *data, virtio_reg_name(reg));
229 | 			break;
230 | 		case VIRTIO_CONFIG_GUEST_FEATURES:
231 | 			viornd.cfg.guest_feature = *data;
232 | 			break;
233 | 		case VIRTIO_CONFIG_QUEUE_PFN:
234 | 			viornd.cfg.queue_pfn = *data;
235 | 			viornd_update_qa();
236 | 			break;
237 | 		case VIRTIO_CONFIG_QUEUE_SELECT:
238 | 			viornd.cfg.queue_select = *data;
239 | 			viornd_update_qs();
240 | 			break;
241 | 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
242 | 			viornd.cfg.queue_notify = *data;
243 | 			if (viornd_notifyq())
244 | 				*intr = 1;
245 | 			break;
246 | 		case VIRTIO_CONFIG_DEVICE_STATUS:
247 | 			viornd.cfg.device_status = *data;
248 | 			break;
249 | 		}
250 | 	} else {
251 | 		switch (reg) {
252 | 		case VIRTIO_CONFIG_DEVICE_FEATURES:
253 | 			*data = viornd.cfg.device_feature;
254 | 			break;
255 | 		case VIRTIO_CONFIG_GUEST_FEATURES:
256 | 			*data = viornd.cfg.guest_feature;
257 | 			break;
258 | 		case VIRTIO_CONFIG_QUEUE_PFN:
259 | 			*data = viornd.cfg.queue_pfn;
260 | 			break;
261 | 		case VIRTIO_CONFIG_QUEUE_SIZE:
262 | 			*data = viornd.cfg.queue_size;
263 | 			break;
264 | 		case VIRTIO_CONFIG_QUEUE_SELECT:
265 | 			*data = viornd.cfg.queue_select;
266 | 			break;
267 | 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
268 | 			*data = viornd.cfg.queue_notify;
269 | 			break;
270 | 		case VIRTIO_CONFIG_DEVICE_STATUS:
271 | 			*data = viornd.cfg.device_status;
272 | 			break;
273 | 		case VIRTIO_CONFIG_ISR_STATUS:
274 | 			*data = viornd.cfg.isr_status;
275 | 			viornd.cfg.isr_status = 0;
276 | 			vcpu_deassert_pic_irq(viornd.vm_id, 0, viornd.irq);
277 | 			break;
278 | 		}
279 | 	}
280 | 	return (0);
281 | }
282 |
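| /*
|  * Issue a host-initiated control command (shutdown, reboot or RTC
|  * sync) to the guest via a config-change interrupt. Shutdown and
|  * reboot arm an ACK timeout so an unresponsive guest is killed once
|  * vmmci_timeout() fires.
|  */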
283 | int
284 | vmmci_ctl(unsigned int cmd)
285 | {
286 | 	struct timeval tv = { 0, 0 };
287 |
288 | 	if ((vmmci.cfg.device_status &
289 | 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0)
290 | 		return (-1);
291 |
292 | 	if (cmd == vmmci.cmd)
293 | 		return (0);
294 |
295 | 	switch (cmd) {
296 | 	case VMMCI_NONE:
297 | 		break;
298 | 	case VMMCI_SHUTDOWN:
299 | 	case VMMCI_REBOOT:
300 | 		/* Update command */
301 | 		vmmci.cmd = cmd;
302 |
303 | 		/*
304 | 		 * vmm VMs do not support powerdown, send a reboot request
305 | 		 * instead and turn it off after the triple fault.
306 | 		 */
307 | 		if (cmd == VMMCI_SHUTDOWN)
308 | 			cmd = VMMCI_REBOOT;
309 |
310 | 		/* Trigger interrupt */
311 | 		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
312 | 		vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
313 |
314 | 		/* Add ACK timeout */
315 | 		tv.tv_sec = VMMCI_TIMEOUT;
316 | 		evtimer_add(&vmmci.timeout, &tv);
317 | 		break;
318 | 	case VMMCI_SYNCRTC:
319 | 		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
320 | 			/* RTC updated, request guest VM resync of its RTC */
321 | 			vmmci.cmd = cmd;
322 |
323 | 			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
324 | 			vcpu_assert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
325 | 		} else {
326 | 			log_debug("%s: RTC sync skipped (guest does not "
327 | 			    "support RTC sync)", __func__);
328 | 		}
329 | 		break;
330 | 	default:
331 | 		fatalx("invalid vmmci command: %d", cmd);
332 | 	}
333 |
334 | 	return (0);
335 | }
336 |
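| /*
|  * Handle the command value the guest writes back into the device
|  * config space: either an acknowledgement of our pending request or a
|  * guest-initiated shutdown.
|  */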
337 | void
338 | vmmci_ack(unsigned int cmd)
339 | {
340 | 	struct timeval tv = { 0, 0 };
341 |
342 | 	switch (cmd) {
343 | 	case VMMCI_NONE:
344 | 		break;
345 | 	case VMMCI_SHUTDOWN:
346 | 		/*
347 | 		 * The shutdown was requested by the VM if we don't have
348 | 		 * a pending shutdown request. In this case add a short
349 | 		 * timeout to give the VM a chance to reboot before the
350 | 		 * timer expires.
351 | 		 */
352 | 		if (vmmci.cmd == 0) {
353 | 			log_debug("%s: vm %u requested shutdown", __func__,
354 | 			    vmmci.vm_id);
355 | 			tv.tv_sec = VMMCI_TIMEOUT;
356 | 			evtimer_add(&vmmci.timeout, &tv);
357 | 			return;
358 | 		}
359 | 		/* FALLTHROUGH */
360 | 	case VMMCI_REBOOT:
361 | 		/*
362 | 		 * If the VM acknowledged our shutdown request, give it
363 | 		 * enough time to shutdown or reboot gracefully. This
364 | 		 * might take a considerable amount of time (running
365 | 		 * rc.shutdown on the VM), so increase the timeout before
366 | 		 * killing it forcefully.
367 | 		 */
368 | 		if (cmd == vmmci.cmd &&
369 | 		    evtimer_pending(&vmmci.timeout, NULL)) {
370 | 			log_debug("%s: vm %u acknowledged shutdown request",
371 | 			    __func__, vmmci.vm_id);
372 | 			tv.tv_sec = VMMCI_SHUTDOWN_TIMEOUT;
373 | 			evtimer_add(&vmmci.timeout, &tv);
374 | 		}
375 | 		break;
376 | 	case VMMCI_SYNCRTC:
377 | 		log_debug("%s: vm %u acknowledged RTC sync request",
378 | 		    __func__, vmmci.vm_id);
379 | 		vmmci.cmd = VMMCI_NONE;
380 | 		break;
381 | 	default:
382 | 		log_warnx("%s: illegal request %u", __func__, cmd);
383 | 		break;
384 | 	}
385 | }
386 |
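| /*
|  * The ACK timer expired: the guest never completed the shutdown or
|  * reboot we requested, so force it from the host side.
|  */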
387 | void
388 | vmmci_timeout(int fd, short type, void *arg)
389 | {
390 | 	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
391 | 	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
392 | }
393 |
394 | int
395 | vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
396 |     void *unused, uint8_t sz)
397 | {
398 | 	*intr = 0xFF;
399 |
400 | 	if (dir == 0) {
401 | 		switch (reg) {
402 | 		case VIRTIO_CONFIG_DEVICE_FEATURES:
403 | 		case VIRTIO_CONFIG_QUEUE_SIZE:
404 | 		case VIRTIO_CONFIG_ISR_STATUS:
405 | 			log_warnx("%s: illegal write %x to %s",
406 | 			    __progname, *data, virtio_reg_name(reg));
407 | 			break;
408 | 		case VIRTIO_CONFIG_GUEST_FEATURES:
409 | 			vmmci.cfg.guest_feature = *data;
410 | 			break;
411 | 		case VIRTIO_CONFIG_QUEUE_PFN:
412 | 			vmmci.cfg.queue_pfn = *data;
413 | 			break;
414 | 		case VIRTIO_CONFIG_QUEUE_SELECT:
415 | 			vmmci.cfg.queue_select = *data;
416 | 			break;
417 | 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
418 | 			vmmci.cfg.queue_notify = *data;
419 | 			break;
420 | 		case VIRTIO_CONFIG_DEVICE_STATUS:
421 | 			vmmci.cfg.device_status = *data;
422 | 			break;
423 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
424 | 			vmmci_ack(*data);
425 | 			break;
426 | 		}
427 | 	} else {
428 | 		switch (reg) {
429 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
430 | 			*data = vmmci.cmd;
431 | 			break;
432 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
433 | 			/* Update time once when reading the first register */
434 | 			gettimeofday(&vmmci.time, NULL);
435 | 			*data = (uint64_t)vmmci.time.tv_sec;
436 | 			break;
437 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
438 | 			*data = (uint64_t)vmmci.time.tv_sec << 32;
439 | 			break;
440 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
441 | 			*data = (uint64_t)vmmci.time.tv_usec;
442 | 			break;
443 | 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
444 | 			*data = (uint64_t)vmmci.time.tv_usec << 32;
445 | 			break;
446 | 		case VIRTIO_CONFIG_DEVICE_FEATURES:
447 | 			*data = vmmci.cfg.device_feature;
448 | 			break;
449 | 		case VIRTIO_CONFIG_GUEST_FEATURES:
450 | 			*data = vmmci.cfg.guest_feature;
451 | 			break;
452 | 		case VIRTIO_CONFIG_QUEUE_PFN:
453 | 			*data = vmmci.cfg.queue_pfn;
454 | 			break;
455 | 		case VIRTIO_CONFIG_QUEUE_SIZE:
456 | 			*data = vmmci.cfg.queue_size;
457 | 			break;
458 | 		case VIRTIO_CONFIG_QUEUE_SELECT:
459 | 			*data = vmmci.cfg.queue_select;
460 | 			break;
461 | 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
462 | 			*data = vmmci.cfg.queue_notify;
463 | 			break;
464 | 		case VIRTIO_CONFIG_DEVICE_STATUS:
465 | 			*data = vmmci.cfg.device_status;
466 | 			break;
467 | 		case VIRTIO_CONFIG_ISR_STATUS:
468 | 			*data = vmmci.cfg.isr_status;
469 | 			vmmci.cfg.isr_status = 0;
470 | 			vcpu_deassert_pic_irq(vmmci.vm_id, 0, vmmci.irq);
471 | 			break;
472 | 		}
473 | 	}
474 | 	return (0);
475 | }
476 |
477 | int
478 | virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
479 | {
480 | 	switch (type) {
481 | 	case VMDF_RAW:
482 | 		return 0;
483 | 	case VMDF_QCOW2:
484 | 		return virtio_qcow2_get_base(fd, path, npath, dpath);
485 | 	}
486 | 	log_warnx("%s: invalid disk format", __func__);
487 | 	return -1;
488 | }
489 |
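| /*
|  * Create the virtio PCI devices for a new VM: the entropy device, one
|  * vionet per configured NIC and one vioblk per disk (both run in child
|  * processes via virtio_dev_launch()), an optional vioscsi cdrom, and
|  * the vmm control interface.
|  */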
490 | void
491 | virtio_init(struct vmd_vm *vm, int child_cdrom,
492 |     int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
493 | {
494 | 	struct vmop_create_params *vmc = &vm->vm_params;
495 | 	struct vm_create_params *vcp = &vmc->vmc_params;
496 | 	struct virtio_dev *dev;
497 | 	uint8_t id;
498 | 	uint8_t i, j;
499 |
500 | 	/* Virtio entropy device */
501 | 	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
502 | 	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
503 | 	    PCI_SUBCLASS_SYSTEM_MISC,
504 | 	    PCI_VENDOR_OPENBSD,
505 | 	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
506 | 		log_warnx("%s: can't add PCI virtio rng device",
507 | 		    __progname);
508 | 		return;
509 | 	}
510 |
511 | 	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
512 | 		log_warnx("%s: can't add bar for virtio rng device",
513 | 		    __progname);
514 | 		return;
515 | 	}
516 |
517 | 	memset(&viornd, 0, sizeof(viornd));
518 | 	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
519 | 	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
520 | 	    VIORND_QUEUE_SIZE;
521 | 	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
522 | 	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
523 | 	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
524 | 	viornd.pci_id = id;
525 | 	viornd.irq = pci_get_dev_irq(id);
526 | 	viornd.vm_id = vcp->vcp_id;
527 |
528 | 	SLIST_INIT(&virtio_devs);
529 |
530 | 	if (vmc->vmc_nnics > 0) {
531 | 		for (i = 0; i < vmc->vmc_nnics; i++) {
532 | 			dev = calloc(1, sizeof(struct virtio_dev));
533 | 			if (dev == NULL) {
534 | 				log_warn("%s: calloc failure allocating vionet",
535 | 				    __progname);
536 | 				return;
537 | 			}
538 | 			/* Virtio network */
539 | 			dev->dev_type = VMD_DEVTYPE_NET;
540 |
541 | 			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
542 | 			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
543 | 			    PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
544 | 			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
545 | 				log_warnx("%s: can't add PCI virtio net device",
546 | 				    __progname);
| 				free(dev);	/* dev is not yet on virtio_devs; don't leak it */
547 | 				return;
548 | 			}
549 | 			dev->pci_id = id;
550 | 			dev->sync_fd = -1;
551 | 			dev->async_fd = -1;
552 | 			dev->vm_id = vcp->vcp_id;
553 | 			dev->vm_vmid = vm->vm_vmid;
554 | 			dev->irq = pci_get_dev_irq(id);
555 |
556 | 			/* The vionet pci bar function is called by the vcpu. */
557 | 			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
558 | 			    dev)) {
559 | 				log_warnx("%s: can't add bar for virtio net "
560 | 				    "device", __progname);
| 				free(dev);
561 | 				return;
562 | 			}
563 |
564 | 			dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE;
565 | 			dev->vionet.vq[RXQ].vq_availoffset =
566 | 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
567 | 			dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
568 | 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
569 | 			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
570 | 			dev->vionet.vq[RXQ].last_avail = 0;
571 | 			dev->vionet.vq[RXQ].notified_avail = 0;
572 |
573 | 			dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE;
574 | 			dev->vionet.vq[TXQ].vq_availoffset =
575 | 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
576 | 			dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
577 | 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
578 | 			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
579 | 			dev->vionet.vq[TXQ].last_avail = 0;
580 | 			dev->vionet.vq[TXQ].notified_avail = 0;
581 |
582 | 			dev->vionet.data_fd = child_taps[i];
583 |
584 | 			/* MAC address has been assigned by the parent */
585 | 			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
586 | 			dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC;
587 |
588 | 			dev->vionet.lockedmac =
589 | 			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
590 | 			dev->vionet.local =
591 | 			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
592 | 			if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
593 | 				dev->vionet.pxeboot = 1;
594 | 			memcpy(&dev->vionet.local_prefix,
595 | 			    &env->vmd_cfg.cfg_localprefix,
596 | 			    sizeof(dev->vionet.local_prefix));
597 | 			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
598 | 			    __func__, vcp->vcp_name, i,
599 | 			    ether_ntoa((void *)dev->vionet.mac),
600 | 			    dev->vionet.lockedmac ? ", locked" : "",
601 | 			    dev->vionet.local ? ", local" : "",
602 | 			    dev->vionet.pxeboot ? ", pxeboot" : "");
603 |
604 | 			/* Add the vionet to our device list. */
605 | 			dev->vionet.idx = i;
606 | 			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
607 | 		}
608 | 	}
609 |
610 | 	if (vmc->vmc_ndisks > 0) {
611 | 		for (i = 0; i < vmc->vmc_ndisks; i++) {
612 | 			dev = calloc(1, sizeof(struct virtio_dev));
613 | 			if (dev == NULL) {
614 | 				log_warn("%s: calloc failure allocating vioblk",
615 | 				    __progname);
616 | 				return;
617 | 			}
618 |
619 | 			/* One vioblk device for each disk defined in vcp */
620 | 			dev->dev_type = VMD_DEVTYPE_DISK;
621 |
622 | 			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
623 | 			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
624 | 			    PCI_CLASS_MASS_STORAGE,
625 | 			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
626 | 			    PCI_VENDOR_OPENBSD,
627 | 			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
628 | 				log_warnx("%s: can't add PCI virtio block "
629 | 				    "device", __progname);
| 				free(dev);	/* don't leak the unlisted dev */
630 | 				return;
631 | 			}
632 | 			dev->pci_id = id;
633 | 			dev->sync_fd = -1;
634 | 			dev->async_fd = -1;
635 | 			dev->vm_id = vcp->vcp_id;
636 | 			dev->vm_vmid = vm->vm_vmid;
637 | 			dev->irq = pci_get_dev_irq(id);
638 |
639 | 			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
640 | 			    &dev->vioblk)) {
641 | 				log_warnx("%s: can't add bar for virtio block "
642 | 				    "device", __progname);
| 				free(dev);
643 | 				return;
644 | 			}
645 | 			dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE;
646 | 			dev->vioblk.vq[0].vq_availoffset =
647 | 			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
648 | 			dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
649 | 			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
650 | 			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
651 | 			dev->vioblk.vq[0].last_avail = 0;
652 | 			dev->vioblk.cfg.device_feature =
653 | 			    VIRTIO_BLK_F_SEG_MAX;
654 | 			dev->vioblk.seg_max = VIOBLK_SEG_MAX;
655 |
656 | 			/*
657 | 			 * Initialize disk fds to an invalid fd (-1), then
658 | 			 * set any child disk fds.
659 | 			 */
660 | 			memset(&dev->vioblk.disk_fd, -1,
661 | 			    sizeof(dev->vioblk.disk_fd));
662 | 			dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
663 | 			for (j = 0; j < dev->vioblk.ndisk_fd; j++)
664 | 				dev->vioblk.disk_fd[j] = child_disks[i][j];
665 |
666 | 			dev->vioblk.idx = i;
667 | 			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
668 | 		}
669 | 	}
670 |
671 | 	/*
672 | 	 * Launch virtio devices that support subprocess execution.
673 | 	 */
674 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
675 | 		if (virtio_dev_launch(vm, dev) != 0)
676 | 			fatalx("failed to launch virtio device");
677 | 	}
678 |
679 | 	/* vioscsi cdrom */
680 | 	if (strlen(vmc->vmc_cdrom)) {
681 | 		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
682 | 		if (vioscsi == NULL) {
683 | 			log_warn("%s: calloc failure allocating vioscsi",
684 | 			    __progname);
685 | 			return;
686 | 		}
687 |
688 | 		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
689 | 		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
690 | 		    PCI_CLASS_MASS_STORAGE,
691 | 		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
692 | 		    PCI_VENDOR_OPENBSD,
693 | 		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
694 | 			log_warnx("%s: can't add PCI vioscsi device",
695 | 			    __progname);
696 | 			return;
697 | 		}
698 |
699 | 		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
700 | 			log_warnx("%s: can't add bar for vioscsi device",
701 | 			    __progname);
702 | 			return;
703 | 		}
704 |
705 | 		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
706 | 			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
707 | 			vioscsi->vq[i].vq_availoffset =
708 | 			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
709 | 			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
710 | 			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
711 | 			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
712 | 			vioscsi->vq[i].last_avail = 0;
713 | 		}
714 | 		if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom,
715 | 		    1) == -1) {
716 | 			log_warnx("%s: unable to determine iso format",
717 | 			    __func__);
718 | 			return;
719 | 		}
720 | 		vioscsi->locked = 0;
721 | 		vioscsi->lba = 0;
722 | 		vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM;
723 | 		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
724 | 		vioscsi->pci_id = id;
725 | 		vioscsi->vm_id = vcp->vcp_id;
726 | 		vioscsi->irq = pci_get_dev_irq(id);
727 | 	}
728 |
729 | 	/* virtio control device */
730 | 	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
731 | 	    PCI_PRODUCT_OPENBSD_CONTROL,
732 | 	    PCI_CLASS_COMMUNICATIONS,
733 | 	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
734 | 	    PCI_VENDOR_OPENBSD,
735 | 	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
736 | 		log_warnx("%s: can't add PCI vmm control device",
737 | 		    __progname);
738 | 		return;
739 | 	}
740 |
741 | 	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
742 | 		log_warnx("%s: can't add bar for vmm control device",
743 | 		    __progname);
744 | 		return;
745 | 	}
746 |
747 | 	memset(&vmmci, 0, sizeof(vmmci));
748 | 	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
749 | 	    VMMCI_F_SYNCRTC;
750 | 	vmmci.vm_id = vcp->vcp_id;
751 | 	vmmci.irq = pci_get_dev_irq(id);
752 | 	vmmci.pci_id = id;
753 |
754 | 	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
755 | }
756 |
757 | /*
758 |  * vionet_set_hostmac
759 |  *
760 |  * Sets the hardware address for the host-side tap(4) on a vionet_dev.
761 |  *
762 |  * This should only be called from the event-loop thread
763 |  *
764 |  * vm: pointer to the current vmd_vm instance
765 |  * idx: index into the array of vionet_dev's for the target vionet_dev
766 |  * addr: ethernet address to set
767 |  */
768 | void
769 | vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
770 | {
771 | 	struct vmop_create_params *vmc = &vm->vm_params;
772 | 	struct virtio_dev *dev;
773 | 	struct vionet_dev *vionet = NULL;
774 | 	int ret;
775 |
776 | 	if (idx > vmc->vmc_nnics)
777 | 		fatalx("%s: invalid vionet index: %u", __func__, idx);
778 |
779 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
780 | 		if (dev->dev_type == VMD_DEVTYPE_NET
781 | 		    && dev->vionet.idx == idx) {
782 | 			vionet = &dev->vionet;
783 | 			break;
784 | 		}
785 | 	}
786 | 	if (vionet == NULL)
787 | 		fatalx("%s: dev == NULL, idx = %u", __func__, idx);
788 |
789 | 	/* Set the local vm process copy. */
790 | 	memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac));
791 |
792 | 	/* Send the information to the device process. */
793 | 	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1,
794 | 	    vionet->hostmac, sizeof(vionet->hostmac));
795 | 	if (ret == -1) {
796 | 		log_warnx("%s: failed to queue hostmac to vionet dev %u",
797 | 		    __func__, idx);
798 | 		return;
799 | 	}
800 | }
801 |
802 | void
803 | virtio_shutdown(struct vmd_vm *vm)
804 | {
805 | 	int ret, status;
806 | 	pid_t pid = 0;
807 | 	struct virtio_dev *dev, *tmp;
808 | 	struct viodev_msg msg;
809 | 	struct imsgbuf *ibuf;
810 |
811 | 	/* Ensure that our disks are synced. */
812 | 	if (vioscsi != NULL)
813 | 		vioscsi->file.close(vioscsi->file.p, 0);
814 |
815 | 	/*
816 | 	 * Broadcast shutdown to child devices. We need to do this
817 | 	 * synchronously as we have already stopped the async event thread.
818 | 	 */
819 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
820 | 		memset(&msg, 0, sizeof(msg));
821 | 		msg.type = VIODEV_MSG_SHUTDOWN;
822 | 		ibuf = &dev->sync_iev.ibuf;
823 | 		ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1,
824 | 		    &msg, sizeof(msg));
825 | 		if (ret == -1)
826 | 			fatalx("%s: failed to send shutdown to device",
827 | 			    __func__);
828 | 		if (imsg_flush(ibuf) == -1)
829 | 			fatalx("%s: imsg_flush", __func__);
830 | 	}
831 |
832 | 	/*
833 | 	 * Wait for all children to shutdown using a simple approach of
834 | 	 * iterating over known child devices and waiting for them to die.
835 | 	 */
836 | 	SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) {
837 | 		log_debug("%s: waiting on device pid %d", __func__,
838 | 		    dev->dev_pid);
839 | 		do {
840 | 			pid = waitpid(dev->dev_pid, &status, WNOHANG);
841 | 		} while (pid == 0 || (pid == -1 && errno == EINTR));
842 | 		if (pid == dev->dev_pid)
843 | 			log_debug("%s: device for pid %d is stopped",
844 | 			    __func__, pid);
845 | 		else
846 | 			log_warnx("%s: unexpected pid %d", __func__, pid);
847 | 		free(dev);
848 | 	}
849 | }
850 |
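| /*
|  * The *_restore() functions below mirror the *_dump() functions
|  * further down: each reads a device struct back in with atomicio(),
|  * then rewires the state that is only valid in this process (PCI BAR
|  * functions, IRQs, vm ids, virtqueue mappings).
|  */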
851 | int
852 | vmmci_restore(int fd, uint32_t vm_id)
853 | {
854 | 	log_debug("%s: receiving vmmci", __func__);
855 | 	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
856 | 		log_warnx("%s: error reading vmmci from fd", __func__);
857 | 		return (-1);
858 | 	}
859 |
860 | 	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
861 | 		log_warnx("%s: can't set bar fn for vmm control device",
862 | 		    __progname);
863 | 		return (-1);
864 | 	}
865 | 	vmmci.vm_id = vm_id;
866 | 	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
867 | 	memset(&vmmci.timeout, 0, sizeof(struct event));
868 | 	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
869 | 	return (0);
870 | }
871 |
872 | int
873 | viornd_restore(int fd, struct vmd_vm *vm)
874 | {
875 | 	void *hva = NULL;
876 |
877 | 	log_debug("%s: receiving viornd", __func__);
878 | 	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
879 | 		log_warnx("%s: error reading viornd from fd", __func__);
880 | 		return (-1);
881 | 	}
882 | 	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
883 | 		log_warnx("%s: can't set bar fn for virtio rng device",
884 | 		    __progname);
885 | 		return (-1);
886 | 	}
887 | 	viornd.vm_id = vm->vm_params.vmc_params.vcp_id;
888 | 	viornd.irq = pci_get_dev_irq(viornd.pci_id);
889 |
890 | 	hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE));
891 | 	if (hva == NULL)
892 | 		fatal("failed to restore viornd virtqueue");
893 | 	viornd.vq[0].q_hva = hva;
894 |
895 | 	return (0);
896 | }
897 |
898 | int
899 | vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
900 | {
901 | 	struct vmop_create_params *vmc = &vm->vm_params;
902 | 	struct vm_create_params *vcp = &vmc->vmc_params;
903 | 	struct virtio_dev *dev;
904 | 	uint8_t i;
905 |
906 | 	if (vmc->vmc_nnics == 0)
907 | 		return (0);
908 |
909 | 	for (i = 0; i < vmc->vmc_nnics; i++) {
910 | 		dev = calloc(1, sizeof(struct virtio_dev));
911 | 		if (dev == NULL) {
912 | 			log_warn("%s: calloc failure allocating vionet",
913 | 			    __progname);
914 | 			return (-1);
915 | 		}
916 |
917 | 		log_debug("%s: receiving virtio network device", __func__);
918 | 		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
919 | 		    != sizeof(struct virtio_dev)) {
920 | 			log_warnx("%s: error reading vionet from fd",
921 | 			    __func__);
922 | 			return (-1);
923 | 		}
924 |
925 | 		/* Virtio network */
926 | 		if (dev->dev_type != VMD_DEVTYPE_NET) {
927 | 			log_warnx("%s: invalid device type", __func__);
928 | 			return (-1);
929 | 		}
930 |
931 | 		dev->sync_fd = -1;
932 | 		dev->async_fd = -1;
933 | 		dev->vm_id = vcp->vcp_id;
934 | 		dev->vm_vmid = vm->vm_vmid;
935 | 		dev->irq = pci_get_dev_irq(dev->pci_id);
936 |
937 | 		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
938 | 			log_warnx("%s: can't set bar fn for virtio net "
939 | 			    "device", __progname);
940 | 			return (-1);
941 | 		}
942 |
943 | 		dev->vionet.data_fd = child_taps[i];
944 | 		dev->vionet.idx = i;
945 |
946 | 		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
947 | 	}
948 |
949 | 	return (0);
950 | }
951 |
952 | int
953 | vioblk_restore(int fd, struct vmd_vm *vm,
954 |     int child_disks[][VM_MAX_BASE_PER_DISK])
955 | {
956 | 	struct vmop_create_params *vmc = &vm->vm_params;
957 | 	struct virtio_dev *dev;
958 | 	uint8_t i, j;
959 |
960 | 	if (vmc->vmc_ndisks == 0)
961 | 		return (0);
962 |
963 | 	for (i = 0; i < vmc->vmc_ndisks; i++) {
964 | 		dev = calloc(1, sizeof(struct virtio_dev));
965 | 		if (dev == NULL) {
966 | 			log_warn("%s: calloc failure allocating vioblks",
967 | 			    __progname);
968 | 			return (-1);
969 | 		}
970 |
971 | 		log_debug("%s: receiving vioblk", __func__);
972 | 		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
973 | 		    != sizeof(struct virtio_dev)) {
974 | 			log_warnx("%s: error reading vioblk from fd", __func__);
975 | 			return (-1);
976 | 		}
977 | 		if (dev->dev_type != VMD_DEVTYPE_DISK) {
978 | 			log_warnx("%s: invalid device type", __func__);
979 | 			return (-1);
980 | 		}
981 |
982 | 		dev->sync_fd = -1;
983 | 		dev->async_fd = -1;
984 |
985 | 		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
986 | 			log_warnx("%s: can't set bar fn for virtio block "
987 | 			    "device", __progname);
988 | 			return (-1);
989 | 		}
990 | 		dev->vm_id = vmc->vmc_params.vcp_id;
991 | 		dev->irq = pci_get_dev_irq(dev->pci_id);
992 |
993 | 		memset(&dev->vioblk.disk_fd, -1, sizeof(dev->vioblk.disk_fd));
994 | 		dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
995 | 		for (j = 0; j < dev->vioblk.ndisk_fd; j++)
996 | 			dev->vioblk.disk_fd[j] = child_disks[i][j];
997 |
998 | 		dev->vioblk.idx = i;
999 | 		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
1000 | 	}
1001 | 	return (0);
1002 | }
1003 |
1004 | int
1005 | vioscsi_restore(int fd, struct vmd_vm *vm, int child_cdrom)
1006 | {
1007 | 	void *hva = NULL;
1008 | 	unsigned int i;
1009 |
1010 | 	if (!strlen(vm->vm_params.vmc_cdrom))
1011 | 		return (0);
1012 |
1013 | 	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
1014 | 	if (vioscsi == NULL) {
1015 | 		log_warn("%s: calloc failure allocating vioscsi", __progname);
1016 | 		return (-1);
1017 | 	}
1018 |
1019 | 	log_debug("%s: receiving vioscsi", __func__);
1020 |
1021 | 	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
1022 | 	    sizeof(struct vioscsi_dev)) {
1023 | 		log_warnx("%s: error reading vioscsi from fd", __func__);
1024 | 		return (-1);
1025 | 	}
1026 |
1027 | 	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
1028 | 		log_warnx("%s: can't set bar fn for vmm control device",
1029 | 		    __progname);
1030 | 		return (-1);
1031 | 	}
1032 |
1033 | 	vioscsi->vm_id = vm->vm_params.vmc_params.vcp_id;
1034 | 	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);
1035 |
1036 | 	/* vioscsi uses 3 virtqueues. */
1037 | 	for (i = 0; i < 3; i++) {
1038 | 		hva = hvaddr_mem(vioscsi->vq[i].q_gpa,
1039 | 		    vring_size(VIOSCSI_QUEUE_SIZE));
1040 | 		if (hva == NULL)
1041 | 			fatal("failed to restore vioscsi virtqueue");
1042 | 		vioscsi->vq[i].q_hva = hva;
1043 | 	}
1044 |
1045 | 	return (0);
1046 | }
1047 |
1048 | int
1049 | virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
1050 |     int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
1051 | {
1052 | 	struct virtio_dev *dev;
1053 | 	int ret;
1054 |
1055 | 	SLIST_INIT(&virtio_devs);
1056 |
1057 | 	if ((ret = viornd_restore(fd, vm)) == -1)
1058 | 		return (ret);
1059 |
1060 | 	if ((ret = vioblk_restore(fd, vm, child_disks)) == -1)
1061 | 		return (ret);
1062 |
1063 | 	if ((ret = vioscsi_restore(fd, vm, child_cdrom)) == -1)
1064 | 		return (ret);
1065 |
1066 | 	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
1067 | 		return (ret);
1068 |
1069 | 	if ((ret = vmmci_restore(fd, vm->vm_params.vmc_params.vcp_id)) == -1)
1070 | 		return (ret);
1071 |
1072 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1073 | 		if (virtio_dev_launch(vm, dev) != 0)
1074 | 			fatalx("%s: failed to restore virtio dev", __func__);
1075 | 	}
1076 |
1077 | 	return (0);
1078 | }
1079 |
1080 | int
1081 | viornd_dump(int fd)
1082 | {
1083 | 	log_debug("%s: sending viornd", __func__);
1084 |
1085 | 	viornd.vq[0].q_hva = NULL;
1086 |
1087 | 	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
1088 | 		log_warnx("%s: error writing viornd to fd", __func__);
1089 | 		return (-1);
1090 | 	}
1091 | 	return (0);
1092 | }
1093 |
1094 | int
1095 | vmmci_dump(int fd)
1096 | {
1097 | 	log_debug("%s: sending vmmci", __func__);
1098 |
1099 | 	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
1100 | 		log_warnx("%s: error writing vmmci to fd", __func__);
1101 | 		return (-1);
1102 | 	}
1103 | 	return (0);
1104 | }
1105 |
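| /*
|  * Dumping a vionet device needs the child process: request a dump with
|  * VIODEV_MSG_DUMP over the sync channel, read the struct back over the
|  * same fd, and scrub the state that is meaningless outside the child
|  * (fds, mapped rings, imsg buffers) before writing it out.
|  */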
1106 | int
1107 | vionet_dump(int fd)
1108 | {
1109 | 	struct virtio_dev *dev, temp;
1110 | 	struct viodev_msg msg;
1111 | 	struct imsg imsg;
1112 | 	struct imsgbuf *ibuf = NULL;
1113 | 	size_t sz;
1114 | 	int ret;
1115 |
1116 | 	log_debug("%s: dumping vionet", __func__);
1117 |
1118 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1119 | 		if (dev->dev_type != VMD_DEVTYPE_NET)
1120 | 			continue;
1121 |
1122 | 		memset(&msg, 0, sizeof(msg));
1123 | 		memset(&imsg, 0, sizeof(imsg));
1124 |
1125 | 		ibuf = &dev->sync_iev.ibuf;
1126 | 		msg.type = VIODEV_MSG_DUMP;
1127 |
1128 | 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1129 | 		    sizeof(msg));
1130 | 		if (ret == -1) {
1131 | 			log_warnx("%s: failed requesting dump of vionet[%d]",
1132 | 			    __func__, dev->vionet.idx);
1133 | 			return (-1);
1134 | 		}
1135 | 		if (imsg_flush(ibuf) == -1) {
1136 | 			log_warnx("%s: imsg_flush", __func__);
1137 | 			return (-1);
1138 | 		}
1139 |
1140 | 		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
1141 | 		if (sz != sizeof(temp)) {
1142 | 			log_warnx("%s: failed to dump vionet[%d]", __func__,
1143 | 			    dev->vionet.idx);
1144 | 			return (-1);
1145 | 		}
1146 |
1147 | 		/* Clear volatile state. Will reinitialize on restore. */
1148 | 		temp.vionet.vq[RXQ].q_hva = NULL;
1149 | 		temp.vionet.vq[TXQ].q_hva = NULL;
1150 | 		temp.async_fd = -1;
1151 | 		temp.sync_fd = -1;
1152 | 		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
1153 | 		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));
1154 |
1155 | 		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
1156 | 			log_warnx("%s: error writing vionet to fd", __func__);
1157 | 			return (-1);
1158 | 		}
1159 | 	}
1160 |
1161 | 	return (0);
1162 | }
1163 |
1164 | int
1165 | vioblk_dump(int fd)
1166 | {
1167 | 	struct virtio_dev *dev, temp;
1168 | 	struct viodev_msg msg;
1169 | 	struct imsg imsg;
1170 | 	struct imsgbuf *ibuf = NULL;
1171 | 	size_t sz;
1172 | 	int ret;
1173 |
1174 | 	log_debug("%s: dumping vioblk", __func__);
1175 |
1176 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1177 | 		if (dev->dev_type != VMD_DEVTYPE_DISK)
1178 | 			continue;
1179 |
1180 | 		memset(&msg, 0, sizeof(msg));
1181 | 		memset(&imsg, 0, sizeof(imsg));
1182 |
1183 | 		ibuf = &dev->sync_iev.ibuf;
1184 | 		msg.type = VIODEV_MSG_DUMP;
1185 |
1186 | 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1187 | 		    sizeof(msg));
1188 | 		if (ret == -1) {
1189 | 			log_warnx("%s: failed requesting dump of vioblk[%d]",
1190 | 			    __func__, dev->vioblk.idx);
1191 | 			return (-1);
1192 | 		}
1193 | 		if (imsg_flush(ibuf) == -1) {
1194 | 			log_warnx("%s: imsg_flush", __func__);
1195 | 			return (-1);
1196 | 		}
1197 |
1198 |
1199 | 		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
1200 | 		if (sz != sizeof(temp)) {
1201 | 			log_warnx("%s: failed to dump vioblk[%d]", __func__,
1202 | 			    dev->vioblk.idx);
1203 | 			return (-1);
1204 | 		}
1205 |
1206 | 		/* Clear volatile state. Will reinitialize on restore. */
1207 | 		temp.vioblk.vq[0].q_hva = NULL;
1208 | 		temp.async_fd = -1;
1209 | 		temp.sync_fd = -1;
1210 | 		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
1211 | 		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));
1212 |
1213 | 		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
1214 | 			log_warnx("%s: error writing vioblk to fd", __func__);
1215 | 			return (-1);
1216 | 		}
1217 | 	}
1218 |
1219 | 	return (0);
1220 | }
1221 |
1222 | int
1223 | vioscsi_dump(int fd)
1224 | {
1225 | 	unsigned int i;
1226 |
1227 | 	if (vioscsi == NULL)
1228 | 		return (0);
1229 |
1230 | 	log_debug("%s: sending vioscsi", __func__);
1231 |
1232 | 	for (i = 0; i < 3; i++)
1233 | 		vioscsi->vq[i].q_hva = NULL;
1234 |
1235 | 	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
1236 | 	    sizeof(struct vioscsi_dev)) {
1237 | 		log_warnx("%s: error writing vioscsi to fd", __func__);
1238 | 		return (-1);
1239 | 	}
1240 | 	return (0);
1241 | }
1242 |
1243 | int
1244 | virtio_dump(int fd)
1245 | {
1246 | 	int ret;
1247 |
1248 | 	if ((ret = viornd_dump(fd)) == -1)
1249 | 		return ret;
1250 |
1251 | 	if ((ret = vioblk_dump(fd)) == -1)
1252 | 		return ret;
1253 |
1254 | 	if ((ret = vioscsi_dump(fd)) == -1)
1255 | 		return ret;
1256 |
1257 | 	if ((ret = vionet_dump(fd)) == -1)
1258 | 		return ret;
1259 |
1260 | 	if ((ret = vmmci_dump(fd)) == -1)
1261 | 		return ret;
1262 |
1263 | 	return (0);
1264 | }
1265 |
1266 | void virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data,
1267 |     uint16_t datalen)
1268 | {
1269 | 	struct virtio_dev *dev;
1270 | 	int ret;
1271 |
1272 | 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1273 | 		ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data,
1274 | 		    datalen);
1275 | 		if (ret == -1) {
1276 | 			log_warnx("%s: failed to broadcast imsg type %u",
1277 | 			    __func__, type);
1278 | 		}
1279 | 	}
1280 |
1281 | }
1282 |
1283 | void
1284 | virtio_stop(struct vmd_vm *vm)
1285 | {
1286 | 	return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0);
1287 | }
1288 |
1289 | void
1290 | virtio_start(struct vmd_vm *vm)
1291 | {
1292 | 	return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0);
1293 | }
1294 |
1295 | /* | |||
1296 | * Fork+exec a child virtio device. Returns 0 on success. | |||
1297 | */ | |||
1298 | static int | |||
1299 | virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) | |||
1300 | { | |||
1301 | char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX64], t[2]; | |||
1302 | pid_t dev_pid; | |||
1303 | int data_fds[VM_MAX_BASE_PER_DISK4], sync_fds[2], async_fds[2], ret = 0; | |||
1304 | size_t i, data_fds_sz, sz = 0; | |||
1305 | struct viodev_msg msg; | |||
1306 | struct imsg imsg; | |||
1307 | struct imsgev *iev = &dev->sync_iev; | |||
1308 | ||||
1309 | switch (dev->dev_type) { | |||
1310 | case VMD_DEVTYPE_NET'n': | |||
1311 | data_fds[0] = dev->vionet.data_fd; | |||
1312 | data_fds_sz = 1; | |||
1313 | log_debug("%s: launching vionet%d", | |||
1314 | vm->vm_params.vmc_params.vcp_name, dev->vionet.idx); | |||
1315 | break; | |||
1316 | case VMD_DEVTYPE_DISK'd': | |||
1317 | memcpy(&data_fds, dev->vioblk.disk_fd, sizeof(data_fds)); | |||
1318 | data_fds_sz = dev->vioblk.ndisk_fd; | |||
1319 | log_debug("%s: launching vioblk%d", | |||
1320 | vm->vm_params.vmc_params.vcp_name, dev->vioblk.idx); | |||
1321 | break; | |||
1322 | /* NOTREACHED */ | |||
1323 | default: | |||
1324 | log_warn("%s: invalid device type", __func__); | |||
1325 | return (EINVAL22); | |||
1326 | } | |||
1327 | ||||
1328 | /* We need two channels: one synchronous (IO reads) and one async. */ | |||
1329 | if (socketpair(AF_UNIX1, SOCK_STREAM1, PF_UNSPEC0, sync_fds) == -1) { | |||
1330 | log_warn("failed to create socketpair"); | |||
1331 | return (errno(*__errno())); | |||
1332 | } | |||
1333 | if (socketpair(AF_UNIX1, SOCK_STREAM1, PF_UNSPEC0, async_fds) == -1) { | |||
1334 | log_warn("failed to create async socketpair"); | |||
1335 | return (errno(*__errno())); | |||
1336 | } | |||
1337 | ||||
1338 | /* Keep communication channels open after exec. */ | |||
1339 | if (fcntl(sync_fds[1], F_SETFD2, 0)) { | |||
1340 | ret = errno(*__errno()); | |||
1341 | log_warn("%s: fcntl", __func__); | |||
1342 | goto err; | |||
1343 | } | |||
1344 | if (fcntl(async_fds[1], F_SETFD2, 0)) { | |||
1345 | ret = errno(*__errno()); | |||
1346 | log_warn("%s: fcnt", __func__); | |||
1347 | goto err; | |||
1348 | } | |||
1349 | ||||
1350 | /* Fork... */ | |||
1351 | dev_pid = fork(); | |||
1352 | if (dev_pid == -1) { | |||
1353 | ret = errno(*__errno()); | |||
1354 | log_warn("%s: fork failed", __func__); | |||
1355 | goto err; | |||
1356 | } | |||
1357 | ||||
1358 | if (dev_pid > 0) { | |||
1359 | /* Parent */ | |||
1360 | close_fd(sync_fds[1]); | |||
1361 | close_fd(async_fds[1]); | |||
1362 | ||||
1363 | /* Save the child's pid to help with cleanup. */ | |||
1364 | dev->dev_pid = dev_pid; | |||
1365 | ||||
1366 | /* Set the channel fds to the child's before sending. */ | |||
1367 | dev->sync_fd = sync_fds[1]; | |||
1368 | dev->async_fd = async_fds[1]; | |||
1369 | ||||
1370 | /* Close data fds. Only the child device needs them now. */ | |||
1371 | for (i = 0; i < data_fds_sz; i++) | |||
1372 | close_fd(data_fds[i]); | |||
1373 | ||||
1374 | /* Set our synchronous channel to non-blocking. */ | |||
1375 | if (fcntl(sync_fds[0], F_SETFL4, O_NONBLOCK0x0004) == -1) { | |||
1376 | ret = errno(*__errno()); | |||
1377 | log_warn("%s: fcntl", __func__); | |||
1378 | goto err; | |||
1379 | } | |||
1380 | ||||
1381 | /* 1. Send over our configured device. */ | |||
1382 | log_debug("%s: sending '%c' type device struct", __func__, | |||
1383 | dev->dev_type); | |||
1384 | sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, sync_fds[0], dev, sizeof(*dev)); | |||
1385 | if (sz != sizeof(*dev)) { | |||
1386 | log_warnx("%s: failed to send device", __func__); | |||
1387 | ret = EIO5; | |||
1388 | goto err; | |||
1389 | } | |||
1390 | ||||
1391 | /* 2. Send over details on the VM (including memory fds). */ | |||
1392 | log_debug("%s: sending vm message for '%s'", __func__, | |||
1393 | vm->vm_params.vmc_params.vcp_name); | |||
1394 | sz = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, sync_fds[0], vm, sizeof(*vm)); | |||
1395 | if (sz != sizeof(*vm)) { | |||
1396 | log_warnx("%s: failed to send vm details", __func__); | |||
1397 | ret = EIO5; | |||
1398 | goto err; | |||
1399 | } | |||
1400 | ||||
1401 | /* | |||
1402 | * Initialize our imsg channel to the child device. The initial | |||
1403 | * communication will be synchronous. We expect the child to | |||
1404 | * report itself "ready" to confirm the launch was a success. | |||
1405 | */ | |||
1406 | imsg_init(&iev->ibuf, sync_fds[0]); | |||
1407 | do | |||
1408 | ret = imsg_read(&iev->ibuf); | |||
1409 | while (ret == -1 && errno(*__errno()) == EAGAIN35); | |||
1410 | if (ret == 0 || ret == -1) { | |||
1411 | log_warnx("%s: failed to receive ready message from " | |||
1412 | "'%c' type device", __func__, dev->dev_type); | |||
1413 | ret = EIO5; | |||
1414 | goto err; | |||
1415 | } | |||
1416 | ret = 0; | |||
1417 | ||||
1418 | log_debug("%s: receiving reply", __func__); | |||
1419 | if (imsg_get(&iev->ibuf, &imsg) < 1) { | |||
1420 | log_warnx("%s: imsg_get", __func__); | |||
1421 | ret = EIO5; | |||
1422 | goto err; | |||
1423 | } | |||
1424 | IMSG_SIZE_CHECK(&imsg, &msg)do { if (((&imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof(*&msg)) fatalx("bad length imsg received (%s)", "&msg" ); } while (0); | |||
1425 | memcpy(&msg, imsg.data, sizeof(msg)); | |||
1426 | imsg_free(&imsg); | |||
1427 | ||||
1428 | if (msg.type != VIODEV_MSG_READY1) { | |||
1429 | log_warnx("%s: expected ready message, got type %d", | |||
1430 | __func__, msg.type); | |||
1431 | ret = EINVAL22; | |||
1432 | goto err; | |||
1433 | } | |||
1434 | log_debug("%s: device reports ready via sync channel", | |||
1435 | __func__); | |||
1436 | ||||
		/*
		 * Wire up the async event handling, but only after switching
		 * dev back to the parent's ends of the channel fds.
		 */
		dev->sync_fd = sync_fds[0];
		dev->async_fd = async_fds[0];
		vm_device_pipe(dev, virtio_dispatch_dev);
	} else {
		/* Child */
		close_fd(async_fds[0]);
		close_fd(sync_fds[0]);

		/* Keep data file descriptors open after exec. */
		for (i = 0; i < data_fds_sz; i++) {
			log_debug("%s: marking fd %d !close-on-exec", __func__,
			    data_fds[i]);
			if (fcntl(data_fds[i], F_SETFD, 0)) {
				ret = errno;
				log_warn("%s: fcntl", __func__);
				goto err;
			}
		}

		memset(&nargv, 0, sizeof(nargv));
		memset(num, 0, sizeof(num));
		snprintf(num, sizeof(num), "%d", sync_fds[1]);
		memset(vmm_fd, 0, sizeof(vmm_fd));
		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
		memset(vm_name, 0, sizeof(vm_name));
		snprintf(vm_name, sizeof(vm_name), "%s",
		    vm->vm_params.vmc_params.vcp_name);

		t[0] = dev->dev_type;
		t[1] = '\0';

		nargv[0] = env->argv0;
		nargv[1] = "-X";
		nargv[2] = num;
		nargv[3] = "-t";
		nargv[4] = t;
		nargv[5] = "-i";
		nargv[6] = vmm_fd;
		nargv[7] = "-p";
		nargv[8] = vm_name;
		nargv[9] = "-n";
		nargv[10] = NULL;

		if (env->vmd_verbose == 1) {
			nargv[10] = VMD_VERBOSE_1;
			nargv[11] = NULL;
		} else if (env->vmd_verbose > 1) {
			nargv[10] = VMD_VERBOSE_2;
			nargv[11] = NULL;
		}
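
		/*
		 * The child re-execs itself as, e.g.:
		 *   vmd -X <sync fd> -t <dev type> -i <vmm fd> -p <vm name> -n
		 * with VMD_VERBOSE_1/2 ("-v"/"-vv") replacing "-n"'s
		 * terminator slot when verbose logging is enabled.
		 */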

		/* Control resumes in vmd.c:main(). */
		execvp(nargv[0], nargv);

		ret = errno;
		log_warn("%s: failed to exec device", __func__);
		_exit(ret);
		/* NOTREACHED */
	}

	return (ret);

err:
	close_fd(sync_fds[0]);
	close_fd(sync_fds[1]);
	close_fd(async_fds[0]);
	close_fd(async_fds[1]);
	return (ret);
}

/*
 * Initialize an async imsg channel for a virtio device.
 */
int
vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *))
{
	struct imsgev *iev = &dev->async_iev;
	int fd = dev->async_fd;

	log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
	    dev->dev_type, fd);

	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
		log_warn("failed to set nonblocking mode on vm device pipe");
		return (-1);
	}

	imsg_init(&iev->ibuf, fd);
	iev->handler = cb;
	iev->data = dev;
	iev->events = EV_READ;
	imsg_event_add(iev);

	return (0);
}

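/*
 * Event handler for imsgs arriving on a device's async channel.
 * Reads and dispatches all pending messages, then re-adds the event.
 */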
void
virtio_dispatch_dev(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	ssize_t n = 0;

	if (event & EV_READ) {
		if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
			fatal("%s: imsg_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN)
			fatal("%s: msgbuf_write", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_WRITE)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

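	/* Dispatch every imsg already queued on the channel. */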
	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_DEVOP_MSG:
			IMSG_SIZE_CHECK(&imsg, &msg);
			memcpy(&msg, imsg.data, sizeof(msg));
			handle_dev_msg(&msg, dev);
			break;
		default:
			log_warnx("%s: got non devop imsg %d", __func__,
			    imsg.hdr.type);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}


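/*
 * Handle a single message from a device subprocess. Only interrupt
 * (kick) and status messages are valid on the async channel; IO
 * messages travel over the synchronous channel (see virtio_pci_io()
 * below) and are rejected here.
 */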
static int
handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev)
{
	uint32_t vm_id = gdev->vm_id;
	int irq = gdev->irq;

	switch (msg->type) {
	case VIODEV_MSG_KICK:
		if (msg->state == INTR_STATE_ASSERT)
			vcpu_assert_pic_irq(vm_id, msg->vcpu, irq);
		else if (msg->state == INTR_STATE_DEASSERT)
			vcpu_deassert_pic_irq(vm_id, msg->vcpu, irq);
		break;
	case VIODEV_MSG_READY:
		log_debug("%s: device reports ready", __func__);
		break;
	case VIODEV_MSG_ERROR:
		log_warnx("%s: device reported error", __func__);
		break;
	case VIODEV_MSG_INVALID:
	case VIODEV_MSG_IO_READ:
	case VIODEV_MSG_IO_WRITE:
		/* FALLTHROUGH */
	default:
		log_warnx("%s: unsupported device message type %d", __func__,
		    msg->type);
		return (1);
	}

	return (0);
}

/*
 * Called by the VM process while processing IO from the VCPU thread.
 *
 * N.b. Since the VCPU thread calls this function, we cannot mutate the event
 * system. All ipc messages must be sent manually and cannot be queued for
 * the event loop to push them. (We need to perform a synchronous read, so
 * this isn't really a big deal.)
 */
int
virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct virtio_dev *dev = (struct virtio_dev *)cookie;
	struct imsgbuf *ibuf = &dev->sync_iev.ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	ssize_t n;
	int ret = 0;

	memset(&msg, 0, sizeof(msg));
	msg.reg = reg;
	msg.io_sz = sz;

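	/*
	 * A dir of 0 is an IO write carrying data; anything else is an
	 * IO read, which requires a synchronous reply from the device.
	 */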
	if (dir == 0) {
		msg.type = VIODEV_MSG_IO_WRITE;
		msg.data = *data;
		msg.data_valid = 1;
	} else
		msg.type = VIODEV_MSG_IO_READ;

	if (msg.type == VIODEV_MSG_IO_WRITE) {
		/*
		 * Write request. No reply expected.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warn("%s: failed to send async io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush (write)", __func__);
			return (-1);
		}
	} else {
		/*
		 * Read request. Requires waiting for a reply.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed to send sync io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsg_flush(ibuf) == -1) {
			log_warnx("%s: imsg_flush (read)", __func__);
			return (-1);
		}

		/* Read our reply. */
		do
			n = imsg_read(ibuf);
		while (n == -1 && errno == EAGAIN);
		if (n == 0 || n == -1) {
			log_warn("%s: imsg_read (n=%ld)", __func__, n);
			return (-1);
		}
		if ((n = imsg_get(ibuf, &imsg)) == -1) {
			log_warn("%s: imsg_get (n=%ld)", __func__, n);
			return (-1);
		}
		if (n == 0) {
			log_warnx("%s: invalid imsg", __func__);
			return (-1);
		}

		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
#if DEBUG
			log_debug("%s: got sync read response (reg=%s)",
			    __func__, virtio_reg_name(msg.reg));
#endif /* DEBUG */
			*data = msg.data;
			/*
			 * It's possible we're asked to {de,}assert after the
			 * device performs a register read.
			 */
			if (msg.state == INTR_STATE_ASSERT)
				vcpu_assert_pic_irq(dev->vm_id, msg.vcpu,
				    msg.irq);
			else if (msg.state == INTR_STATE_DEASSERT)
				vcpu_deassert_pic_irq(dev->vm_id, msg.vcpu,
				    msg.irq);
		} else {
			log_warnx("%s: expected IO_READ, got %d", __func__,
			    msg.type);
			return (-1);
		}
	}

	return (0);
}

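/*
 * Request an irq (de)assertion for a device by queueing a
 * VIODEV_MSG_KICK on its async channel; handle_dev_msg() above turns
 * such a message into the matching vcpu_{assert,deassert}_pic_irq()
 * call in the vm process. virtio_deassert_pic_irq() below is the
 * deassert counterpart.
 */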
void
virtio_assert_pic_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_ASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}

void
virtio_deassert_pic_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_DEASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}