File: | src/usr.sbin/vmd/vioblk.c |
Warning: | line 231, column 12 Array access (via field 'disk_fd') results in an undefined pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: vioblk.c,v 1.9 2023/09/26 01:53:54 dv Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2023 Dave Voutila <dv@openbsd.org> | |||
5 | * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> | |||
6 | * | |||
7 | * Permission to use, copy, modify, and distribute this software for any | |||
8 | * purpose with or without fee is hereby granted, provided that the above | |||
9 | * copyright notice and this permission notice appear in all copies. | |||
10 | * | |||
11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
18 | */ | |||
19 | #include <sys/mman.h> | |||
20 | #include <sys/param.h> /* PAGE_SIZE */ | |||
21 | ||||
22 | #include <dev/pci/virtio_pcireg.h> | |||
23 | #include <dev/pv/vioblkreg.h> | |||
24 | #include <dev/pv/virtioreg.h> | |||
25 | ||||
#include <errno.h>
27 | #include <event.h> | |||
28 | #include <fcntl.h> | |||
29 | #include <stdlib.h> | |||
30 | #include <string.h> | |||
31 | #include <unistd.h> | |||
32 | ||||
33 | #include "atomicio.h" | |||
34 | #include "pci.h" | |||
35 | #include "virtio.h" | |||
36 | #include "vmd.h" | |||
37 | ||||
38 | extern char *__progname; | |||
39 | extern struct vmd_vm *current_vm; | |||
40 | struct iovec io_v[VIOBLK_QUEUE_SIZE128]; | |||
41 | ||||
42 | static const char *disk_type(int); | |||
43 | static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *, | |||
44 | int8_t *); | |||
45 | static int handle_io_write(struct viodev_msg *, struct virtio_dev *); | |||
46 | ||||
47 | static void vioblk_update_qs(struct vioblk_dev *); | |||
48 | static void vioblk_update_qa(struct vioblk_dev *); | |||
49 | static int vioblk_notifyq(struct vioblk_dev *); | |||
50 | static ssize_t vioblk_rw(struct vioblk_dev *, int, off_t, | |||
51 | struct vring_desc *, struct vring_desc **); | |||
52 | ||||
53 | static void dev_dispatch_vm(int, short, void *); | |||
54 | static void handle_sync_io(int, short, void *); | |||
55 | ||||
56 | static const char * | |||
57 | disk_type(int type) | |||
58 | { | |||
59 | switch (type) { | |||
60 | case VMDF_RAW0x01: return "raw"; | |||
61 | case VMDF_QCOW20x02: return "qcow2"; | |||
62 | } | |||
63 | return "unknown"; | |||
64 | } | |||
65 | ||||
66 | __dead__attribute__((__noreturn__)) void | |||
67 | vioblk_main(int fd, int fd_vmm) | |||
68 | { | |||
69 | struct virtio_dev dev; | |||
70 | struct vioblk_dev *vioblk; | |||
| ||||
71 | struct viodev_msg msg; | |||
72 | struct vmd_vm vm; | |||
73 | struct vm_create_params *vcp; | |||
74 | ssize_t sz; | |||
75 | off_t szp = 0; | |||
76 | int i, ret, type; | |||
77 | ||||
78 | /* | |||
79 | * stdio - needed for read/write to disk fds and channels to the vm. | |||
80 | * vmm + proc - needed to create shared vm mappings. | |||
81 | */ | |||
82 | if (pledge("stdio vmm proc", NULL((void *)0)) == -1) | |||
83 | fatal("pledge"); | |||
84 | ||||
85 | /* Zero and initialize io work queue. */ | |||
86 | memset(io_v, 0, nitems(io_v)(sizeof((io_v)) / sizeof((io_v)[0]))*sizeof(io_v[0])); | |||
87 | ||||
88 | /* Receive our virtio_dev, mostly preconfigured. */ | |||
89 | memset(&dev, 0, sizeof(dev)); | |||
90 | sz = atomicio(read, fd, &dev, sizeof(dev)); | |||
91 | if (sz != sizeof(dev)) { | |||
92 | ret = errno(*__errno()); | |||
93 | log_warn("failed to receive vioblk"); | |||
94 | goto fail; | |||
95 | } | |||
96 | if (dev.dev_type != VMD_DEVTYPE_DISK'd') { | |||
97 | ret = EINVAL22; | |||
98 | log_warn("received invalid device type"); | |||
99 | goto fail; | |||
100 | } | |||
101 | dev.sync_fd = fd; | |||
102 | vioblk = &dev.vioblk; | |||
103 | ||||
104 | log_debug("%s: got viblk dev. num disk fds = %d, sync fd = %d, " | |||
105 | "async fd = %d, capacity = %lld seg_max = %u, vmm fd = %d", | |||
106 | __func__, vioblk->ndisk_fd, dev.sync_fd, dev.async_fd, | |||
107 | vioblk->capacity, vioblk->seg_max, fd_vmm); | |||
108 | ||||
109 | /* Receive our vm information from the vm process. */ | |||
110 | memset(&vm, 0, sizeof(vm)); | |||
111 | sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm)); | |||
112 | if (sz != sizeof(vm)) { | |||
113 | ret = EIO5; | |||
114 | log_warnx("failed to receive vm details"); | |||
115 | goto fail; | |||
116 | } | |||
117 | vcp = &vm.vm_params.vmc_params; | |||
118 | current_vm = &vm; | |||
119 | ||||
120 | setproctitle("%s/vioblk%d", vcp->vcp_name, vioblk->idx); | |||
121 | log_procinit("vm/%s/vioblk%d", vcp->vcp_name, vioblk->idx); | |||
122 | ||||
123 | /* Now that we have our vm information, we can remap memory. */ | |||
124 | ret = remap_guest_mem(&vm, fd_vmm); | |||
125 | if (ret) { | |||
126 | log_warnx("failed to remap guest memory"); | |||
127 | goto fail; | |||
128 | } | |||
129 | ||||
130 | /* | |||
131 | * We no longer need /dev/vmm access. | |||
132 | */ | |||
133 | close_fd(fd_vmm); | |||
134 | if (pledge("stdio", NULL((void *)0)) == -1) | |||
135 | fatal("pledge2"); | |||
136 | ||||
137 | /* Initialize the virtio block abstractions. */ | |||
138 | type = vm.vm_params.vmc_disktypes[vioblk->idx]; | |||
139 | switch (type) { | |||
140 | case VMDF_RAW0x01: | |||
141 | ret = virtio_raw_init(&vioblk->file, &szp, vioblk->disk_fd, | |||
142 | vioblk->ndisk_fd); | |||
143 | break; | |||
144 | case VMDF_QCOW20x02: | |||
145 | ret = virtio_qcow2_init(&vioblk->file, &szp, vioblk->disk_fd, | |||
146 | vioblk->ndisk_fd); | |||
147 | break; | |||
148 | default: | |||
149 | log_warnx("invalid disk image type"); | |||
150 | goto fail; | |||
151 | } | |||
152 | if (ret || szp < 0) { | |||
153 | log_warnx("failed to init disk %s image", disk_type(type)); | |||
154 | goto fail; | |||
155 | } | |||
156 | vioblk->capacity = szp / 512; | |||
157 | log_debug("%s: initialized vioblk%d with %s image (capacity=%lld)", | |||
158 | __func__, vioblk->idx, disk_type(type), vioblk->capacity); | |||
159 | ||||
160 | /* If we're restoring hardware, reinitialize the virtqueue hva. */ | |||
161 | if (vm.vm_state & VM_STATE_RECEIVED0x08) | |||
162 | vioblk_update_qa(vioblk); | |||
163 | ||||
164 | /* Initialize libevent so we can start wiring event handlers. */ | |||
165 | event_init(); | |||
166 | ||||
167 | /* Wire up an async imsg channel. */ | |||
168 | log_debug("%s: wiring in async vm event handler (fd=%d)", __func__, | |||
169 | dev.async_fd); | |||
170 | if (vm_device_pipe(&dev, dev_dispatch_vm)) { | |||
171 | ret = EIO5; | |||
172 | log_warnx("vm_device_pipe"); | |||
173 | goto fail; | |||
174 | } | |||
175 | ||||
176 | /* Configure our sync channel event handler. */ | |||
177 | log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, | |||
178 | dev.sync_fd); | |||
179 | if (fcntl(dev.sync_fd, F_SETFL4, O_NONBLOCK0x0004) == -1) { | |||
180 | ret = errno(*__errno()); | |||
181 | log_warn("%s: fcntl", __func__); | |||
182 | goto fail; | |||
183 | } | |||
184 | imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); | |||
185 | dev.sync_iev.handler = handle_sync_io; | |||
186 | dev.sync_iev.data = &dev; | |||
187 | dev.sync_iev.events = EV_READ0x02; | |||
188 | imsg_event_add(&dev.sync_iev); | |||
189 | ||||
190 | /* Send a ready message over the sync channel. */ | |||
191 | log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name); | |||
192 | memset(&msg, 0, sizeof(msg)); | |||
193 | msg.type = VIODEV_MSG_READY1; | |||
194 | imsg_compose_event(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, | |||
195 | sizeof(msg)); | |||
196 | ||||
197 | /* Send a ready message over the async channel. */ | |||
198 | log_debug("%s: sending heartbeat", __func__); | |||
199 | ret = imsg_compose_event(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1, | |||
200 | &msg, sizeof(msg)); | |||
201 | if (ret == -1) { | |||
202 | log_warnx("%s: failed to send async ready message!", __func__); | |||
203 | goto fail; | |||
204 | } | |||
205 | ||||
206 | /* Engage the event loop! */ | |||
207 | ret = event_dispatch(); | |||
208 | ||||
209 | if (ret == 0) { | |||
210 | /* Clean shutdown. */ | |||
211 | close_fd(dev.sync_fd); | |||
212 | close_fd(dev.async_fd); | |||
213 | for (i = 0; i < (int)sizeof(vioblk->disk_fd); i++) | |||
214 | close_fd(vioblk->disk_fd[i]); | |||
215 | _exit(0); | |||
216 | /* NOTREACHED */ | |||
217 | } | |||
218 | ||||
219 | fail: | |||
220 | /* Try letting the vm know we've failed something. */ | |||
221 | memset(&msg, 0, sizeof(msg)); | |||
222 | msg.type = VIODEV_MSG_ERROR2; | |||
223 | msg.data = ret; | |||
224 | imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, | |||
225 | sizeof(msg)); | |||
226 | imsg_flush(&dev.sync_iev.ibuf); | |||
227 | ||||
228 | close_fd(dev.sync_fd); | |||
229 | close_fd(dev.async_fd); | |||
230 | for (i = 0; i < (int)sizeof(vioblk->disk_fd); i++) | |||
231 | close_fd(vioblk->disk_fd[i]); | |||
| ||||
232 | _exit(ret); | |||
233 | /* NOTREACHED */ | |||
234 | } | |||
235 | ||||
236 | const char * | |||
237 | vioblk_cmd_name(uint32_t type) | |||
238 | { | |||
239 | switch (type) { | |||
240 | case VIRTIO_BLK_T_IN0: return "read"; | |||
241 | case VIRTIO_BLK_T_OUT1: return "write"; | |||
242 | case VIRTIO_BLK_T_SCSI_CMD2: return "scsi read"; | |||
243 | case VIRTIO_BLK_T_SCSI_CMD_OUT3: return "scsi write"; | |||
244 | case VIRTIO_BLK_T_FLUSH4: return "flush"; | |||
245 | case VIRTIO_BLK_T_FLUSH_OUT5: return "flush out"; | |||
246 | case VIRTIO_BLK_T_GET_ID8: return "get id"; | |||
247 | default: return "unknown"; | |||
248 | } | |||
249 | } | |||
250 | ||||
251 | static void | |||
252 | vioblk_update_qa(struct vioblk_dev *dev) | |||
253 | { | |||
254 | struct virtio_vq_info *vq_info; | |||
255 | void *hva = NULL((void *)0); | |||
256 | ||||
257 | /* Invalid queue? */ | |||
258 | if (dev->cfg.queue_select > 0) | |||
259 | return; | |||
260 | ||||
261 | vq_info = &dev->vq[dev->cfg.queue_select]; | |||
262 | vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE(4096); | |||
263 | ||||
264 | hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE128)); | |||
265 | if (hva == NULL((void *)0)) | |||
266 | fatal("vioblk_update_qa"); | |||
267 | vq_info->q_hva = hva; | |||
268 | } | |||
269 | ||||
270 | static void | |||
271 | vioblk_update_qs(struct vioblk_dev *dev) | |||
272 | { | |||
273 | struct virtio_vq_info *vq_info; | |||
274 | ||||
275 | /* Invalid queue? */ | |||
276 | if (dev->cfg.queue_select > 0) { | |||
277 | dev->cfg.queue_size = 0; | |||
278 | return; | |||
279 | } | |||
280 | ||||
281 | vq_info = &dev->vq[dev->cfg.queue_select]; | |||
282 | ||||
283 | /* Update queue pfn/size based on queue select */ | |||
284 | dev->cfg.queue_pfn = vq_info->q_gpa >> 12; | |||
285 | dev->cfg.queue_size = vq_info->qs; | |||
286 | } | |||
287 | ||||
288 | /* | |||
289 | * Process virtqueue notifications. If an unrecoverable error occurs, puts | |||
290 | * device into a "needs reset" state. | |||
291 | * | |||
292 | * Returns 1 if an we need to assert an IRQ. | |||
293 | */ | |||
294 | static int | |||
295 | vioblk_notifyq(struct vioblk_dev *dev) | |||
296 | { | |||
297 | uint32_t cmd_len; | |||
298 | uint16_t idx, cmd_desc_idx; | |||
299 | uint8_t ds; | |||
300 | off_t offset; | |||
301 | ssize_t sz; | |||
302 | int is_write, notify, i; | |||
303 | char *vr; | |||
304 | struct vring_desc *table, *desc; | |||
305 | struct vring_avail *avail; | |||
306 | struct vring_used *used; | |||
307 | struct virtio_blk_req_hdr *cmd; | |||
308 | struct virtio_vq_info *vq_info; | |||
309 | ||||
310 | /* Invalid queue? */ | |||
311 | if (dev->cfg.queue_notify > 0) | |||
312 | return (0); | |||
313 | ||||
314 | vq_info = &dev->vq[dev->cfg.queue_notify]; | |||
315 | idx = vq_info->last_avail; | |||
316 | vr = vq_info->q_hva; | |||
317 | if (vr == NULL((void *)0)) | |||
318 | fatalx("%s: null vring", __func__); | |||
319 | ||||
320 | /* Compute offsets in table of descriptors, avail ring, and used ring */ | |||
321 | table = (struct vring_desc *)(vr); | |||
322 | avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); | |||
323 | used = (struct vring_used *)(vr + vq_info->vq_usedoffset); | |||
324 | ||||
325 | while (idx != avail->idx) { | |||
326 | /* Retrieve Command descriptor. */ | |||
327 | cmd_desc_idx = avail->ring[idx & VIOBLK_QUEUE_MASK(128 - 1)]; | |||
328 | desc = &table[cmd_desc_idx]; | |||
329 | cmd_len = desc->len; | |||
330 | ||||
331 | /* | |||
332 | * Validate Command descriptor. It should be chained to another | |||
333 | * descriptor and not be itself writable. | |||
334 | */ | |||
335 | if ((desc->flags & VRING_DESC_F_NEXT1) == 0) { | |||
336 | log_warnx("%s: unchained cmd descriptor", __func__); | |||
337 | goto reset; | |||
338 | } | |||
339 | if (DESC_WRITABLE(desc)(((desc)->flags & 2) ? 1 : 0)) { | |||
340 | log_warnx("%s: invalid cmd descriptor state", __func__); | |||
341 | goto reset; | |||
342 | } | |||
343 | ||||
344 | /* Retrieve the vioblk command request. */ | |||
345 | cmd = hvaddr_mem(desc->addr, sizeof(*cmd)); | |||
346 | if (cmd == NULL((void *)0)) | |||
347 | goto reset; | |||
348 | ||||
349 | /* Advance to the 2nd descriptor. */ | |||
350 | desc = &table[desc->next & VIOBLK_QUEUE_MASK(128 - 1)]; | |||
351 | ||||
352 | /* Process each available command & chain. */ | |||
353 | switch (cmd->type) { | |||
354 | case VIRTIO_BLK_T_IN0: | |||
355 | case VIRTIO_BLK_T_OUT1: | |||
356 | /* Read (IN) & Write (OUT) */ | |||
357 | is_write = (cmd->type == VIRTIO_BLK_T_OUT1) ? 1 : 0; | |||
358 | offset = cmd->sector * VIRTIO_BLK_SECTOR_SIZE512; | |||
359 | sz = vioblk_rw(dev, is_write, offset, table, &desc); | |||
360 | if (sz == -1) | |||
361 | ds = VIRTIO_BLK_S_IOERR1; | |||
362 | else | |||
363 | ds = VIRTIO_BLK_S_OK0; | |||
364 | break; | |||
365 | case VIRTIO_BLK_T_GET_ID8: | |||
366 | /* | |||
367 | * We don't support this command yet. While it's not | |||
368 | * officially part of the virtio spec (will be in v1.2) | |||
369 | * there's no feature to negotiate. Linux drivers will | |||
370 | * often send this command regardless. | |||
371 | */ | |||
372 | ds = VIRTIO_BLK_S_UNSUPP2; | |||
373 | default: | |||
374 | log_warnx("%s: unsupported vioblk command %d", __func__, | |||
375 | cmd->type); | |||
376 | ds = VIRTIO_BLK_S_UNSUPP2; | |||
377 | break; | |||
378 | } | |||
379 | ||||
380 | /* Advance to the end of the chain, if needed. */ | |||
381 | i = 0; | |||
382 | while (desc->flags & VRING_DESC_F_NEXT1) { | |||
383 | desc = &table[desc->next & VIOBLK_QUEUE_MASK(128 - 1)]; | |||
384 | if (++i >= VIOBLK_QUEUE_SIZE128) { | |||
385 | /* | |||
386 | * If we encounter an infinite/looping chain, | |||
387 | * not much we can do but say we need a reset. | |||
388 | */ | |||
389 | log_warnx("%s: descriptor chain overflow", | |||
390 | __func__); | |||
391 | goto reset; | |||
392 | } | |||
393 | } | |||
394 | ||||
395 | /* Provide the status of our command processing. */ | |||
396 | if (!DESC_WRITABLE(desc)(((desc)->flags & 2) ? 1 : 0)) { | |||
397 | log_warnx("%s: status descriptor unwritable", __func__); | |||
398 | goto reset; | |||
399 | } | |||
400 | /* Overkill as ds is 1 byte, but validates gpa. */ | |||
401 | if (write_mem(desc->addr, &ds, sizeof(ds))) | |||
402 | log_warnx("%s: can't write device status data " | |||
403 | "@ 0x%llx",__func__, desc->addr); | |||
404 | ||||
405 | dev->cfg.isr_status |= 1; | |||
406 | notify = 1; | |||
407 | ||||
408 | used->ring[used->idx & VIOBLK_QUEUE_MASK(128 - 1)].id = cmd_desc_idx; | |||
409 | used->ring[used->idx & VIOBLK_QUEUE_MASK(128 - 1)].len = cmd_len; | |||
410 | ||||
411 | __sync_synchronize(); | |||
412 | used->idx++; | |||
413 | idx++; | |||
414 | } | |||
415 | ||||
416 | vq_info->last_avail = idx; | |||
417 | return (notify); | |||
418 | ||||
419 | reset: | |||
420 | /* | |||
421 | * When setting the "needs reset" flag, the driver is notified | |||
422 | * via a configuration change interrupt. | |||
423 | */ | |||
424 | dev->cfg.device_status |= DEVICE_NEEDS_RESET64; | |||
425 | dev->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE2; | |||
426 | return (1); | |||
427 | } | |||
428 | ||||
429 | static void | |||
430 | dev_dispatch_vm(int fd, short event, void *arg) | |||
431 | { | |||
432 | struct virtio_dev *dev = (struct virtio_dev *)arg; | |||
433 | struct imsgev *iev = &dev->async_iev; | |||
434 | struct imsgbuf *ibuf = &iev->ibuf; | |||
435 | struct imsg imsg; | |||
436 | ssize_t n = 0; | |||
437 | int verbose; | |||
438 | ||||
439 | if (event & EV_READ0x02) { | |||
440 | if ((n = imsg_read(ibuf)) == -1 && errno(*__errno()) != EAGAIN35) | |||
441 | fatal("%s: imsg_read", __func__); | |||
442 | if (n == 0) { | |||
443 | /* this pipe is dead, so remove the event handler */ | |||
444 | log_debug("%s: pipe dead (EV_READ)", __func__); | |||
445 | event_del(&iev->ev); | |||
446 | event_loopexit(NULL((void *)0)); | |||
447 | return; | |||
448 | } | |||
449 | } | |||
450 | ||||
451 | if (event & EV_WRITE0x04) { | |||
452 | if ((n = msgbuf_write(&ibuf->w)) == -1 && errno(*__errno()) != EAGAIN35) | |||
453 | fatal("%s: msgbuf_write", __func__); | |||
454 | if (n == 0) { | |||
455 | /* this pipe is dead, so remove the event handler */ | |||
456 | log_debug("%s: pipe dead (EV_WRITE)", __func__); | |||
457 | event_del(&iev->ev); | |||
458 | event_loopbreak(); | |||
459 | return; | |||
460 | } | |||
461 | } | |||
462 | ||||
463 | for (;;) { | |||
464 | if ((n = imsg_get(ibuf, &imsg)) == -1) | |||
465 | fatal("%s: imsg_get", __func__); | |||
466 | if (n == 0) | |||
467 | break; | |||
468 | ||||
469 | switch (imsg.hdr.type) { | |||
470 | case IMSG_VMDOP_PAUSE_VM: | |||
471 | log_debug("%s: pausing", __func__); | |||
472 | break; | |||
473 | case IMSG_VMDOP_UNPAUSE_VM: | |||
474 | log_debug("%s: unpausing", __func__); | |||
475 | break; | |||
476 | case IMSG_CTL_VERBOSE: | |||
477 | IMSG_SIZE_CHECK(&imsg, &verbose)do { if (((&imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof(*&verbose)) fatalx("bad length imsg received (%s)" , "&verbose"); } while (0); | |||
478 | memcpy(&verbose, imsg.data, sizeof(verbose)); | |||
479 | log_setverbose(verbose); | |||
480 | break; | |||
481 | default: | |||
482 | log_warnx("%s: unhandled imsg type %d", __func__, | |||
483 | imsg.hdr.type); | |||
484 | break; | |||
485 | } | |||
486 | imsg_free(&imsg); | |||
487 | } | |||
488 | imsg_event_add(iev); | |||
489 | } | |||
490 | ||||
491 | /* | |||
492 | * Synchronous IO handler. | |||
493 | * | |||
494 | */ | |||
495 | static void | |||
496 | handle_sync_io(int fd, short event, void *arg) | |||
497 | { | |||
498 | struct virtio_dev *dev = (struct virtio_dev *)arg; | |||
499 | struct imsgev *iev = &dev->sync_iev; | |||
500 | struct imsgbuf *ibuf = &iev->ibuf; | |||
501 | struct viodev_msg msg; | |||
502 | struct imsg imsg; | |||
503 | ssize_t n; | |||
504 | int8_t intr = INTR_STATE_NOOP0; | |||
505 | ||||
506 | if (event & EV_READ0x02) { | |||
507 | if ((n = imsg_read(ibuf)) == -1 && errno(*__errno()) != EAGAIN35) | |||
508 | fatal("%s: imsg_read", __func__); | |||
509 | if (n == 0) { | |||
510 | /* this pipe is dead, so remove the event handler */ | |||
511 | log_debug("%s: vioblk pipe dead (EV_READ)", __func__); | |||
512 | event_del(&iev->ev); | |||
513 | event_loopexit(NULL((void *)0)); | |||
514 | return; | |||
515 | } | |||
516 | } | |||
517 | ||||
518 | if (event & EV_WRITE0x04) { | |||
519 | if ((n = msgbuf_write(&ibuf->w)) == -1 && errno(*__errno()) != EAGAIN35) | |||
520 | fatal("%s: msgbuf_write", __func__); | |||
521 | if (n == 0) { | |||
522 | /* this pipe is dead, so remove the event handler */ | |||
523 | log_debug("%s: vioblk pipe dead (EV_WRITE)", __func__); | |||
524 | event_del(&iev->ev); | |||
525 | event_loopexit(NULL((void *)0)); | |||
526 | return; | |||
527 | } | |||
528 | } | |||
529 | ||||
530 | for (;;) { | |||
531 | if ((n = imsg_get(ibuf, &imsg)) == -1) | |||
532 | fatalx("%s: imsg_get (n=%ld)", __func__, n); | |||
533 | if (n == 0) | |||
534 | break; | |||
535 | ||||
536 | /* Unpack our message. They ALL should be dev messeges! */ | |||
537 | IMSG_SIZE_CHECK(&imsg, &msg)do { if (((&imsg)->hdr.len - sizeof(struct imsg_hdr)) < sizeof(*&msg)) fatalx("bad length imsg received (%s)", "&msg" ); } while (0); | |||
538 | memcpy(&msg, imsg.data, sizeof(msg)); | |||
539 | imsg_free(&imsg); | |||
540 | ||||
541 | switch (msg.type) { | |||
542 | case VIODEV_MSG_DUMP6: | |||
543 | /* Dump device */ | |||
544 | n = atomicio(vwrite(ssize_t (*)(int, void *, size_t))write, dev->sync_fd, dev, sizeof(*dev)); | |||
545 | if (n != sizeof(*dev)) { | |||
546 | log_warnx("%s: failed to dump vioblk device", | |||
547 | __func__); | |||
548 | break; | |||
549 | } | |||
550 | case VIODEV_MSG_IO_READ4: | |||
551 | /* Read IO: make sure to send a reply */ | |||
552 | msg.data = handle_io_read(&msg, dev, &intr); | |||
553 | msg.data_valid = 1; | |||
554 | msg.state = intr; | |||
555 | imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, | |||
556 | sizeof(msg)); | |||
557 | break; | |||
558 | case VIODEV_MSG_IO_WRITE5: | |||
559 | /* Write IO: no reply needed */ | |||
560 | if (handle_io_write(&msg, dev) == 1) | |||
561 | virtio_assert_pic_irq(dev, 0); | |||
562 | break; | |||
563 | case VIODEV_MSG_SHUTDOWN7: | |||
564 | event_del(&dev->sync_iev.ev); | |||
565 | event_loopbreak(); | |||
566 | return; | |||
567 | default: | |||
568 | fatalx("%s: invalid msg type %d", __func__, msg.type); | |||
569 | } | |||
570 | } | |||
571 | imsg_event_add(iev); | |||
572 | } | |||
573 | ||||
574 | static int | |||
575 | handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev) | |||
576 | { | |||
577 | struct vioblk_dev *vioblk = &dev->vioblk; | |||
578 | uint32_t data = msg->data; | |||
579 | int intr = 0; | |||
580 | ||||
581 | switch (msg->reg) { | |||
582 | case VIRTIO_CONFIG_DEVICE_FEATURES0: | |||
583 | case VIRTIO_CONFIG_QUEUE_SIZE12: | |||
584 | case VIRTIO_CONFIG_ISR_STATUS19: | |||
585 | log_warnx("%s: illegal write %x to %s", __progname, data, | |||
586 | virtio_reg_name(msg->reg)); | |||
587 | break; | |||
588 | case VIRTIO_CONFIG_GUEST_FEATURES4: | |||
589 | vioblk->cfg.guest_feature = data; | |||
590 | break; | |||
591 | case VIRTIO_CONFIG_QUEUE_PFN8: | |||
592 | vioblk->cfg.queue_pfn = data; | |||
593 | vioblk_update_qa(vioblk); | |||
594 | break; | |||
595 | case VIRTIO_CONFIG_QUEUE_SELECT14: | |||
596 | vioblk->cfg.queue_select = data; | |||
597 | vioblk_update_qs(vioblk); | |||
598 | break; | |||
599 | case VIRTIO_CONFIG_QUEUE_NOTIFY16: | |||
600 | /* XXX We should be stricter about status checks. */ | |||
601 | if (!(vioblk->cfg.device_status & DEVICE_NEEDS_RESET64)) { | |||
602 | vioblk->cfg.queue_notify = data; | |||
603 | if (vioblk_notifyq(vioblk)) | |||
604 | intr = 1; | |||
605 | } | |||
606 | break; | |||
607 | case VIRTIO_CONFIG_DEVICE_STATUS18: | |||
608 | vioblk->cfg.device_status = data; | |||
609 | if (vioblk->cfg.device_status == 0) { | |||
610 | vioblk->cfg.guest_feature = 0; | |||
611 | vioblk->cfg.queue_pfn = 0; | |||
612 | vioblk_update_qa(vioblk); | |||
613 | vioblk->cfg.queue_size = 0; | |||
614 | vioblk_update_qs(vioblk); | |||
615 | vioblk->cfg.queue_select = 0; | |||
616 | vioblk->cfg.queue_notify = 0; | |||
617 | vioblk->cfg.isr_status = 0; | |||
618 | vioblk->vq[0].last_avail = 0; | |||
619 | vioblk->vq[0].notified_avail = 0; | |||
620 | virtio_deassert_pic_irq(dev, msg->vcpu); | |||
621 | } | |||
622 | break; | |||
623 | default: | |||
624 | break; | |||
625 | } | |||
626 | return (intr); | |||
627 | } | |||
628 | ||||
629 | static uint32_t | |||
630 | handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr) | |||
631 | { | |||
632 | struct vioblk_dev *vioblk = &dev->vioblk; | |||
633 | uint8_t sz = msg->io_sz; | |||
634 | uint32_t data; | |||
635 | ||||
636 | if (msg->data_valid) | |||
637 | data = msg->data; | |||
638 | else | |||
639 | data = 0; | |||
640 | ||||
641 | switch (msg->reg) { | |||
642 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20: | |||
643 | switch (sz) { | |||
644 | case 4: | |||
645 | data = (uint32_t)(vioblk->capacity); | |||
646 | break; | |||
647 | case 2: | |||
648 | data &= 0xFFFF0000; | |||
649 | data |= (uint32_t)(vioblk->capacity) & 0xFFFF; | |||
650 | break; | |||
651 | case 1: | |||
652 | data &= 0xFFFFFF00; | |||
653 | data |= (uint32_t)(vioblk->capacity) & 0xFF; | |||
654 | break; | |||
655 | } | |||
656 | /* XXX handle invalid sz */ | |||
657 | break; | |||
658 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 1: | |||
659 | if (sz == 1) { | |||
660 | data &= 0xFFFFFF00; | |||
661 | data |= (uint32_t)(vioblk->capacity >> 8) & 0xFF; | |||
662 | } | |||
663 | /* XXX handle invalid sz */ | |||
664 | break; | |||
665 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 2: | |||
666 | if (sz == 1) { | |||
667 | data &= 0xFFFFFF00; | |||
668 | data |= (uint32_t)(vioblk->capacity >> 16) & 0xFF; | |||
669 | } else if (sz == 2) { | |||
670 | data &= 0xFFFF0000; | |||
671 | data |= (uint32_t)(vioblk->capacity >> 16) & 0xFFFF; | |||
672 | } | |||
673 | /* XXX handle invalid sz */ | |||
674 | break; | |||
675 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 3: | |||
676 | if (sz == 1) { | |||
677 | data &= 0xFFFFFF00; | |||
678 | data |= (uint32_t)(vioblk->capacity >> 24) & 0xFF; | |||
679 | } | |||
680 | /* XXX handle invalid sz */ | |||
681 | break; | |||
682 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 4: | |||
683 | switch (sz) { | |||
684 | case 4: | |||
685 | data = (uint32_t)(vioblk->capacity >> 32); | |||
686 | break; | |||
687 | case 2: | |||
688 | data &= 0xFFFF0000; | |||
689 | data |= (uint32_t)(vioblk->capacity >> 32) & 0xFFFF; | |||
690 | break; | |||
691 | case 1: | |||
692 | data &= 0xFFFFFF00; | |||
693 | data |= (uint32_t)(vioblk->capacity >> 32) & 0xFF; | |||
694 | break; | |||
695 | } | |||
696 | /* XXX handle invalid sz */ | |||
697 | break; | |||
698 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 5: | |||
699 | if (sz == 1) { | |||
700 | data &= 0xFFFFFF00; | |||
701 | data |= (uint32_t)(vioblk->capacity >> 40) & 0xFF; | |||
702 | } | |||
703 | /* XXX handle invalid sz */ | |||
704 | break; | |||
705 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 6: | |||
706 | if (sz == 1) { | |||
707 | data &= 0xFFFFFF00; | |||
708 | data |= (uint32_t)(vioblk->capacity >> 48) & 0xFF; | |||
709 | } else if (sz == 2) { | |||
710 | data &= 0xFFFF0000; | |||
711 | data |= (uint32_t)(vioblk->capacity >> 48) & 0xFFFF; | |||
712 | } | |||
713 | /* XXX handle invalid sz */ | |||
714 | break; | |||
715 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 7: | |||
716 | if (sz == 1) { | |||
717 | data &= 0xFFFFFF00; | |||
718 | data |= (uint32_t)(vioblk->capacity >> 56) & 0xFF; | |||
719 | } | |||
720 | /* XXX handle invalid sz */ | |||
721 | break; | |||
722 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 12: | |||
723 | switch (sz) { | |||
724 | case 4: | |||
725 | data = (uint32_t)(vioblk->seg_max); | |||
726 | break; | |||
727 | case 2: | |||
728 | data &= 0xFFFF0000; | |||
729 | data |= (uint32_t)(vioblk->seg_max) & 0xFFFF; | |||
730 | break; | |||
731 | case 1: | |||
732 | data &= 0xFFFFFF00; | |||
733 | data |= (uint32_t)(vioblk->seg_max) & 0xFF; | |||
734 | break; | |||
735 | } | |||
736 | /* XXX handle invalid sz */ | |||
737 | break; | |||
738 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 13: | |||
739 | if (sz == 1) { | |||
740 | data &= 0xFFFFFF00; | |||
741 | data |= (uint32_t)(vioblk->seg_max >> 8) & 0xFF; | |||
742 | } | |||
743 | /* XXX handle invalid sz */ | |||
744 | break; | |||
745 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 14: | |||
746 | if (sz == 1) { | |||
747 | data &= 0xFFFFFF00; | |||
748 | data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFF; | |||
749 | } else if (sz == 2) { | |||
750 | data &= 0xFFFF0000; | |||
751 | data |= (uint32_t)(vioblk->seg_max >> 16) | |||
752 | & 0xFFFF; | |||
753 | } | |||
754 | /* XXX handle invalid sz */ | |||
755 | break; | |||
756 | case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI20 + 15: | |||
757 | if (sz == 1) { | |||
758 | data &= 0xFFFFFF00; | |||
759 | data |= (uint32_t)(vioblk->seg_max >> 24) & 0xFF; | |||
760 | } | |||
761 | /* XXX handle invalid sz */ | |||
762 | break; | |||
763 | case VIRTIO_CONFIG_DEVICE_FEATURES0: | |||
764 | data = vioblk->cfg.device_feature; | |||
765 | break; | |||
766 | case VIRTIO_CONFIG_GUEST_FEATURES4: | |||
767 | data = vioblk->cfg.guest_feature; | |||
768 | break; | |||
769 | case VIRTIO_CONFIG_QUEUE_PFN8: | |||
770 | data = vioblk->cfg.queue_pfn; | |||
771 | break; | |||
772 | case VIRTIO_CONFIG_QUEUE_SIZE12: | |||
773 | data = vioblk->cfg.queue_size; | |||
774 | break; | |||
775 | case VIRTIO_CONFIG_QUEUE_SELECT14: | |||
776 | data = vioblk->cfg.queue_select; | |||
777 | break; | |||
778 | case VIRTIO_CONFIG_QUEUE_NOTIFY16: | |||
779 | data = vioblk->cfg.queue_notify; | |||
780 | break; | |||
781 | case VIRTIO_CONFIG_DEVICE_STATUS18: | |||
782 | data = vioblk->cfg.device_status; | |||
783 | break; | |||
784 | case VIRTIO_CONFIG_ISR_STATUS19: | |||
785 | data = vioblk->cfg.isr_status; | |||
786 | vioblk->cfg.isr_status = 0; | |||
787 | if (intr != NULL((void *)0)) | |||
788 | *intr = INTR_STATE_DEASSERT-1; | |||
789 | break; | |||
790 | default: | |||
791 | return (0xFFFFFFFF); | |||
792 | } | |||
793 | ||||
794 | return (data); | |||
795 | } | |||
796 | ||||
797 | /* | |||
798 | * Emulate read/write io. Walks the descriptor chain, collecting io work and | |||
799 | * then emulates the read or write. | |||
800 | * | |||
801 | * On success, returns bytes read/written. | |||
802 | * On error, returns -1 and descriptor (desc) remains at its current position. | |||
803 | */ | |||
804 | static ssize_t | |||
805 | vioblk_rw(struct vioblk_dev *dev, int is_write, off_t offset, | |||
806 | struct vring_desc *desc_tbl, struct vring_desc **desc) | |||
807 | { | |||
808 | struct iovec *iov = NULL((void *)0); | |||
809 | ssize_t sz = 0; | |||
810 | size_t io_idx = 0; /* Index into iovec workqueue. */ | |||
811 | size_t xfer_sz = 0; /* Total accumulated io bytes. */ | |||
812 | ||||
813 | do { | |||
814 | iov = &io_v[io_idx]; | |||
815 | ||||
816 | /* | |||
817 | * Reads require writable descriptors. Writes require | |||
818 | * non-writeable descriptors. | |||
819 | */ | |||
820 | if ((!is_write) ^ DESC_WRITABLE(*desc)(((*desc)->flags & 2) ? 1 : 0)) { | |||
821 | log_warnx("%s: invalid descriptor for %s command", | |||
822 | __func__, is_write ? "write" : "read"); | |||
823 | return (-1); | |||
824 | } | |||
825 | ||||
826 | /* Collect the IO segment information. */ | |||
827 | iov->iov_len = (size_t)(*desc)->len; | |||
828 | iov->iov_base = hvaddr_mem((*desc)->addr, iov->iov_len); | |||
829 | if (iov->iov_base == NULL((void *)0)) | |||
830 | return (-1); | |||
831 | ||||
832 | /* Move our counters. */ | |||
833 | xfer_sz += iov->iov_len; | |||
834 | io_idx++; | |||
835 | ||||
836 | /* Guard against infinite chains */ | |||
837 | if (io_idx >= nitems(io_v)(sizeof((io_v)) / sizeof((io_v)[0]))) { | |||
838 | log_warnx("%s: descriptor table " | |||
839 | "invalid", __func__); | |||
840 | return (-1); | |||
841 | } | |||
842 | ||||
843 | /* Advance to the next descriptor. */ | |||
844 | *desc = &desc_tbl[(*desc)->next & VIOBLK_QUEUE_MASK(128 - 1)]; | |||
845 | } while ((*desc)->flags & VRING_DESC_F_NEXT1); | |||
846 | ||||
847 | /* | |||
848 | * Validate the requested block io operation alignment and size. | |||
849 | * Checking offset is just an extra caution as it is derived from | |||
850 | * a disk sector and is done for completeness in bounds checking. | |||
851 | */ | |||
852 | if (offset % VIRTIO_BLK_SECTOR_SIZE512 != 0 && | |||
853 | xfer_sz % VIRTIO_BLK_SECTOR_SIZE512 != 0) { | |||
854 | log_warnx("%s: unaligned read", __func__); | |||
855 | return (-1); | |||
856 | } | |||
857 | if (xfer_sz > SSIZE_MAX0x7fffffffffffffffL) { /* iovec_copyin limit */ | |||
858 | log_warnx("%s: invalid %s size: %zu", __func__, | |||
859 | is_write ? "write" : "read", xfer_sz); | |||
860 | return (-1); | |||
861 | } | |||
862 | ||||
863 | /* Emulate the Read or Write operation. */ | |||
864 | if (is_write) | |||
865 | sz = dev->file.pwritev(dev->file.p, io_v, io_idx, offset); | |||
866 | else | |||
867 | sz = dev->file.preadv(dev->file.p, io_v, io_idx, offset); | |||
868 | if (sz != (ssize_t)xfer_sz) { | |||
869 | log_warnx("%s: %s failure at offset 0x%llx, xfer_sz=%zu, " | |||
870 | "sz=%ld", __func__, (is_write ? "write" : "read"), offset, | |||
871 | xfer_sz, sz); | |||
872 | return (-1); | |||
873 | } | |||
874 | ||||
875 | return (sz); | |||
876 | } |