/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"
#include "trace/events/kvm.h"

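/*
 * Per-device state for a legacy assigned PCI device: the host bus location,
 * the host and guest interrupt configuration (INTx, MSI or MSI-X), the IRQ
 * source id used for injection, and the locks protecting INTx masking.
 */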
struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
	int host_segnr;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	bool pci_2_3;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t intx_lock;
	spinlock_t intx_mask_lock;
	char irq_name[32];
	struct pci_saved_state *pci_saved_state;
};

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
							      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}

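/*
 * Hard IRQ handler for INTx on devices with PCI 2.3 interrupt masking:
 * mask the interrupt at the device and defer injection to the threaded
 * handler, or report IRQ_NONE if the device did not assert the line.
 */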
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

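/*
 * Inject the given vector into the guest. For INTx, honour the user-space
 * mask (KVM_DEV_ASSIGN_MASK_INTX) under intx_mask_lock before asserting
 * the line.
 */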
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1,
				    false);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1, false);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

/*
 * Deliver an IRQ in an atomic context if we can, or return a failure so
 * the caller can retry in a process context.
 * Return value:
 *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
 *  Other values - No need to retry.
 */
static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
				int level)
{
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	struct kvm_kernel_irq_routing_entry *e;
	int ret = -EINVAL;
	int idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/*
	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
	 * which would need to be retried from thread context; when the same
	 * GSI is connected to both PIC and IOAPIC, we'd have to report a
	 * partial failure here.
	 * Since there's no easy way to do this, we only support injecting MSI,
	 * which is limited to a 1:1 GSI mapping.
	 */
	idx = srcu_read_lock(&kvm->irq_srcu);
	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
		e = &entries[0];
		ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
						irq, level);
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

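/*
 * MSI hard IRQ handler: try to inject from atomic context and fall back to
 * the threaded handler if the injection would block.
 */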
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

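/*
 * MSI-X handlers: translate the host vector back to the corresponding guest
 * MSI-X entry before injecting, again preferring the atomic path and waking
 * the thread only when necessary.
 */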
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1, false);
	}

	spin_unlock(&dev->intx_mask_lock);
}

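/*
 * Tear down the guest side of an IRQ assignment: unregister the ack
 * notifier, deassert the guest line and release the IRQ source id.
 */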
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0, false);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the irq here to prevent further events.
	 *
	 * Note this may result in a nested disable if the interrupt type is
	 * INTx, but that's OK since we are going to free it.
	 *
	 * If this function is part of VM destruction, make sure the kvm
	 * state is still valid at this point, because we may also have to
	 * wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

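/*
 * Fully release an assigned device: free its IRQs, restore and reset the
 * PCI function, drop the claimed regions and the device reference, and
 * remove it from the VM's list.
 */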
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	pci_clear_dev_assigned(assigned_dev->dev);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

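/*
 * Request the host INTx line. With PCI 2.3 masking support the line can be
 * shared and is masked at the device by the hard IRQ handler; otherwise a
 * oneshot threaded handler is used and the line stays disabled until the
 * guest acks the interrupt.
 */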
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}

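/*
 * Enable MSI-X on the host device and request one threaded IRQ per entry
 * configured via KVM_ASSIGN_SET_MSIX_NR/KVM_ASSIGN_SET_MSIX_ENTRY. On
 * failure, free whatever was requested and disable MSI-X again.
 */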
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix_exact(dev->dev,
				  dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

static int assigned_device_enable_guest_intx(struct kvm *kvm,
					     struct kvm_assigned_dev_kernel *dev,
					     struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

static int assigned_device_enable_guest_msi(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev,
					    struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

static int assigned_device_enable_guest_msix(struct kvm *kvm,
					     struct kvm_assigned_dev_kernel *dev,
					     struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

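/*
 * Set up the host side of an interrupt assignment (INTx, MSI or MSI-X),
 * naming the host IRQ after the device. Only one host type may be active
 * at a time.
 */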
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

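/*
 * Set up the guest side of an interrupt assignment: allocate an IRQ source
 * id, record the guest GSI, and register the ack notifier for INTx.
 */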
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else {
		kvm_free_irq_source_id(kvm, dev->irq_source_id);
		dev->irq_source_id = -1;
	}

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

621
622static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
623 struct kvm_assigned_irq
624 *assigned_irq)
625{
626 int r = -ENODEV;
627 struct kvm_assigned_dev_kernel *match;
628 unsigned long irq_type;
629
630 mutex_lock(&kvm->lock);
631
632 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
633 assigned_irq->assigned_dev_id);
634 if (!match)
635 goto out;
636
637 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
638 KVM_DEV_IRQ_GUEST_MASK);
639 r = kvm_deassign_irq(kvm, match, irq_type);
640out:
641 mutex_unlock(&kvm->lock);
642 return r;
643}
644
/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via a file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = d_backing_inode(path.dentry);

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

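/*
 * KVM_ASSIGN_PCI_DEVICE: look up the PCI device, check sysfs permissions,
 * enable it and claim its regions, save its config state, and attach it to
 * the VM's IOMMU domain.
 */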
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match->dev);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
					struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		       "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match->dev);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

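/*
 * KVM_ASSIGN_SET_MSIX_NR: size the host and guest MSI-X entry tables for a
 * device. The count can only be set once and is capped at
 * KVM_MAX_MSIX_PER_DEV.
 */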
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						  entry_nr->entry_nr,
						  GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Not allowed to set the MSI-X number twice */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}

static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
					 struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0, false);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

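/*
 * Dispatch the legacy device assignment ioctls. Each case copies its
 * argument structure from user space before calling the handler, which
 * takes kvm->lock itself.
 */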
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}