/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <linux/rtnetlink.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
 *
 * @trans: transaction
 * @data: pointer to data being queued
 * @destructor: data destructor
 * @tritem: transaction item being queued
 *
 * Enqueue data item to transaction queue. tritem is typically placed in
 * the container pointed at by the data pointer. Destructor is called on
 * transaction abort and after successful commit phase in case
 * the caller did not dequeue the item before.
 */
void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
				  void *data, void (*destructor)(void const *),
				  struct switchdev_trans_item *tritem)
{
	tritem->data = data;
	tritem->destructor = destructor;
	list_add_tail(&tritem->list, &trans->item_list);
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
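
/* Illustrative sketch (not part of this file): a driver typically uses
 * the transaction item queue to carry state it allocated in the prepare
 * phase over to the commit phase of the same operation. struct
 * foo_vlan_ctx, foo_vlan_ctx_destroy() and foo_port_vlan_prepare() are
 * hypothetical driver-side names:
 *
 *	struct foo_vlan_ctx {
 *		struct switchdev_trans_item tritem;
 *		u16 vid;
 *	};
 *
 *	static void foo_vlan_ctx_destroy(const void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	static int foo_port_vlan_prepare(struct switchdev_trans *trans,
 *					 u16 vid)
 *	{
 *		struct foo_vlan_ctx *ctx;
 *
 *		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *		if (!ctx)
 *			return -ENOMEM;
 *		ctx->vid = vid;
 *		switchdev_trans_item_enqueue(trans, ctx, foo_vlan_ctx_destroy,
 *					     &ctx->tritem);
 *		return 0;
 *	}
 *
 * In the commit phase the driver calls switchdev_trans_item_dequeue(trans)
 * to get the same pointer back; if it never dequeues it, the destructor
 * frees it when the transaction is torn down.
 */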

static struct switchdev_trans_item *
__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	if (list_empty(&trans->item_list))
		return NULL;
	tritem = list_first_entry(&trans->item_list,
				  struct switchdev_trans_item, list);
	list_del(&tritem->list);
	return tritem;
}

/**
 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
 *
 * @trans: transaction
 */
void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	tritem = __switchdev_trans_item_dequeue(trans);
	BUG_ON(!tritem);
	return tritem->data;
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);

static void switchdev_trans_init(struct switchdev_trans *trans)
{
	INIT_LIST_HEAD(&trans->item_list);
}

static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	while ((tritem = __switchdev_trans_item_dequeue(trans)))
		tritem->destructor(tritem->data);
}

static void switchdev_trans_items_warn_destroy(struct net_device *dev,
					       struct switchdev_trans *trans)
{
	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
	     dev->name);
	switchdev_trans_items_destroy(trans);
}

static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);

typedef void switchdev_deferred_func_t(struct net_device *dev,
				       const void *data);

struct switchdev_deferred_item {
	struct list_head list;
	struct net_device *dev;
	switchdev_deferred_func_t *func;
	unsigned long data[0];
};

static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
{
	struct switchdev_deferred_item *dfitem;

	spin_lock_bh(&deferred_lock);
	if (list_empty(&deferred)) {
		dfitem = NULL;
		goto unlock;
	}
	dfitem = list_first_entry(&deferred,
				  struct switchdev_deferred_item, list);
	list_del(&dfitem->list);
unlock:
	spin_unlock_bh(&deferred_lock);
	return dfitem;
}

/**
 * switchdev_deferred_process - Process ops in deferred queue
 *
 * Called to flush the ops currently queued in the deferred ops queue.
 * rtnl_lock must be held.
 */
void switchdev_deferred_process(void)
{
	struct switchdev_deferred_item *dfitem;

	ASSERT_RTNL();

	while ((dfitem = switchdev_deferred_dequeue())) {
		dfitem->func(dfitem->dev, dfitem->data);
		dev_put(dfitem->dev);
		kfree(dfitem);
	}
}
EXPORT_SYMBOL_GPL(switchdev_deferred_process);
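
/* Illustrative sketch (not part of this file): attr/obj operations
 * queued with SWITCHDEV_F_DEFER are normally executed from the
 * deferred_process_work item below, but a caller that must be sure all
 * pending operations have reached the driver (the bridge does this
 * before deleting a port, for instance) can flush them directly.
 * foo_port_teardown() is a hypothetical name:
 *
 *	static void foo_port_teardown(struct net_device *dev)
 *	{
 *		rtnl_lock();
 *		switchdev_deferred_process();
 *		rtnl_unlock();
 *	}
 */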

static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);

static int switchdev_deferred_enqueue(struct net_device *dev,
				      const void *data, size_t data_len,
				      switchdev_deferred_func_t *func)
{
	struct switchdev_deferred_item *dfitem;

	dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
	if (!dfitem)
		return -ENOMEM;
	dfitem->dev = dev;
	dfitem->func = func;
	memcpy(dfitem->data, data, data_len);
	dev_hold(dev);
	spin_lock_bh(&deferred_lock);
	list_add_tail(&dfitem->list, &deferred);
	spin_unlock_bh(&deferred_lock);
	schedule_work(&deferred_process_work);
	return 0;
}

/**
 * switchdev_port_attr_get - Get port attribute
 *
 * @dev: port device
 * @attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_ID_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port. Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
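
/* Illustrative sketch (not part of this file): a common use of the
 * getter is querying the parent switch ID to decide whether two netdevs
 * belong to the same offload device, much like
 * switchdev_port_same_parent_id() further down in this file does.
 * foo_get_parent_id() is a hypothetical name:
 *
 *	static int foo_get_parent_id(struct net_device *dev,
 *				     struct netdev_phys_item_id *ppid)
 *	{
 *		struct switchdev_attr attr = {
 *			.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 *		};
 *		int err;
 *
 *		err = switchdev_port_attr_get(dev, &attr);
 *		if (err)
 *			return err;
 *
 *		*ppid = attr.u.ppid;
 *		return 0;
 *	}
 */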

static int __switchdev_port_attr_set(struct net_device *dev,
				     const struct switchdev_attr *attr,
				     struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set) {
		err = ops->switchdev_port_attr_set(dev, attr, trans);
		goto done;
	}

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		goto done;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr, trans);
		if (err)
			break;
	}

done:
	if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
		err = 0;

	return err;
}

static int switchdev_port_attr_set_now(struct net_device *dev,
				       const struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	switchdev_trans_init(&trans);

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction. Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit attr set. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}

static void switchdev_port_attr_set_deferred(struct net_device *dev,
					     const void *data)
{
	const struct switchdev_attr *attr = data;
	int err;

	err = switchdev_port_attr_set_now(dev, attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, attr->id);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 const struct switchdev_attr *attr)
{
	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
					  switchdev_port_attr_set_deferred);
}

/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * Unless the SWITCHDEV_F_DEFER flag is set, rtnl_lock must be held
 * and the caller must not be in an atomic section.
 */
int switchdev_port_attr_set(struct net_device *dev,
			    const struct switchdev_attr *attr)
{
	if (attr->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_attr_set_defer(dev, attr);
	ASSERT_RTNL();
	return switchdev_port_attr_set_now(dev, attr);
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
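
/* Illustrative sketch (not part of this file): a caller such as the
 * bridge fills struct switchdev_attr and lets this helper run the
 * prepare/commit transaction. With SWITCHDEV_F_DEFER set the call may
 * come from an atomic context and is queued for later execution under
 * rtnl. foo_set_stp_state() is a hypothetical name:
 *
 *	static int foo_set_stp_state(struct net_device *dev, u8 state)
 *	{
 *		struct switchdev_attr attr = {
 *			.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
 *			.flags = SWITCHDEV_F_DEFER,
 *			.u.stp_state = state,
 *		};
 *
 *		return switchdev_port_attr_set(dev, &attr);
 *	}
 */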

static size_t switchdev_obj_size(const struct switchdev_obj *obj)
{
	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		return sizeof(struct switchdev_obj_port_vlan);
	case SWITCHDEV_OBJ_ID_IPV4_FIB:
		return sizeof(struct switchdev_obj_ipv4_fib);
	case SWITCHDEV_OBJ_ID_PORT_FDB:
		return sizeof(struct switchdev_obj_port_fdb);
	default:
		BUG();
	}
	return 0;
}

static int __switchdev_port_obj_add(struct net_device *dev,
				    const struct switchdev_obj *obj,
				    struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_add)
		return ops->switchdev_port_obj_add(dev, obj, trans);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to add object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_obj_add(lower_dev, obj, trans);
		if (err)
			break;
	}

	return err;
}

static int switchdev_port_obj_add_now(struct net_device *dev,
				      const struct switchdev_obj *obj)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	switchdev_trans_init(&trans);

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction. Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit obj add. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}

static void switchdev_port_obj_add_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_add_now(dev, obj);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
			   err, obj->id);
}

static int switchdev_port_obj_add_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_add_deferred);
}

/**
 * switchdev_port_obj_add - Add port object
 *
 * @dev: port device
 * @obj: object to add
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * Unless the SWITCHDEV_F_DEFER flag is set, rtnl_lock must be held
 * and the caller must not be in an atomic section.
 */
int switchdev_port_obj_add(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_add_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_add_now(dev, obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
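
/* Illustrative sketch (not part of this file): on the driver side,
 * switchdev_port_obj_add() lands in the switchdev_ops
 * .switchdev_port_obj_add callback, which sees the same object twice,
 * once with switchdev_trans_ph_prepare(trans) true and once in the
 * commit phase. All foo_* names are hypothetical:
 *
 *	static int foo_port_obj_add(struct net_device *dev,
 *				    const struct switchdev_obj *obj,
 *				    struct switchdev_trans *trans)
 *	{
 *		const struct switchdev_obj_port_vlan *vlan;
 *
 *		switch (obj->id) {
 *		case SWITCHDEV_OBJ_ID_PORT_VLAN:
 *			vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
 *			if (switchdev_trans_ph_prepare(trans))
 *				return foo_vlan_check_resources(dev, vlan);
 *			return foo_vlan_program_hw(dev, vlan);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */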

static int switchdev_port_obj_del_now(struct net_device *dev,
				      const struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_del)
		return ops->switchdev_port_obj_del(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to delete object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_del_now(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}

static void switchdev_port_obj_del_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_del_now(dev, obj);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
			   err, obj->id);
}

static int switchdev_port_obj_del_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_del_deferred);
}

/**
 * switchdev_port_obj_del - Delete port object
 *
 * @dev: port device
 * @obj: object to delete
 *
 * Unless the SWITCHDEV_F_DEFER flag is set, rtnl_lock must be held
 * and the caller must not be in an atomic section.
 */
int switchdev_port_obj_del(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_del_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_del_now(dev, obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 * switchdev_port_obj_dump - Dump port objects
 *
 * @dev: port device
 * @obj: object to dump
 * @cb: function to call with a filled object
 *
 * rtnl_lock must be held.
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
			    switchdev_obj_dump_cb_t *cb)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	ASSERT_RTNL();

	if (ops && ops->switchdev_port_obj_dump)
		return ops->switchdev_port_obj_dump(dev, obj, cb);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to dump objects on
	 * first port at bottom of stack.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_dump(lower_dev, obj, cb);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);

static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier. This should be used by code
 * which needs to monitor events happening in a particular device.
 * Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
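
/* Illustrative sketch (not part of this file): a consumer such as the
 * bridge registers a notifier block to receive switchdev events, for
 * example FDB entries learned by the hardware. foo_switchdev_event(),
 * foo_port_dev_check() and foo_fdb_offload() are hypothetical names:
 *
 *	static int foo_switchdev_event(struct notifier_block *unused,
 *				       unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 *
 *		if (!foo_port_dev_check(dev))
 *			return NOTIFY_DONE;
 *
 *		switch (event) {
 *		case SWITCHDEV_FDB_ADD:
 *		case SWITCHDEV_FDB_DEL:
 *			foo_fdb_offload(dev, ptr);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block foo_switchdev_nb __read_mostly = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 * and then calls register_switchdev_notifier(&foo_switchdev_nb) at init
 * time, pairing it with unregister_switchdev_notifier() on exit.
 */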

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 * Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 *
 * Call all network notifier blocks. This should be called by a driver
 * when it needs to propagate a hardware event.
 * Return values are same as for atomic_notifier_call_chain().
 * rtnl_lock must be held.
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	ASSERT_RTNL();

	info->dev = dev;
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
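
/* Illustrative sketch (not part of this file): a switch driver that
 * learned a MAC address in hardware propagates it to the software
 * bridge through this chain. foo_fdb_learned() is a hypothetical name;
 * because rtnl_lock is required, drivers typically call this from
 * process context, for example a delayed work item:
 *
 *	static void foo_fdb_learned(struct net_device *port_dev,
 *				    const unsigned char *mac, u16 vid)
 *	{
 *		struct switchdev_notifier_fdb_info info = {
 *			.addr = mac,
 *			.vid = vid,
 *		};
 *
 *		rtnl_lock();
 *		call_switchdev_notifiers(SWITCHDEV_FDB_ADD, port_dev,
 *					 &info.info);
 *		rtnl_unlock();
 *	}
 */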

struct switchdev_vlan_dump {
	struct switchdev_obj_port_vlan vlan;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};

static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
{
	struct bridge_vlan_info vinfo;

	vinfo.flags = dump->flags;

	if (dump->begin == 0 && dump->end == 0) {
		return 0;
	} else if (dump->begin == dump->end) {
		vinfo.vid = dump->begin;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	} else {
		vinfo.vid = dump->begin;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
		vinfo.vid = dump->end;
		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	}

	return 0;
}

static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
	struct switchdev_vlan_dump *dump =
		container_of(vlan, struct switchdev_vlan_dump, vlan);
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
				    u32 filter_mask)
{
	struct switchdev_vlan_dump dump = {
		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.skb = skb,
		.filter_mask = filter_mask,
	};
	int err = 0;

	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
		err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
					      switchdev_port_vlan_dump_cb);
		if (err)
			goto err_out;
		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
			/* last one */
			err = switchdev_port_vlan_dump_put(&dump);
	}

err_out:
	return err == -EOPNOTSUPP ? 0 : err;
}

/**
 * switchdev_port_bridge_getlink - Get bridge port attributes
 *
 * @dev: port device
 *
 * Called for SELF on rtnl_bridge_getlink to get bridge port
 * attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);

static int switchdev_port_br_setflag(struct net_device *dev,
				     struct nlattr *nlattr,
				     unsigned long brport_flag)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u8 flag = nla_get_u8(nlattr);
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	if (flag)
		attr.u.brport_flags |= brport_flag;
	else
		attr.u.brport_flags &= ~brport_flag;

	return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
	[IFLA_BRPORT_COST] = { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
	[IFLA_BRPORT_MODE] = { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
					      struct nlattr *protinfo)
{
	struct nlattr *attr;
	int rem;
	int err;

	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
				  switchdev_port_bridge_policy);
	if (err)
		return err;

	nla_for_each_nested(attr, protinfo, rem) {
		switch (nla_type(attr)) {
		case IFLA_BRPORT_LEARNING:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING);
			break;
		case IFLA_BRPORT_LEARNING_SYNC:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING_SYNC);
			break;
		case IFLA_BRPORT_UNICAST_FLOOD:
			err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;
	}

	return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     const struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj_port_vlan vlan = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
	};
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
			return -EINVAL;
		vlan.flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
			/* don't allow range of pvids */
			if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
				return -EINVAL;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan.vid_begin)
				return -EINVAL;
			vlan.vid_end = vinfo->vid;
			if (vlan.vid_end <= vlan.vid_begin)
				return -EINVAL;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			vlan.vid_begin = 0;
		} else {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
			vlan.vid_end = vinfo->vid;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			vlan.vid_begin = 0;
		}
	}

	return 0;
}

/**
 * switchdev_port_bridge_setlink - Set bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_setlink to set bridge port
 * attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *protinfo;
	struct nlattr *afspec;
	int err = 0;

	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				   IFLA_PROTINFO);
	if (protinfo) {
		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
		if (err)
			return err;
	}

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		err = switchdev_port_br_afspec(dev, afspec,
					       switchdev_port_obj_add);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 * switchdev_port_bridge_dellink - Delete bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_dellink to delete bridge port
 * attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *afspec;

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		return switchdev_port_br_afspec(dev, afspec,
						switchdev_port_obj_del);

	return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);

/**
 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to add
 * @vid: VLAN to add
 * @nlm_flags: netlink flags
 *
 * Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid, u16 nlm_flags)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.vid = vid,
	};

	ether_addr_copy(fdb.addr, addr);
	return switchdev_port_obj_add(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to delete
 * @vid: VLAN to delete
 *
 * Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.vid = vid,
	};

	ether_addr_copy(fdb.addr, addr);
	return switchdev_port_obj_del(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);

struct switchdev_fdb_dump {
	struct switchdev_obj_port_fdb fdb;
	struct net_device *dev;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
	struct switchdev_fdb_dump *dump =
		container_of(fdb, struct switchdev_fdb_dump, fdb);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = AF_BRIDGE;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = NTF_SELF;
	ndm->ndm_type = 0;
	ndm->ndm_ifindex = dump->dev->ifindex;
	ndm->ndm_state = fdb->ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
		goto nla_put_failure;

	if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/**
 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 * @skb: netlink skb
 * @cb: netlink callback
 * @dev: port device
 * @filter_dev: filter device
 * @idx: index to start dumping at
 *
 * Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.dev = dev,
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
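
/* Illustrative sketch (not part of this file): switch drivers typically
 * wire the ndo_fdb_* and ndo_bridge_* callbacks of their port netdevs
 * straight to the helpers above, so that "bridge fdb" and "bridge vlan"
 * operations with the "self" keyword reach the hardware. The ops struct
 * name is hypothetical; the assignments use the exported functions in
 * this file:
 *
 *	static const struct net_device_ops foo_port_netdev_ops = {
 *		...
 *		.ndo_fdb_add		= switchdev_port_fdb_add,
 *		.ndo_fdb_del		= switchdev_port_fdb_del,
 *		.ndo_fdb_dump		= switchdev_port_fdb_dump,
 *		.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 *		.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 *		.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 *	};
 */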

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	ASSERT_RTNL();

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0 &&
		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
			return NULL;

		prev_attr = attr;
	}

	return dev;
}

/**
 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @nlflags: netlink flags passed in (NLM_F_*)
 * @tb_id: route table ID
 *
 * Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = nlflags,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);

/**
 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @tb_id: route table ID
 *
 * Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = 0,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags &= ~RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);

/**
 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 * @fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device. For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);

static bool switchdev_port_same_parent_id(struct net_device *a,
					  struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}

/**
 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 * @dev: port device
 * @group_dev: containing device
 * @joining: true if dev is joining group; false if leaving group
 *
 * An ungrouped port's offload mark is just its ifindex. A grouped
 * port's (member of a bridge, for example) offload mark is the ifindex
 * of one of the ports in the group with the same parent (switch) ID.
 * Ports on the same device in the same group will have the same mark.
 *
 * Example:
 *
 *	br0		ifindex=9
 *	  sw1p1		ifindex=2	mark=2
 *	  sw1p2		ifindex=3	mark=2
 *	  sw2p1		ifindex=4	mark=5
 *	  sw2p2		ifindex=5	mark=5
 *
 * If sw2p2 leaves the bridge, we'll have:
 *
 *	br0		ifindex=9
 *	  sw1p1		ifindex=2	mark=2
 *	  sw1p2		ifindex=3	mark=2
 *	  sw2p1		ifindex=4	mark=4
 *	  sw2p2		ifindex=5	mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev) {
		ASSERT_RTNL();
		if (joining)
			mark = switchdev_port_fwd_mark_get(dev, group_dev);
		else if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);