MTU: Software interface / Per-protocol MTU support
This patch separates setting of hardware interface and software
interface MTU. Software MTU is L2 payload MTU (i.e. not including L2
header). Per-protocol MTU for IPv4, IPv6 and MPLS can also be set.
Currently only IP4, IP6 are enabled in adjacency / rewrite code.
Documentation in src/vnet/MTU.md
Change-Id: Iee2fd6f0bbc8210748dd8e073ab9fab87d323690
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/vnet/MTU.md b/src/vnet/MTU.md
new file mode 100644
index 0000000..32b8c39
--- /dev/null
+++ b/src/vnet/MTU.md
@@ -0,0 +1,72 @@
+# Introduction
+Maximum Transmission Unit is a term used to describe the maximum sized "thingy" that can be sent out an interface. It can refer to the maximum frame size that a NIC can send; on Ethernet that would include the Ethernet header but typically not the IFG (inter-frame gap). It can also refer to the maximum packet size; that is, on Ethernet an MTU of 1500 would allow an IPv4 packet of 1500 bytes, which would result in an Ethernet frame of 1518 bytes.
+
+# MTU in VPP
+VPP allows setting of the physical payload MTU, i.e. not including L2 overhead. Setting the hardware MTU will program the NIC.
+This MTU will be inherited by all software interfaces.
+
+VPP also allows setting of the payload MTU for software interfaces, independently of the MTU set on the hardware. If the software payload MTU is set higher than the capability of the NIC, the packet will be dropped.
+
+In addition VPP supports setting the MTU of individual network layer protocols: IPv4, IPv6 or MPLS. For example an IPv4 MTU of 1500 (includes the IPv4 header) will fit in a hardware payload MTU of 1500.
+
+_Note we might consider changing the hardware payload MTU to hardware MTU_. That is, the MTU includes all L2 framing. Then the payload MTU can be calculated based on the interface's configuration. E.g. 802.1q tags etc.
+
+There are currently no checks or warnings if e.g. the user configures a per-protocol MTU larger than the underlying payload MTU. If that happens packets will be fragmented or dropped.
+
+## Data structures
+The hardware payload MTU is stored in the max_packet_bytes variable in the vnet_hw_interface_t structure.
+
+The software MTU (previously max_l3_packet_bytes) is in vnet_sw_interface_t->mtu[VNET_N_MTU].
+
+# API
+
+## Set physical MTU
+
+This API message is used to set the physical MTU. It is currently limited to Ethernet interfaces. Note, this programs the NIC.
+
+```
+autoreply define hw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u16 mtu;
+};
+```
+
+## Set the L2 payload MTU (not including the L2 header) and per-protocol MTUs
+
+This API message sets the L3 MTU, i.e. the L2 payload MTU, and the per-protocol MTUs. E.g. on Ethernet the L3 MTU is the maximum size of the Ethernet payload. If a per-protocol value is left as 0, then the default is picked from VNET_MTU_L3.
+
+```
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+```
+
+## Get interface MTU
+
+The various MTUs on an interface can be queried with the sw_interface_dump/sw_interface_details calls.
+
+```
+define sw_interface_details
+{
+ /* MTU */
+ u16 link_mtu;
+
+ /* Per protocol MTUs */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+```
+
+# CLI
+
+```
+set interface mtu [packet|ip4|ip6|mpls] <value> <interface>
+```
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index abfe872..0de3fc8 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -347,36 +347,19 @@
adj = adj_get(ai);
- vnet_rewrite_update_mtu (vnet_get_main(),
+ vnet_rewrite_update_mtu (vnet_get_main(), adj->ia_link,
&adj->rewrite_header);
return (ADJ_WALK_RC_CONTINUE);
}
-static walk_rc_t
-adj_sw_mtu_update (vnet_main_t * vnm,
- u32 sw_if_index,
- void *ctx)
+static void
+adj_mtu_update (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
{
- /*
- * Walk all the adjacencies on the interface to update the cached MTU
- */
- adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
-
- return (WALK_CONTINUE);
+ adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
}
-void
-adj_mtu_update (u32 hw_if_index)
-{
- /*
- * Walk all the SW interfaces on the HW interface to update the cached MTU
- */
- vnet_hw_interface_walk_sw(vnet_get_main(),
- hw_if_index,
- adj_sw_mtu_update,
- NULL);
-}
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(adj_mtu_update);
/**
* @brief Walk the Adjacencies on a given interface
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index bcf6c04..fe77d16 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -345,12 +345,6 @@
extern void adj_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable);
/**
- * @brief Notify the adjacency subsystem that the MTU settings for
- * an HW interface have changed
- */
-extern void adj_mtu_update (u32 hw_if_index);
-
-/**
* @brief
* The global adjacnecy pool. Exposed for fast/inline data-plane access
*/
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index 74881d7..9236911 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -77,8 +77,8 @@
adj->rewrite_header.sw_if_index = sw_if_index;
adj->rewrite_header.data_bytes = 0;
adj->rewrite_header.max_l3_packet_bytes =
- vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index, VLIB_TX);
-
+ vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index,
+ vnet_link_to_mtu(linkt));
adj_lock(adj_get_index(adj));
vnet_update_adjacency_for_sw_interface(vnet_get_main(),
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index 593c1b6..9fbad48 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -69,7 +69,7 @@
adj_mcasts[proto][sw_if_index] = adj_get_index(adj);
adj_lock(adj_get_index(adj));
- vnet_rewrite_init(vnm, sw_if_index,
+ vnet_rewrite_init(vnm, sw_if_index, link_type,
adj_get_mcast_node(proto),
vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
&adj->rewrite_header);
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 6fd9b40..3f66acb 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -230,7 +230,7 @@
adj_index = adj_get_index(adj);
adj_lock(adj_index);
- vnet_rewrite_init(vnm, sw_if_index,
+ vnet_rewrite_init(vnm, sw_if_index, link_type,
adj_get_nd_node(nh_proto),
vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
&adj->rewrite_header);
diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c
index f4b26a9..1a87793 100644
--- a/src/vnet/adj/rewrite.c
+++ b/src/vnet/adj/rewrite.c
@@ -103,19 +103,22 @@
void
vnet_rewrite_init (vnet_main_t * vnm,
u32 sw_if_index,
+ vnet_link_t linkt,
u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
{
rw->sw_if_index = sw_if_index;
rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
rw->max_l3_packet_bytes =
- vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, vnet_link_to_mtu (linkt));
}
void
-vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_rewrite_header_t * rw)
+vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_link_t linkt,
+ vnet_rewrite_header_t * rw)
{
rw->max_l3_packet_bytes =
- vnet_sw_interface_get_mtu (vnm, rw->sw_if_index, VLIB_TX);
+ vnet_sw_interface_get_mtu (vnm, rw->sw_if_index,
+ vnet_link_to_mtu (linkt));
}
void
@@ -133,7 +136,7 @@
vnet_get_hw_interface_class (vnm, hw->hw_class_index);
u8 *rewrite = NULL;
- vnet_rewrite_init (vnm, sw_if_index, node_index,
+ vnet_rewrite_init (vnm, sw_if_index, link_type, node_index,
vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
rw);
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
index 712f686..0d4b0b9 100644
--- a/src/vnet/adj/rewrite.h
+++ b/src/vnet/adj/rewrite.h
@@ -314,11 +314,12 @@
void vnet_rewrite_init (struct vnet_main_t *vnm,
u32 sw_if_index,
+ vnet_link_t linkt,
u32 this_node,
u32 next_node, vnet_rewrite_header_t * rw);
void vnet_rewrite_update_mtu (struct vnet_main_t *vnm,
- vnet_rewrite_header_t * rw);
+ vnet_link_t linkt, vnet_rewrite_header_t * rw);
u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
u32 sw_if_index,
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 4e745d6..9244776 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -2867,8 +2867,7 @@
if (error)
clib_error_report (error);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
}
/*
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 174b363..b3ea983 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -300,11 +300,9 @@
ETHERNET_MIN_PACKET_BYTES;
hi->max_packet_bytes = hi->max_supported_packet_bytes =
ETHERNET_MAX_PACKET_BYTES;
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default ethernet MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
clib_memcpy (ei->address, address, sizeof (ei->address));
vec_add (hi->hw_address, address, sizeof (ei->address));
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
index 5b165c8..0822cd7 100644
--- a/src/vnet/gre/interface.c
+++ b/src/vnet/gre/interface.c
@@ -348,11 +348,8 @@
64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
-
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
/*
* source the FIB entry for the tunnel's destination
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index 25ba703..2cbf4f6 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -1,4 +1,4 @@
-option version = "1.1.0";
+option version = "2.0.0";
service {
rpc want_interface_events returns want_interface_events_reply
@@ -21,13 +21,13 @@
u8 admin_up_down;
};
-/** \brief Set interface MTU
+/** \brief Set interface physical MTU
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param sw_if_index - index of the interface to set MTU on
@param mtu - MTU
*/
-autoreply define sw_interface_set_mtu
+autoreply define hw_interface_set_mtu
{
u32 client_index;
u32 context;
@@ -35,6 +35,17 @@
u16 mtu;
};
+/** \brief Set interface L3 MTU */
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+
/** \brief Interface Event generated by want_interface_events
@param client_index - opaque cookie to identify the sender
@param pid - client pid registered to receive notification
@@ -125,6 +136,9 @@
/* MTU */
u16 link_mtu;
+ /* Per protocol MTUs */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+
/* Subinterface ID. A number 0-N to uniquely identify this subinterface under the super interface */
u32 sub_id;
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 797fe44..7a2c3ab 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -122,22 +122,6 @@
/* helper_flags no redistribution */ 0);
}
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
-{
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (hi->max_packet_bytes != mtu)
- {
- u16 l3_pad = hi->max_packet_bytes - hi->max_l3_packet_bytes[VLIB_TX];
- hi->max_packet_bytes = mtu;
- hi->max_l3_packet_bytes[VLIB_TX] =
- hi->max_l3_packet_bytes[VLIB_RX] = mtu - l3_pad;
- ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
- adj_mtu_update (hw_if_index);
- }
-}
-
static void
unserialize_vnet_hw_interface_set_flags (serialize_main_t * m, va_list * va)
{
@@ -681,6 +665,71 @@
pool_put (im->sw_interfaces, sw);
}
+static clib_error_t *
+call_sw_interface_mtu_change_callbacks (vnet_main_t * vnm, u32 sw_if_index)
+{
+ return call_elf_section_interface_callbacks
+ (vnm, sw_if_index, 0, vnm->sw_interface_mtu_change_functions);
+}
+
+void
+vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (si->mtu[VNET_MTU_L3] != mtu)
+ {
+ si->mtu[VNET_MTU_L3] = mtu;
+ call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+ }
+}
+
+void
+vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ u32 mtu[])
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ bool changed = false;
+ int i;
+
+ for (i = 0; i < VNET_N_MTU; i++)
+ {
+ if (si->mtu[i] != mtu[i])
+ {
+ si->mtu[i] = mtu[i];
+ changed = true;
+ }
+ }
+ /* Notify interested parties */
+ if (changed)
+ call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+}
+
+/*
+ * Reflect a change in hardware MTU on protocol MTUs
+ */
+static walk_rc_t
+sw_interface_walk_callback (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
+{
+ u32 *link_mtu = ctx;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, *link_mtu);
+ return WALK_CONTINUE;
+}
+
+void
+vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (hi->max_packet_bytes != mtu)
+ {
+ hi->max_packet_bytes = mtu;
+ ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
+ vnet_hw_interface_walk_sw (vnm, hw_if_index, sw_interface_walk_callback,
+ &mtu);
+ }
+}
+
static void
setup_tx_node (vlib_main_t * vm,
u32 node_index, vnet_device_class_t * dev_class)
@@ -762,9 +811,7 @@
hw->max_rate_bits_per_sec = 0;
hw->min_packet_bytes = 0;
- hw->per_packet_overhead_bytes = 0;
- hw->max_l3_packet_bytes[VLIB_RX] = ~0;
- hw->max_l3_packet_bytes[VLIB_TX] = ~0;
+ vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
if (dev_class->tx_function == 0)
goto no_output_nodes; /* No output/tx nodes to create */
@@ -1523,6 +1570,22 @@
return (0);
}
+vnet_mtu_t
+vnet_link_to_mtu (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (VNET_MTU_IP4);
+ case VNET_LINK_IP6:
+ return (VNET_MTU_IP6);
+ case VNET_LINK_MPLS:
+ return (VNET_MTU_MPLS);
+ default:
+ return (VNET_MTU_L3);
+ }
+}
+
u8 *
default_build_rewrite (vnet_main_t * vnm,
u32 sw_if_index,
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 87addbe..b582dba 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -154,8 +154,10 @@
_VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_add_del)
#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(f) \
_VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_link_up_down)
-#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p) \
+#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p) \
_VNET_INTERFACE_FUNCTION_DECL_PRIO(f,hw_interface_link_up_down,p)
+#define VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_mtu_change)
#define VNET_SW_INTERFACE_ADD_DEL_FUNCTION(f) \
_VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_add_del)
#define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(f) \
@@ -537,14 +539,6 @@
/* Largest packet size for this interface. */
u32 max_packet_bytes;
- /* Number of extra bytes that go on the wire.
- Packet length on wire
- = max (length + per_packet_overhead_bytes, min_packet_bytes). */
- u32 per_packet_overhead_bytes;
-
- /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
- u32 max_l3_packet_bytes[VLIB_N_RX_TX];
-
/* Hash table mapping sub interface id to sw_if_index. */
uword *sub_interface_sw_if_index_by_id;
@@ -641,6 +635,18 @@
VNET_FLOOD_CLASS_NO_FLOOD,
} vnet_flood_class_t;
+/* Per protocol MTU */
+typedef enum
+{
+ VNET_MTU_L3, /* Default payload MTU (without L2 headers) */
+ VNET_MTU_IP4, /* Per-protocol MTUs overriding default */
+ VNET_MTU_IP6,
+ VNET_MTU_MPLS,
+ VNET_N_MTU
+} vnet_mtu_t;
+
+extern vnet_mtu_t vnet_link_to_mtu (vnet_link_t link);
+
/* Software-interface. This corresponds to a Ethernet VLAN, ATM vc, a
tunnel, etc. Configuration (e.g. IP address) gets attached to
software interface. */
@@ -684,6 +690,9 @@
/* VNET_SW_INTERFACE_TYPE_HARDWARE. */
u32 hw_if_index;
+ /* MTU for network layer (not including L2 headers) */
+ u32 mtu[VNET_N_MTU];
+
/* VNET_SW_INTERFACE_TYPE_SUB. */
vnet_sub_interface_t sub;
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 9a1838f..e2f4d8f 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -50,6 +50,7 @@
#define foreach_vpe_api_msg \
_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
+_(HW_INTERFACE_SET_MTU, hw_interface_set_mtu) \
_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \
_(WANT_INTERFACE_EVENTS, want_interface_events) \
_(SW_INTERFACE_DUMP, sw_interface_dump) \
@@ -96,9 +97,9 @@
}
static void
-vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
{
- vl_api_sw_interface_set_mtu_reply_t *rmp;
+ vl_api_hw_interface_set_mtu_reply_t *rmp;
vnet_main_t *vnm = vnet_get_main ();
u32 sw_if_index = ntohl (mp->sw_if_index);
u16 mtu = ntohs (mp->mtu);
@@ -138,6 +139,27 @@
vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_HW_INTERFACE_SET_MTU_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+{
+ vl_api_sw_interface_set_mtu_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+ int i;
+ u32 per_protocol_mtu[VNET_N_MTU];
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ for (i = 0; i < VNET_N_MTU; i++)
+ per_protocol_mtu[i] = ntohl (mp->mtu[i]);
+
+ vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, per_protocol_mtu);
+
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY);
}
@@ -162,6 +184,11 @@
mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >>
VNET_HW_INTERFACE_FLAG_SPEED_SHIFT);
mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->mtu[VNET_MTU_L3] = ntohl (swif->mtu[VNET_MTU_L3]);
+ mp->mtu[VNET_MTU_IP4] = ntohl (swif->mtu[VNET_MTU_IP4]);
+ mp->mtu[VNET_MTU_IP6] = ntohl (swif->mtu[VNET_MTU_IP6]);
+ mp->mtu[VNET_MTU_MPLS] = ntohl (swif->mtu[VNET_MTU_MPLS]);
+
mp->context = context;
strncpy ((char *) mp->interface_name,
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index b803a31..264c1f3 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -362,7 +362,7 @@
if (visible)
vec_add1 (sorted_sis, si[0]);}
));
- /* *INDENT-OFF* */
+ /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -449,20 +449,19 @@
format_ip6_address, r6, ia->address_length);
}));
/* *INDENT-ON* */
+ }
}
-}
-
-else
-{
- vec_foreach (si, sorted_sis)
- {
- vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
- }
-}
+ else
+ {
+ vec_foreach (si, sorted_sis)
+ {
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
+ }
+ }
done:
-vec_free (sorted_sis);
-return error;
+ vec_free (sorted_sis);
+ return error;
}
/* *INDENT-OFF* */
@@ -1115,12 +1114,17 @@
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_main_t *vnm = vnet_get_main ();
- u32 hw_if_index, mtu;
+ u32 hw_if_index, sw_if_index, mtu;
ethernet_main_t *em = &ethernet_main;
+ u32 mtus[VNET_N_MTU] = { 0, 0, 0, 0 };
if (unformat (input, "%d %U", &mtu,
unformat_vnet_hw_interface, vnm, &hw_if_index))
{
+ /*
+ * Change physical MTU on interface. Only supported for Ethernet
+ * interfaces
+ */
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
@@ -1137,17 +1141,35 @@
hi->max_supported_packet_bytes);
vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ goto done;
}
+ else if (unformat (input, "packet %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ /* Set default packet MTU (including L3 header) */
+ mtus[VNET_MTU_L3] = mtu;
+ else if (unformat (input, "ip4 %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_IP4] = mtu;
+ else if (unformat (input, "ip6 %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_IP6] = mtu;
+ else if (unformat (input, "mpls %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_MPLS] = mtu;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
+
+ vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, mtus);
+
+done:
return 0;
}
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
.path = "set interface mtu",
- .short_help = "set interface mtu <value> <interface>",
+ .short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
.function = mtu_cmd,
};
/* *INDENT-ON* */
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index 7e94092..631120b 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -291,6 +291,16 @@
return s;
}
+static u8 *
+format_vnet_sw_interface_mtu (u8 * s, va_list * args)
+{
+ vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+
+ return format (s, "%d/%d/%d/%d", si->mtu[VNET_MTU_L3],
+ si->mtu[VNET_MTU_IP4],
+ si->mtu[VNET_MTU_IP6], si->mtu[VNET_MTU_MPLS]);
+}
+
u8 *
format_vnet_sw_interface (u8 * s, va_list * args)
{
@@ -299,12 +309,14 @@
vnet_interface_main_t *im = &vnm->interface_main;
if (!si)
- return format (s, "%=32s%=5s%=16s%=16s%=16s",
- "Name", "Idx", "State", "Counter", "Count");
+ return format (s, "%=32s%=5s%=10s%=21s%=16s%=16s",
+ "Name", "Idx", "State", "MTU (L3/IP4/IP6/MPLS)", "Counter",
+ "Count");
- s = format (s, "%-32U%=5d%=16U",
+ s = format (s, "%-32U%=5d%=10U%=21U",
format_vnet_sw_interface_name, vnm, si, si->sw_if_index,
- format_vnet_sw_interface_flags, si->flags);
+ format_vnet_sw_interface_flags, si->flags,
+ format_vnet_sw_interface_mtu, si);
s = format_vnet_sw_interface_cntrs (s, im, si);
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index e1568e3..c4dd3b7 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -262,20 +262,22 @@
return hw->flags;
}
-always_inline uword
-vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
- vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index)
{
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- return hw->max_l3_packet_bytes[dir];
+ return hw->max_packet_bytes;
}
-always_inline uword
-vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
- vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index, vnet_mtu_t af)
{
- vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- return (hw->max_l3_packet_bytes[dir]);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ u32 mtu;
+ mtu = sw->mtu[af] > 0 ? sw->mtu[af] : sw->mtu[VNET_MTU_L3];
+ if (mtu == 0)
+ return 9000; /* $$$ Deal with interface-types not setting MTU */
+ return mtu;
}
always_inline uword
@@ -339,6 +341,11 @@
/* Set the MTU on the HW interface */
void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
+/* Set the MTU on the SW interface */
+void vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
+void vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ u32 mtu[]);
+
/* update the unnumbered state of an interface */
void vnet_sw_interface_update_unnumbered (u32 sw_if_index,
u32 ip_sw_if_index, u8 enable);
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index fe78eae..e281d7e 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -2633,10 +2633,6 @@
{
if (is_add)
{
- vnet_hw_interface_t *hw_if0;
-
- hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
pool_get (nm->if_radv_pool, a);
ri = a - nm->if_radv_pool;
@@ -2670,7 +2666,8 @@
a->send_radv = 1;
/* fill in radv_info for this interface that will be needed later */
- a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+ a->adv_link_mtu =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VNET_MTU_IP6);
clib_memcpy (a->link_layer_address, eth_if0->address, 6);
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index c39d27d..a47704a 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -526,10 +526,8 @@
hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12;
-
/* Standard default ipip MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
t->tunnel_src = *src;
t->tunnel_dst = *dst;
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index cfdd0f8..38ca899 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -337,7 +337,7 @@
t->dev_instance = t_idx;
t->user_instance = t_idx;
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1480;
+ vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
ipip_tunnel_db_add (t, &key);
diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c
index 0772ce7..8903df0 100644
--- a/src/vnet/ipsec-gre/interface.c
+++ b/src/vnet/ipsec-gre/interface.c
@@ -174,12 +174,10 @@
hi->min_packet_bytes = 64 + sizeof (gre_header_t) +
sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t);
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
- 9000;
+ /* TODO: Should take tunnel overhead into consideration */
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index 7ca2c12..a142edf 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -643,6 +643,9 @@
hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
}
+ /* Standard default MPLS tunnel MTU. */
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
+
/*
* Add the new tunnel to the tunnel DB - key:SW if index
*/
diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c
index 6e2dccc..ffa8634 100644
--- a/src/vnet/sctp/sctp.c
+++ b/src/vnet/sctp/sctp.c
@@ -43,8 +43,12 @@
ip_copy (&listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_ip,
&tep->ip, tep->is_ip4);
- listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU =
- vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index, VLIB_TX);
+ u32 mtu = tep->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+ tep->sw_if_index,
+ VNET_MTU_IP4) :
+ vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index,
+ VNET_MTU_IP6);
+ listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = mtu;
listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.is_ip4 = tep->is_ip4;
listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto =
TRANSPORT_PROTO_SCTP;
@@ -480,8 +484,12 @@
clib_spinlock_lock_if_init (&tm->half_open_lock);
sctp_conn = sctp_half_open_connection_new (thread_id);
- sctp_conn->sub_conn[idx].PMTU =
- vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index, VLIB_TX);
+ u32 mtu = rmt->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+ rmt->sw_if_index,
+ VNET_MTU_IP4) :
+ vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index,
+ VNET_MTU_IP6);
+ sctp_conn->sub_conn[idx].PMTU = mtu;
transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection;
ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c
index 44e2b0d..735f960 100644
--- a/src/vnet/srp/interface.c
+++ b/src/vnet/srp/interface.c
@@ -432,7 +432,7 @@
hi->min_packet_bytes = 40 + 16;
/* Standard default ethernet MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 1500);
vec_free (hi->hw_address);
vec_add (hi->hw_address, address, sizeof (address));
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 10a86a4..e61c91a 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -1068,8 +1068,7 @@
hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
hw->min_supported_packet_bytes = TAP_MTU_MIN;
hw->max_supported_packet_bytes = TAP_MTU_MAX;
- hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] =
- hw->max_supported_packet_bytes - sizeof (ethernet_header_t);
+ vnet_sw_interface_set_mtu (tm->vnet_main, hw->sw_if_index, 9000);
ti->sw_if_index = hw->sw_if_index;
if (ap->sw_if_indexp)
*(ap->sw_if_indexp) = hw->sw_if_index;
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
index 153fdcc..5098aa6 100644
--- a/src/vnet/vnet.h
+++ b/src/vnet/vnet.h
@@ -66,6 +66,8 @@
* sw_interface_add_del_functions[VNET_ITF_FUNC_N_PRIO];
_vnet_interface_function_list_elt_t
* sw_interface_admin_up_down_functions[VNET_ITF_FUNC_N_PRIO];
+ _vnet_interface_function_list_elt_t
+ * sw_interface_mtu_change_functions[VNET_ITF_FUNC_N_PRIO];
uword *interface_tag_by_sw_if_index;