ip: add support for buffer offload metadata in ip midchain

Offloads should be handled by the GSO node, or by the NIC if it has
the relevant capabilities. However, ip midchain does not set the
buffer offload metadata for GSO packets.

This patch sets the buffer offload metadata (outer L3 header offset
and tunnel/outer IP checksum offload flags) when the packet is
GSO/IPIP, so that it can be handled accordingly.

Type: improvement

Change-Id: I17f5d71bf4c5f43a85ca3f2fbebfa1426b42ef69
Signed-off-by: Arthur de Kerhor <arthurdekerhor@gmail.com>
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h
index aff1a2b..186044b 100644
--- a/src/vnet/adj/adj_dp.h
+++ b/src/vnet/adj/adj_dp.h
@@ -36,22 +36,36 @@
   ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
 
   if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags))
-  {
-      ip_csum_t sum;
-      u16 old,new;
-
-      old = 0;
-      new = ip4->length;
-
-      sum = ip4->checksum;
-      sum = ip_csum_update (sum, old, new, ip4_header_t, length);
-      ip4->checksum = ip_csum_fold (sum);
-  }
+    {
+      if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+       {
+         vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+         vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+                                     VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+       }
+      else
+       {
+         ip_csum_t sum;
+         u16 old,new;
+         old = 0;
+         new = ip4->length;
+         sum = ip4->checksum;
+         sum = ip_csum_update (sum, old, new, ip4_header_t, length);
+         ip4->checksum = ip_csum_fold (sum);
+       }
+    }
   else
-  {
+    {
       tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
-      ip4->checksum = ip4_header_checksum (ip4);
-  }
+      if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+       {
+         vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+         vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+                                     VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+       }
+      else
+        ip4->checksum = ip4_header_checksum (ip4);
+    }
 }
 
 static_always_inline void
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index eb2b7ee..e85c888 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2223,9 +2223,6 @@
 						adj0->ia_cfg_index);
 
 	  next[0] = next_index;
-	  if (is_midchain)
-	    vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
-					0 /* is_ip6 */ );
 	}
       else
 	{
@@ -2248,9 +2245,6 @@
 						&next_index, b[1],
 						adj1->ia_cfg_index);
 	  next[1] = next_index;
-	  if (is_midchain)
-	    vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
-					0 /* is_ip6 */ );
 	}
       else
 	{
@@ -2400,9 +2394,6 @@
 
 	  if (is_midchain)
 	    {
-	      vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
-					  0 /* is_ip6 */ );
-
 	      /* Guess we are only writing on ipv4 header. */
 	      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
 	    }
@@ -2506,10 +2497,6 @@
 
 	  if (is_midchain)
 	    {
-	      /* this acts on the packet that is about to be encapped */
-	      vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
-					  0 /* is_ip6 */ );
-
 	      /* Guess we are only writing on ipv4 header. */
 	      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
 	    }
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 1f7fc3f..06c473b 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1971,13 +1971,6 @@
 
 	  if (is_midchain)
 	    {
-	      /* before we paint on the next header, update the L4
-	       * checksums if required, since there's no offload on a tunnel */
-	      vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
-					  1 /* is_ip6 */ );
-	      vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
-					  1 /* is_ip6 */ );
-
 	      /* Guess we are only writing on ipv6 header. */
 	      vnet_rewrite_two_headers (adj0[0], adj1[0],
 					ip0, ip1, sizeof (ip6_header_t));
@@ -2071,9 +2064,6 @@
 
 	  if (is_midchain)
 	    {
-	      vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
-					  1 /* is_ip6 */ );
-
 	      /* Guess we are only writing on ip6 header. */
 	      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
 	    }
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index ec329e2..aaf2146 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -148,7 +148,14 @@
   ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
   tunnel_encap_fixup_6o4 (flags, ((ip6_header_t *) (ip4 + 1)), ip4);
 
-  ip4->checksum = ip4_header_checksum (ip4);
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+					  VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
+  else
+    ip4->checksum = ip4_header_checksum (ip4);
 }
 
 static void
@@ -164,7 +171,14 @@
   ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
   tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
 
-  ip4->checksum = ip4_header_checksum (ip4);
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+					  VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
+  else
+    ip4->checksum = ip4_header_checksum (ip4);
 }
 
 static void
@@ -185,6 +199,12 @@
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
 			  sizeof (*ip6));
   tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6);
+
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
 }
 
 static void
@@ -205,6 +225,12 @@
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
 			  sizeof (*ip6));
   tunnel_encap_fixup_6o6 (flags, ip6 + 1, ip6);
+
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
 }
 
 static void
@@ -226,6 +252,12 @@
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6));
   tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1),
 			     ip6);
+
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
 }
 
 static void
@@ -245,7 +277,15 @@
   ip4->length =
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip4));
   tunnel_encap_fixup_mplso4 (flags, (mpls_unicast_header_t *) (ip4 + 1), ip4);
-  ip4->checksum = ip4_header_checksum (ip4);
+
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+    {
+      vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+      vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+					  VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+    }
+  else
+    ip4->checksum = ip4_header_checksum (ip4);
 }
 
 static void
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index d3a06dc..dd47053 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -18,6 +18,7 @@
 #include <vnet/vnet.h>
 #include <vnet/api_errno.h>
 #include <vnet/ip/ip.h>
+#include <vnet/interface_output.h>
 
 #include <vnet/crypto/crypto.h>
 
@@ -148,11 +149,9 @@
   if (is_transport)
     {
       u8 prot = is_udp ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
-
-      sum = ip_csum_update (ip4->checksum, ip4->protocol,
-			    prot, ip4_header_t, protocol);
+      sum = ip_csum_update (ip4->checksum, ip4->protocol, prot, ip4_header_t,
+			    protocol);
       ip4->protocol = prot;
-
       sum = ip_csum_update (sum, old_len, len, ip4_header_t, length);
     }
   else
@@ -663,6 +662,10 @@
 			 CLIB_CACHE_LINE_BYTES, LOAD);
 	}
 
+      vnet_calc_checksums_inline (vm, b[0], b[0]->flags & VNET_BUFFER_F_IS_IP4,
+				  b[0]->flags & VNET_BUFFER_F_IS_IP6);
+      vnet_calc_outer_checksums_inline (vm, b[0]);
+
       if (is_tun)
 	{
 	  /* we are on a ipsec tunnel's feature arc */
diff --git a/test/test_gso.py b/test/test_gso.py
index 2c8250e..78c5c73 100644
--- a/test/test_gso.py
+++ b/test/test_gso.py
@@ -11,6 +11,7 @@
 import unittest
 
 from scapy.packet import Raw
+from scapy.layers.l2 import GRE
 from scapy.layers.inet6 import IPv6, Ether, IP, ICMPv6PacketTooBig
 from scapy.layers.inet6 import ipv6nh, IPerror6
 from scapy.layers.inet import TCP, ICMP
@@ -23,6 +24,7 @@
 from vpp_ip_route import VppIpRoute, VppRoutePath, FibPathProto
 from vpp_ipip_tun_interface import VppIpIpTunInterface
 from vpp_vxlan_tunnel import VppVxlanTunnel
+from vpp_gre_interface import VppGreInterface
 
 from vpp_ipsec import VppIpsecSA, VppIpsecTunProtect
 from template_ipsec import (
@@ -89,6 +91,9 @@
             self, self.pg0, self.pg0.local_ip6, self.pg0.remote_ip6
         )
 
+        self.gre4 = VppGreInterface(self, self.pg0.local_ip4, self.pg0.remote_ip4)
+        self.gre6 = VppGreInterface(self, self.pg0.local_ip6, self.pg0.remote_ip6)
+
     def tearDown(self):
         super(TestGSO, self).tearDown()
         if not self.vpp_dead:
@@ -804,6 +809,202 @@
             sw_if_index=self.pg0.sw_if_index, enable_disable=0
         )
 
+    def test_gso_gre(self):
+        """GSO GRE test"""
+        #
+        # Send jumbo frame with gso enabled only on gre tunnel interface.
+        # create GRE tunnel on VPP pg0.
+        #
+
+        #
+        # create gre 4 tunnel
+        #
+        self.gre4.add_vpp_config()
+        self.gre4.admin_up()
+        self.gre4.config_ip4()
+
+        #
+        # Add a route that resolves the tunnel's destination
+        #
+        # Add IPv4 routes via tunnel interface
+        self.ip4_via_gre4_tunnel = VppIpRoute(
+            self,
+            "172.16.10.0",
+            24,
+            [
+                VppRoutePath(
+                    "0.0.0.0",
+                    self.gre4.sw_if_index,
+                    proto=FibPathProto.FIB_PATH_NH_PROTO_IP4,
+                )
+            ],
+        )
+        self.ip4_via_gre4_tunnel.add_vpp_config()
+
+        pgre4 = (
+            Ether(src=self.pg2.remote_mac, dst="02:fe:60:1e:a2:79")
+            / IP(src=self.pg2.remote_ip4, dst="172.16.10.3", flags="DF")
+            / TCP(sport=1234, dport=1234)
+            / Raw(b"\xa5" * 65200)
+        )
+
+        # test when GSO segmentation is disabled, Packets are truncated
+        rxs = self.send_and_expect(self.pg2, 5 * [pgre4], self.pg0, 5)
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg0.local_ip4)
+            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
+            self.assert_ip_checksum_valid(rx)
+            self.assertEqual(rx[IP].proto, 0x2F)  # GRE encap
+            self.assertEqual(rx[GRE].proto, 0x0800)  # IPv4
+            inner = rx[GRE].payload
+            self.assertNotEqual(rx[IP].len - 20 - 4, len(inner))
+            self.assertEqual(inner[IP].src, self.pg2.remote_ip4)
+            self.assertEqual(inner[IP].dst, "172.16.10.3")
+            self.assert_ip_checksum_valid(inner)
+            payload_len = inner[IP].len - 20 - 20
+            self.assertEqual(payload_len, 65200)
+            # truncated packet to MTU size
+            self.assertNotEqual(payload_len, len(inner[Raw]))
+
+        # enable the GSO segmentation on GRE tunnel
+        self.vapi.feature_gso_enable_disable(
+            sw_if_index=self.gre4.sw_if_index, enable_disable=1
+        )
+
+        # test again, this time payload will be chunked to GSO size (i.e. 1448)
+        rxs = self.send_and_expect(self.pg2, 5 * [pgre4], self.pg0, 225)
+        size = 0
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg0.local_ip4)
+            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
+            self.assert_ip_checksum_valid(rx)
+            self.assertEqual(rx[IP].proto, 0x2F)  # GRE encap
+            self.assertEqual(rx[GRE].proto, 0x0800)  # IPv4
+            inner = rx[GRE].payload
+            self.assertEqual(rx[IP].len - 20 - 4, len(inner))
+            self.assertEqual(inner[IP].src, self.pg2.remote_ip4)
+            self.assertEqual(inner[IP].dst, "172.16.10.3")
+            self.assert_ip_checksum_valid(inner)
+            self.assert_tcp_checksum_valid(inner)
+            payload_len = inner[IP].len - 20 - 20
+            self.assertEqual(payload_len, len(inner[Raw]))
+            size += payload_len
+        self.assertEqual(size, 65200 * 5)
+
+        # Disable the GSO segmentation on GRE tunnel
+        self.vapi.feature_gso_enable_disable(
+            sw_if_index=self.gre4.sw_if_index, enable_disable=0
+        )
+
+        # test again when GSO segmentation is disabled, Packets are truncated
+        rxs = self.send_and_expect(self.pg2, 5 * [pgre4], self.pg0, 5)
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg0.local_ip4)
+            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
+            self.assert_ip_checksum_valid(rx)
+            self.assertEqual(rx[IP].proto, 0x2F)  # GRE encap
+            self.assertEqual(rx[GRE].proto, 0x0800)  # IPv4
+            inner = rx[GRE].payload
+            self.assertNotEqual(rx[IP].len - 20 - 4, len(inner))
+            self.assertEqual(inner[IP].src, self.pg2.remote_ip4)
+            self.assertEqual(inner[IP].dst, "172.16.10.3")
+            self.assert_ip_checksum_valid(inner)
+            payload_len = inner[IP].len - 20 - 20
+            self.assertEqual(payload_len, 65200)
+            # truncated packet to MTU size
+            self.assertNotEqual(payload_len, len(inner[Raw]))
+
+        self.ip4_via_gre4_tunnel.remove_vpp_config()
+        self.gre4.remove_vpp_config()
+
+        self.gre6.add_vpp_config()
+        self.gre6.admin_up()
+        self.gre6.config_ip4()
+
+        #
+        # Add a route that resolves the tunnel's destination
+        # Add IPv6 routes via tunnel interface
+        #
+        self.vapi.feature_gso_enable_disable(
+            sw_if_index=self.gre6.sw_if_index, enable_disable=1
+        )
+        self.ip6_via_gre6_tunnel = VppIpRoute(
+            self,
+            "fd01:10::",
+            64,
+            [
+                VppRoutePath(
+                    "::",
+                    self.gre6.sw_if_index,
+                    proto=FibPathProto.FIB_PATH_NH_PROTO_IP6,
+                )
+            ],
+        )
+        self.ip6_via_gre6_tunnel.add_vpp_config()
+
+        #
+        # Create IPv6 packet
+        #
+        pgre6 = (
+            Ether(src=self.pg2.remote_mac, dst="02:fe:60:1e:a2:79")
+            / IPv6(src=self.pg2.remote_ip6, dst="fd01:10::3")
+            / TCP(sport=1234, dport=1234)
+            / Raw(b"\xa5" * 65200)
+        )
+
+        # test when GSO segmentation is enabled, payload will be segmented
+        # into GSO size (i.e. 1448)
+        rxs = self.send_and_expect(self.pg2, 5 * [pgre6], self.pg0, 225)
+        size = 0
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IPv6].src, self.pg0.local_ip6)
+            self.assertEqual(rx[IPv6].dst, self.pg0.remote_ip6)
+            self.assertEqual(ipv6nh[rx[IPv6].nh], "GRE")
+            self.assertEqual(rx[GRE].proto, 0x86DD)  # IPv6
+            inner = rx[GRE].payload
+            self.assertEqual(rx[IPv6].plen - 4, len(inner))
+            self.assertEqual(inner[IPv6].src, self.pg2.remote_ip6)
+            self.assertEqual(inner[IPv6].dst, "fd01:10::3")
+            self.assert_tcp_checksum_valid(inner)
+            payload_len = inner[IPv6].plen - 20
+            self.assertEqual(payload_len, len(inner[Raw]))
+            size += payload_len
+        self.assertEqual(size, 65200 * 5)
+
+        # disable GSO segmentation
+        self.vapi.feature_gso_enable_disable(
+            sw_if_index=self.gre6.sw_if_index, enable_disable=0
+        )
+
+        # test again, this time packets will be truncated
+        rxs = self.send_and_expect(self.pg2, 5 * [pgre6], self.pg0, 5)
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg0.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg0.remote_mac)
+            self.assertEqual(rx[IPv6].src, self.pg0.local_ip6)
+            self.assertEqual(rx[IPv6].dst, self.pg0.remote_ip6)
+            self.assertEqual(ipv6nh[rx[IPv6].nh], "GRE")
+            self.assertEqual(rx[GRE].proto, 0x86DD)  # IPv6
+            inner = rx[GRE].payload
+            self.assertNotEqual(rx[IPv6].plen - 4, len(inner))
+            self.assertEqual(inner[IPv6].src, self.pg2.remote_ip6)
+            self.assertEqual(inner[IPv6].dst, "fd01:10::3")
+            payload_len = inner[IPv6].plen - 20
+            self.assertEqual(payload_len, 65200)
+            # packets are truncated to MTU size
+            self.assertNotEqual(payload_len, len(inner[Raw]))
+
+        self.ip6_via_gre6_tunnel.remove_vpp_config()
+        self.gre6.remove_vpp_config()
+
     def test_gso_ipsec(self):
         """GSO IPSEC test"""
         #