ip4-input node rework

Performance gain is around 6 clocks per packet (from 22 down to 16).

Change-Id: Ia6f4293ea9062368a9a6b235c650591dbc0707d0
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index 600d693..e087303 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -57,6 +57,162 @@
 } ip4_input_next_t;
 
 always_inline void
+ip4_input_check_x4 (vlib_main_t * vm,
+		    vlib_node_runtime_t * error_node,
+		    vlib_buffer_t ** p, ip4_header_t ** ip,
+		    u16 * next, int verify_checksum)
+{
+  u8 error0, error1, error2, error3;
+  u32 ip_len0, cur_len0;
+  u32 ip_len1, cur_len1;
+  u32 ip_len2, cur_len2;
+  u32 ip_len3, cur_len3;
+  i32 len_diff0, len_diff1, len_diff2, len_diff3;
+  /* Run the same checks on ip[0..3]; a later failing check overwrites an earlier error. */
+  error0 = error1 = error2 = error3 = IP4_ERROR_NONE;
+
+  /* Not 0x45: header-length nibble != 5 => OPTIONS (punted below), else VERSION. */
+  if (PREDICT_FALSE (ip[0]->ip_version_and_header_length != 0x45))
+    error0 = (ip[0]->ip_version_and_header_length & 0xf) != 5 ?
+      IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+  if (PREDICT_FALSE (ip[1]->ip_version_and_header_length != 0x45))
+    error1 = (ip[1]->ip_version_and_header_length & 0xf) != 5 ?
+      IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+  if (PREDICT_FALSE (ip[2]->ip_version_and_header_length != 0x45))
+    error2 = (ip[2]->ip_version_and_header_length & 0xf) != 5 ?
+      IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+  if (PREDICT_FALSE (ip[3]->ip_version_and_header_length != 0x45))
+    error3 = (ip[3]->ip_version_and_header_length & 0xf) != 5 ?
+      IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+  /* TTL below 1 => TIME_EXPIRED; sent to IP4_INPUT_NEXT_ICMP_ERROR below. */
+  if (PREDICT_FALSE (ip[0]->ttl < 1))
+    error0 = IP4_ERROR_TIME_EXPIRED;
+  if (PREDICT_FALSE (ip[1]->ttl < 1))
+    error1 = IP4_ERROR_TIME_EXPIRED;
+  if (PREDICT_FALSE (ip[2]->ttl < 1))
+    error2 = IP4_ERROR_TIME_EXPIRED;
+  if (PREDICT_FALSE (ip[3]->ttl < 1))
+    error3 = IP4_ERROR_TIME_EXPIRED;
+
+  /* Verify header checksum if requested: the folded sum must equal 0xffff. */
+  if (verify_checksum)
+    {
+      ip_csum_t sum0, sum1, sum2, sum3;
+
+      ip4_partial_header_checksum_x1 (ip[0], sum0);
+      ip4_partial_header_checksum_x1 (ip[1], sum1);
+      ip4_partial_header_checksum_x1 (ip[2], sum2);
+      ip4_partial_header_checksum_x1 (ip[3], sum3);
+
+      error0 = 0xffff != ip_csum_fold (sum0) ?
+	IP4_ERROR_BAD_CHECKSUM : error0;
+      error1 = 0xffff != ip_csum_fold (sum1) ?
+	IP4_ERROR_BAD_CHECKSUM : error1;
+      error2 = 0xffff != ip_csum_fold (sum2) ?
+	IP4_ERROR_BAD_CHECKSUM : error2;
+      error3 = 0xffff != ip_csum_fold (sum3) ?
+	IP4_ERROR_BAD_CHECKSUM : error3;
+    }
+
+  /* Drop packets whose fragment offset is exactly 1. */
+  error0 = ip4_get_fragment_offset (ip[0]) == 1 ?
+    IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+  error1 = ip4_get_fragment_offset (ip[1]) == 1 ?
+    IP4_ERROR_FRAGMENT_OFFSET_ONE : error1;
+  error2 = ip4_get_fragment_offset (ip[2]) == 1 ?
+    IP4_ERROR_FRAGMENT_OFFSET_ONE : error2;
+  error3 = ip4_get_fragment_offset (ip[3]) == 1 ?
+    IP4_ERROR_FRAGMENT_OFFSET_ONE : error3;
+
+  /* Verify lengths: read each header's length field in host byte order. */
+  ip_len0 = clib_net_to_host_u16 (ip[0]->length);
+  ip_len1 = clib_net_to_host_u16 (ip[1]->length);
+  ip_len2 = clib_net_to_host_u16 (ip[2]->length);
+  ip_len3 = clib_net_to_host_u16 (ip[3]->length);
+
+  /* IP length must be at least sizeof (ip4_header_t), the minimal IP header. */
+  error0 = ip_len0 < sizeof (ip[0][0]) ? IP4_ERROR_TOO_SHORT : error0;
+  error1 = ip_len1 < sizeof (ip[1][0]) ? IP4_ERROR_TOO_SHORT : error1;
+  error2 = ip_len2 < sizeof (ip[2][0]) ? IP4_ERROR_TOO_SHORT : error2;
+  error3 = ip_len3 < sizeof (ip[3][0]) ? IP4_ERROR_TOO_SHORT : error3;
+
+  cur_len0 = vlib_buffer_length_in_chain (vm, p[0]);
+  cur_len1 = vlib_buffer_length_in_chain (vm, p[1]);
+  cur_len2 = vlib_buffer_length_in_chain (vm, p[2]);
+  cur_len3 = vlib_buffer_length_in_chain (vm, p[3]);
+  /* u32 difference kept in an i32: negative when the IP length exceeds the buffer length. */
+  len_diff0 = cur_len0 - ip_len0;
+  len_diff1 = cur_len1 - ip_len1;
+  len_diff2 = cur_len2 - ip_len2;
+  len_diff3 = cur_len3 - ip_len3;
+
+  error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+  error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1;
+  error2 = len_diff2 < 0 ? IP4_ERROR_BAD_LENGTH : error2;
+  error3 = len_diff3 < 0 ? IP4_ERROR_BAD_LENGTH : error3;
+  /* Only on error (next[i] is otherwise untouched): TIME_EXPIRED -> ICMP, OPTIONS -> punt, rest -> drop. */
+  if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+    {
+      if (error0 == IP4_ERROR_TIME_EXPIRED)
+	{
+	  icmp4_error_set_vnet_buffer (p[0], ICMP4_time_exceeded,
+				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
+				       0);
+	  next[0] = IP4_INPUT_NEXT_ICMP_ERROR;
+	}
+      else
+	next[0] = error0 != IP4_ERROR_OPTIONS ?
+	  IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+      p[0]->error = error_node->errors[error0];
+    }
+  if (PREDICT_FALSE (error1 != IP4_ERROR_NONE))
+    {
+      if (error1 == IP4_ERROR_TIME_EXPIRED)
+	{
+	  icmp4_error_set_vnet_buffer (p[1], ICMP4_time_exceeded,
+				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
+				       0);
+	  next[1] = IP4_INPUT_NEXT_ICMP_ERROR;
+	}
+      else
+	next[1] = error1 != IP4_ERROR_OPTIONS ?
+	  IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+      p[1]->error = error_node->errors[error1];
+    }
+  if (PREDICT_FALSE (error2 != IP4_ERROR_NONE))
+    {
+      if (error2 == IP4_ERROR_TIME_EXPIRED)
+	{
+	  icmp4_error_set_vnet_buffer (p[2], ICMP4_time_exceeded,
+				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
+				       0);
+	  next[2] = IP4_INPUT_NEXT_ICMP_ERROR;
+	}
+      else
+	next[2] = error2 != IP4_ERROR_OPTIONS ?
+	  IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+      p[2]->error = error_node->errors[error2];
+    }
+  if (PREDICT_FALSE (error3 != IP4_ERROR_NONE))
+    {
+      if (error3 == IP4_ERROR_TIME_EXPIRED)
+	{
+	  icmp4_error_set_vnet_buffer (p[3], ICMP4_time_exceeded,
+				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
+				       0);
+	  next[3] = IP4_INPUT_NEXT_ICMP_ERROR;
+	}
+      else
+	next[3] = error3 != IP4_ERROR_OPTIONS ?
+	  IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+      p[3]->error = error_node->errors[error3];
+    }
+}
+
+always_inline void
 ip4_input_check_x2 (vlib_main_t * vm,
 		    vlib_node_runtime_t * error_node,
 		    vlib_buffer_t * p0, vlib_buffer_t * p1,