[qca-nss-sfe] Use HW csum in TX if possible.
Change-Id: I7857b24e1fa0e240a41109b6c4ce0625293a8154
Signed-off-by: Ratheesh Kannoth <quic_rkannoth@quicinc.com>
diff --git a/sfe_ipv4.c b/sfe_ipv4.c
index 0e565d9..cf04e12 100644
--- a/sfe_ipv4.c
+++ b/sfe_ipv4.c
@@ -748,7 +748,9 @@
}
/*
- * Validate ip csum
+ * Validate the IP csum if necessary. If ip_summed is set to CHECKSUM_UNNECESSARY, it is assumed
+ * that the L3 checksum has already been validated by the Rx interface or by the tunnel interface
+ * that generated the packet.
*/
iph = (struct iphdr *)skb->data;
if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY) && (ip_fast_csum((u8 *)iph, iph->ihl))) {
@@ -1080,6 +1082,7 @@
original_cm->dscp = msg->dscp_rule.flow_dscp << SFE_IPV4_DSCP_SHIFT;
original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
}
+
#ifdef CONFIG_NF_FLOW_COOKIE
original_cm->flow_cookie = 0;
#endif
@@ -1089,8 +1092,19 @@
} else {
original_cm->flow_accel = 1;
}
-
#endif
+ /*
+ * If l2_features are disabled and the flow uses l2 features such as macvlan/bridge/pppoe/vlan,
+ * bottom interfaces are expected to be disabled in the flow rule and the top interfaces are
+ * always used. In such cases, do not use HW csum offload. csum offload is used only when we
+ * are sending directly to the destination interface that supports it.
+ */
+ if (likely(dest_dev->features & NETIF_F_HW_CSUM)) {
+ if ((msg->conn_rule.return_top_interface_num == msg->conn_rule.return_interface_num) ||
+ (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE)) {
+ original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD;
+ }
+ }
/*
* For the non-arp interface, we don't write L2 HDR.
@@ -1168,6 +1182,18 @@
}
#endif
+ /*
+ * If l2_features are disabled and the flow uses l2 features such as macvlan/bridge/pppoe/vlan,
+ * bottom interfaces are expected to be disabled in the flow rule and the top interfaces are
+ * always used. In such cases, do not use HW csum offload. csum offload is used only when we
+ * are sending directly to the destination interface that supports it.
+ */
+ if (likely(src_dev->features & NETIF_F_HW_CSUM)) {
+ if ((msg->conn_rule.flow_top_interface_num == msg->conn_rule.flow_interface_num) ||
+ (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE)) {
+ reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD;
+ }
+ }
/*
* For the non-arp interface, we don't write L2 HDR.
diff --git a/sfe_ipv4.h b/sfe_ipv4.h
index 01baf2e..bdfd72d 100644
--- a/sfe_ipv4.h
+++ b/sfe_ipv4.h
@@ -53,6 +53,8 @@
/* remark priority of SKB */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
/* remark DSCP of packet */
+#define SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD (1<<7)
+ /* use HW checksum offload on Tx */
/*
* IPv4 connection matching structure.
diff --git a/sfe_ipv4_tcp.c b/sfe_ipv4_tcp.c
index df8df0c..b9dcdce 100644
--- a/sfe_ipv4_tcp.c
+++ b/sfe_ipv4_tcp.c
@@ -127,6 +127,7 @@
u32 flags;
struct net_device *xmit_dev;
bool ret;
+ bool hw_csum;
/*
* Is our packet too short to contain a valid UDP header?
@@ -496,6 +497,13 @@
iph->ttl = ttl - 1;
/*
+ * Enable HW csum if the Rx checksum is verified and the xmit interface is CSUM offload capable.
+ * Note: If the L4 csum at Rx was found to be incorrect, we (the router) should use the incremental
+ * L4 checksum here so that HW does not re-calculate/replace the L4 csum.
+ */
+ hw_csum = !!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);
+
+ /*
* Do we have to perform translations of the source address/port?
*/
if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
@@ -505,19 +513,17 @@
iph->saddr = cm->xlate_src_ip;
tcph->source = cm->xlate_src_port;
- /*
- * Do we have a non-zero UDP checksum? If we do then we need
- * to update it.
- */
- tcp_csum = tcph->check;
- if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
- sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
- } else {
- sum = tcp_csum + cm->xlate_src_csum_adjustment;
- }
+ if (unlikely(!hw_csum)) {
+ tcp_csum = tcph->check;
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
+ } else {
+ sum = tcp_csum + cm->xlate_src_csum_adjustment;
+ }
- sum = (sum & 0xffff) + (sum >> 16);
- tcph->check = (u16)sum;
+ sum = (sum & 0xffff) + (sum >> 16);
+ tcph->check = (u16)sum;
+ }
}
/*
@@ -530,25 +536,30 @@
iph->daddr = cm->xlate_dest_ip;
tcph->dest = cm->xlate_dest_port;
- /*
- * Do we have a non-zero UDP checksum? If we do then we need
- * to update it.
- */
- tcp_csum = tcph->check;
- if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
- sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
- } else {
- sum = tcp_csum + cm->xlate_dest_csum_adjustment;
- }
+ if (unlikely(!hw_csum)) {
+ tcp_csum = tcph->check;
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
+ } else {
+ sum = tcp_csum + cm->xlate_dest_csum_adjustment;
+ }
- sum = (sum & 0xffff) + (sum >> 16);
- tcph->check = (u16)sum;
+ sum = (sum & 0xffff) + (sum >> 16);
+ tcph->check = (u16)sum;
+ }
}
/*
- * Replace the IP checksum.
+ * If HW checksum offload is possible, mark the packet CHECKSUM_PARTIAL. Otherwise the full L3 checksum
+ * and the incremental L4 checksum are used to update the packet, and ip_summed is set to
+ * CHECKSUM_UNNECESSARY so that the checksum is not recalculated further along the packet path.
*/
- iph->check = sfe_ipv4_gen_ip_csum(iph);
+ if (likely(hw_csum)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ } else {
+ iph->check = sfe_ipv4_gen_ip_csum(iph);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
/*
* Update traffic stats.
diff --git a/sfe_ipv4_udp.c b/sfe_ipv4_udp.c
index a13c923..1d1a4df 100644
--- a/sfe_ipv4_udp.c
+++ b/sfe_ipv4_udp.c
@@ -45,6 +45,7 @@
u8 ttl;
struct net_device *xmit_dev;
bool ret;
+ bool hw_csum;
/*
* Is our packet too short to contain a valid UDP header?
@@ -197,6 +198,13 @@
iph->ttl = ttl - 1;
/*
+ * Enable HW csum if the Rx checksum is verified and the xmit interface is CSUM offload capable.
+ * Note: If the L4 csum at Rx was found to be incorrect, we (the router) should use the incremental
+ * L4 checksum here so that HW does not re-calculate/replace the L4 csum.
+ */
+ hw_csum = !!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);
+
+ /*
* Do we have to perform translations of the source address/port?
*/
if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
@@ -209,18 +217,20 @@
* Do we have a non-zero UDP checksum? If we do then we need
* to update it.
*/
- udp_csum = udph->check;
- if (likely(udp_csum)) {
- u32 sum;
+ if (unlikely(!hw_csum)) {
+ udp_csum = udph->check;
+ if (likely(udp_csum)) {
+ u32 sum;
- if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
- sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
- } else {
- sum = udp_csum + cm->xlate_src_csum_adjustment;
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
+ } else {
+ sum = udp_csum + cm->xlate_src_csum_adjustment;
+ }
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ udph->check = (u16)sum;
}
-
- sum = (sum & 0xffff) + (sum >> 16);
- udph->check = (u16)sum;
}
}
@@ -237,25 +247,38 @@
* Do we have a non-zero UDP checksum? If we do then we need
* to update it.
*/
- udp_csum = udph->check;
- if (likely(udp_csum)) {
- u32 sum;
+ if (unlikely(!hw_csum)) {
+ udp_csum = udph->check;
+ if (likely(udp_csum)) {
+ u32 sum;
- if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
- sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
- } else {
- sum = udp_csum + cm->xlate_dest_csum_adjustment;
+ /*
+ * TODO: Use a common API for below incremental checksum calculation
+ * for IPv4/IPv6 UDP/TCP
+ */
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
+ } else {
+ sum = udp_csum + cm->xlate_dest_csum_adjustment;
+ }
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ udph->check = (u16)sum;
}
-
- sum = (sum & 0xffff) + (sum >> 16);
- udph->check = (u16)sum;
}
}
/*
- * Replace the IP checksum.
+ * If HW checksum offload is possible, mark the packet CHECKSUM_PARTIAL. Otherwise the full L3 checksum
+ * and the incremental L4 checksum are used to update the packet, and ip_summed is set to
+ * CHECKSUM_UNNECESSARY so that the checksum is not recalculated further along the packet path.
*/
- iph->check = sfe_ipv4_gen_ip_csum(iph);
+ if (likely(hw_csum)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ } else {
+ iph->check = sfe_ipv4_gen_ip_csum(iph);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
/*
* Update traffic stats.
diff --git a/sfe_ipv6.c b/sfe_ipv6.c
index 50c35b6..e53cef5 100644
--- a/sfe_ipv6.c
+++ b/sfe_ipv6.c
@@ -1075,6 +1075,18 @@
original_cm->flow_accel = 1;
}
#endif
+ /*
+ * If l2_features are disabled and the flow uses l2 features such as macvlan/bridge/pppoe/vlan,
+ * bottom interfaces are expected to be disabled in the flow rule and the top interfaces are
+ * always used. In such cases, do not use HW csum offload. csum offload is used only when we
+ * are sending directly to the destination interface that supports it.
+ */
+ if (likely(dest_dev->features & NETIF_F_HW_CSUM)) {
+ if ((msg->conn_rule.return_top_interface_num == msg->conn_rule.return_interface_num) ||
+ (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE)) {
+ original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD;
+ }
+ }
/*
* For the non-arp interface, we don't write L2 HDR.
@@ -1149,6 +1161,19 @@
}
#endif
/*
+ * If l2_features are disabled and the flow uses l2 features such as macvlan/bridge/pppoe/vlan,
+ * bottom interfaces are expected to be disabled in the flow rule and the top interfaces are
+ * always used. In such cases, do not use HW csum offload. csum offload is used only when we
+ * are sending directly to the destination interface that supports it.
+ */
+ if (likely(src_dev->features & NETIF_F_HW_CSUM)) {
+ if ((msg->conn_rule.flow_top_interface_num == msg->conn_rule.flow_interface_num) ||
+ (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE)) {
+ reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD;
+ }
+ }
+
+ /*
* For the non-arp interface, we don't write L2 HDR.
*/
if (!(src_dev->flags & IFF_NOARP)) {
diff --git a/sfe_ipv6.h b/sfe_ipv6.h
index d3812f1..e9c58bd 100644
--- a/sfe_ipv6.h
+++ b/sfe_ipv6.h
@@ -66,6 +66,8 @@
/* remark priority of SKB */
#define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
/* remark DSCP of packet */
+#define SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD (1<<7)
+ /* use HW checksum offload on Tx */
/*
* IPv6 connection matching structure.
diff --git a/sfe_ipv6_tcp.c b/sfe_ipv6_tcp.c
index c936116..04870e9 100644
--- a/sfe_ipv6_tcp.c
+++ b/sfe_ipv6_tcp.c
@@ -126,6 +126,7 @@
u32 flags;
struct net_device *xmit_dev;
bool ret;
+ bool hw_csum;
/*
* Is our packet too short to contain a valid UDP header?
@@ -504,6 +505,13 @@
iph->hop_limit -= 1;
/*
+ * Enable HW csum if the Rx checksum is verified and the xmit interface is CSUM offload capable.
+ * Note: If the L4 csum at Rx was found to be incorrect, we (the router) should use the incremental
+ * L4 checksum here so that HW does not re-calculate/replace the L4 csum.
+ */
+ hw_csum = !!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);
+
+ /*
* Do we have to perform translations of the source address/port?
*/
if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
@@ -516,14 +524,12 @@
iph->saddr.s6_addr32[3] = cm->xlate_src_ip[0].addr[3];
tcph->source = cm->xlate_src_port;
- /*
- * Do we have a non-zero UDP checksum? If we do then we need
- * to update it.
- */
- tcp_csum = tcph->check;
- sum = tcp_csum + cm->xlate_src_csum_adjustment;
- sum = (sum & 0xffff) + (sum >> 16);
- tcph->check = (u16)sum;
+ if (unlikely(!hw_csum)) {
+ tcp_csum = tcph->check;
+ sum = tcp_csum + cm->xlate_src_csum_adjustment;
+ sum = (sum & 0xffff) + (sum >> 16);
+ tcph->check = (u16)sum;
+ }
}
/*
@@ -539,14 +545,23 @@
iph->daddr.s6_addr32[3] = cm->xlate_dest_ip[0].addr[3];
tcph->dest = cm->xlate_dest_port;
- /*
- * Do we have a non-zero UDP checksum? If we do then we need
- * to update it.
- */
- tcp_csum = tcph->check;
- sum = tcp_csum + cm->xlate_dest_csum_adjustment;
- sum = (sum & 0xffff) + (sum >> 16);
- tcph->check = (u16)sum;
+ if (unlikely(!hw_csum)) {
+ tcp_csum = tcph->check;
+ sum = tcp_csum + cm->xlate_dest_csum_adjustment;
+ sum = (sum & 0xffff) + (sum >> 16);
+ tcph->check = (u16)sum;
+ }
+ }
+
+ /*
+ * If HW checksum offload is possible, mark the packet CHECKSUM_PARTIAL. Otherwise the incremental L4
+ * checksum is used to update the packet, and ip_summed is set to CHECKSUM_UNNECESSARY so that the
+ * checksum is not recalculated further along the packet path.
+ */
+ if (likely(hw_csum)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
/*
diff --git a/sfe_ipv6_udp.c b/sfe_ipv6_udp.c
index 559425e..2523255 100644
--- a/sfe_ipv6_udp.c
+++ b/sfe_ipv6_udp.c
@@ -44,6 +44,7 @@
struct sfe_ipv6_connection_match *cm;
struct net_device *xmit_dev;
bool ret;
+ bool hw_csum;
/*
* Is our packet too short to contain a valid UDP header?
@@ -198,6 +199,13 @@
iph->hop_limit -= 1;
/*
+ * Enable HW csum if the Rx checksum is verified and the xmit interface is CSUM offload capable.
+ * Note: If the L4 csum at Rx was found to be incorrect, we (the router) should use the incremental
+ * L4 checksum here so that HW does not re-calculate/replace the L4 csum.
+ */
+ hw_csum = !!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);
+
+ /*
* Do we have to perform translations of the source address/port?
*/
if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
@@ -213,11 +221,13 @@
* Do we have a non-zero UDP checksum? If we do then we need
* to update it.
*/
- udp_csum = udph->check;
- if (likely(udp_csum)) {
- u32 sum = udp_csum + cm->xlate_src_csum_adjustment;
- sum = (sum & 0xffff) + (sum >> 16);
- udph->check = (u16)sum;
+ if (unlikely(!hw_csum)) {
+ udp_csum = udph->check;
+ if (likely(udp_csum)) {
+ u32 sum = udp_csum + cm->xlate_src_csum_adjustment;
+ sum = (sum & 0xffff) + (sum >> 16);
+ udph->check = (u16)sum;
+ }
}
}
@@ -237,15 +247,28 @@
* Do we have a non-zero UDP checksum? If we do then we need
* to update it.
*/
- udp_csum = udph->check;
- if (likely(udp_csum)) {
- u32 sum = udp_csum + cm->xlate_dest_csum_adjustment;
- sum = (sum & 0xffff) + (sum >> 16);
- udph->check = (u16)sum;
+ if (unlikely(!hw_csum)) {
+ udp_csum = udph->check;
+ if (likely(udp_csum)) {
+ u32 sum = udp_csum + cm->xlate_dest_csum_adjustment;
+ sum = (sum & 0xffff) + (sum >> 16);
+ udph->check = (u16)sum;
+ }
}
}
/*
+ * If HW checksum offload is possible, mark the packet CHECKSUM_PARTIAL. Otherwise the incremental L4
+ * checksum is used to update the packet, and ip_summed is set to CHECKSUM_UNNECESSARY so that the
+ * checksum is not recalculated further along the packet path.
+ */
+ if (likely(hw_csum)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ /*
* Update traffic stats.
*/
atomic_inc(&cm->rx_packet_count);