[qca-nss-sfe] Use RCU read lock for IPv4
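
Replace the doubly linked conn_match hash chains with RCU-protected
hlists so that the IPv4 receive paths (UDP, TCP and ICMP) can look up
connections under rcu_read_lock() instead of taking the si->lock
spinlock for every packet.

To make the lookup safe without the lock:
- Make the per-connection rx packet/byte counters atomic and fold them
  into the 64-bit summary counters with atomic subtraction.
- Drop the hash chain reordering on lookup, which lockless readers
  cannot perform.
- Mark connections as removed under si->lock and have the remove helper
  return whether this caller performed the removal, so that a connection
  is flushed at most once.
- Free connection objects from an RCU callback so that concurrent
  readers never touch freed memory.
- Allocate the connection objects in sfe_ipv4_create_rule() before
  taking si->lock.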
Change-Id: I249fa74af19238e415b848f376bf5ba59fac79c8
Signed-off-by: Ratheesh Kannoth <quic_rkannoth@quicinc.com>
diff --git a/sfe_ipv4.c b/sfe_ipv4.c
index 492eb1c..7c85e95 100644
--- a/sfe_ipv4.c
+++ b/sfe_ipv4.c
@@ -25,6 +25,7 @@
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/version.h>
+#include <linux/lockdep.h>
#include "sfe_debug.h"
#include "sfe.h"
@@ -113,91 +114,68 @@
}
/*
- * sfe_ipv4_find_sfe_ipv4_connection_match()
+ * sfe_ipv4_find_sfe_ipv4_connection_match_rcu()
* Get the IPv4 flow match info that corresponds to a particular 5-tuple.
*
- * On entry we must be holding the lock that protects the hash table.
+ * On entry we must be holding rcu_read_lock().
*/
static struct sfe_ipv4_connection_match *
-sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
+sfe_ipv4_find_sfe_ipv4_connection_match_rcu(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
__be32 src_ip, __be16 src_port,
__be32 dest_ip, __be16 dest_port)
{
- struct sfe_ipv4_connection_match *cm;
- struct sfe_ipv4_connection_match *head;
+ struct sfe_ipv4_connection_match *cm = NULL;
unsigned int conn_match_idx;
+ struct hlist_head *lhead;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
- cm = si->conn_match_hash[conn_match_idx];
- /*
- * If we don't have anything in this chain then bail.
- */
- if (unlikely(!cm)) {
- return NULL;
- }
+ lhead = &si->hlist_conn_match_hash_head[conn_match_idx];
- /*
- * Hopefully the first entry is the one we want.
- */
- if ((cm->match_src_port == src_port)
- && (cm->match_dest_port == dest_port)
- && (cm->match_src_ip == src_ip)
- && (cm->match_dest_ip == dest_ip)
- && (cm->match_protocol == protocol)
- && (cm->match_dev == dev)) {
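+ /*
+ * Walk the bucket under RCU; writers mutate it with hlist_add_head_rcu()
+ * and hlist_del_init_rcu() while holding si->lock.
+ */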
+ hlist_for_each_entry_rcu(cm, lhead, hnode) {
+ if (cm->match_src_port != src_port
+ || cm->match_dest_port != dest_port
+ || cm->match_src_ip != src_ip
+ || cm->match_dest_ip != dest_ip
+ || cm->match_protocol != protocol
+ || cm->match_dev != dev) {
+ continue;
+ }
+
+ spin_lock_bh(&si->lock);
si->connection_match_hash_hits++;
- return cm;
- }
+ spin_unlock_bh(&si->lock);
- /*
- * Unfortunately we didn't find it at head, so we search it in chain and
- * move matching entry to the top of the hash chain. We presume that this
- * will be reused again very quickly.
- */
- head = cm;
- do {
- cm = cm->next;
- } while (cm && (cm->match_src_port != src_port
- || cm->match_dest_port != dest_port
- || cm->match_src_ip != src_ip
- || cm->match_dest_ip != dest_ip
- || cm->match_protocol != protocol
- || cm->match_dev != dev));
-
- /*
- * Not found then we're done.
- */
- if (unlikely(!cm)) {
- return NULL;
+ break;
}
- /*
- * We found a match so move it.
- */
- if (cm->next) {
- cm->next->prev = cm->prev;
- }
- cm->prev->next = cm->next;
- cm->prev = NULL;
- cm->next = head;
- head->prev = cm;
- si->conn_match_hash[conn_match_idx] = cm;
- si->connection_match_hash_reorders++;
-
return cm;
}
/*
* sfe_ipv4_connection_match_update_summary_stats()
* Update the summary stats for a connection match entry.
+ *
+ * Stats are incremented atomically, so use atomic subtraction to fold
+ * them into the summary stats.
*/
-static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
+static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm,
+ u32 *packets, u32 *bytes)
{
- cm->rx_packet_count64 += cm->rx_packet_count;
- cm->rx_packet_count = 0;
- cm->rx_byte_count64 += cm->rx_byte_count;
- cm->rx_byte_count = 0;
+ u32 packet_count, byte_count;
+
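+ /*
+ * Use atomic_sub() rather than a reset to zero so that increments which
+ * land between the read and the update are kept for the next sync.
+ */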
+ packet_count = atomic_read(&cm->rx_packet_count);
+ cm->rx_packet_count64 += packet_count;
+ atomic_sub(packet_count, &cm->rx_packet_count);
+
+ byte_count = atomic_read(&cm->rx_byte_count);
+ cm->rx_byte_count64 += byte_count;
+ atomic_sub(byte_count, &cm->rx_byte_count);
+
+ *packets = packet_count;
+ *bytes = byte_count;
}
/*
@@ -326,29 +304,18 @@
/*
* sfe_ipv4_insert_sfe_ipv4_connection_match()
* Insert a connection match into the hash.
- *
- * On entry we must be holding the lock that protects the hash table.
*/
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si,
struct sfe_ipv4_connection_match *cm)
{
- struct sfe_ipv4_connection_match **hash_head;
- struct sfe_ipv4_connection_match *prev_head;
unsigned int conn_match_idx
= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
cm->match_src_ip, cm->match_src_port,
cm->match_dest_ip, cm->match_dest_port);
- hash_head = &si->conn_match_hash[conn_match_idx];
- prev_head = *hash_head;
- cm->prev = NULL;
- if (prev_head) {
- prev_head->prev = cm;
- }
+ lockdep_assert_held(&si->lock);
- cm->next = prev_head;
- *hash_head = cm;
-
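+ /*
+ * hlist_add_head_rcu() publishes the entry; RCU readers may see it as
+ * soon as the bucket head pointer is updated.
+ */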
+ hlist_add_head_rcu(&cm->hnode, &si->hlist_conn_match_hash_head[conn_match_idx]);
#ifdef CONFIG_NF_FLOW_COOKIE
if (!si->flow_cookie_enable)
return;
@@ -383,11 +350,12 @@
/*
* sfe_ipv4_remove_sfe_ipv4_connection_match()
* Remove a connection match object from the hash.
- *
- * On entry we must be holding the lock that protects the hash table.
*/
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
+ lockdep_assert_held(&si->lock);
+
#ifdef CONFIG_NF_FLOW_COOKIE
if (si->flow_cookie_enable) {
/*
@@ -418,22 +386,7 @@
}
#endif
- /*
- * Unlink the connection match entry from the hash.
- */
- if (cm->prev) {
- cm->prev->next = cm->next;
- } else {
- unsigned int conn_match_idx
- = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
- cm->match_src_ip, cm->match_src_port,
- cm->match_dest_ip, cm->match_dest_port);
- si->conn_match_hash[conn_match_idx] = cm->next;
- }
-
- if (cm->next) {
- cm->next->prev = cm->prev;
- }
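+ /*
+ * Concurrent RCU readers may still see this entry; its memory is only
+ * freed after a grace period via call_rcu().
+ */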
+ hlist_del_init_rcu(&cm->hnode);
/*
* If the connection match entry is in the active list remove it.
@@ -451,6 +404,7 @@
si->active_tail = cm->active_prev;
}
}
}
/*
@@ -476,42 +430,28 @@
{
struct sfe_ipv4_connection *c;
unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
+
+ lockdep_assert_held(&si->lock);
+
c = si->conn_hash[conn_idx];
/*
- * If we don't have anything in this chain then bale.
- */
- if (unlikely(!c)) {
- return NULL;
- }
-
- /*
- * Hopefully the first entry is the one we want.
- */
- if ((c->src_port == src_port)
- && (c->dest_port == dest_port)
- && (c->src_ip == src_ip)
- && (c->dest_ip == dest_ip)
- && (c->protocol == protocol)) {
- return c;
- }
-
- /*
- * Unfortunately we didn't find it at head, so we search it in chain.
- */
- do {
- c = c->next;
- } while (c && (c->src_port != src_port
- || c->dest_port != dest_port
- || c->src_ip != src_ip
- || c->dest_ip != dest_ip
- || c->protocol != protocol));
-
- /*
* Will need connection entry for next create/destroy metadata,
* So no need to re-order entry for these requests
*/
- return c;
+ while (c) {
+ if ((c->src_port == src_port)
+ && (c->dest_port == dest_port)
+ && (c->src_ip == src_ip)
+ && (c->dest_ip == dest_ip)
+ && (c->protocol == protocol)) {
+ return c;
+ }
+
+ c = c->next;
+ }
+
+ return NULL;
}
/*
@@ -554,6 +494,8 @@
struct sfe_ipv4_connection *prev_head;
unsigned int conn_idx;
+ lockdep_assert_held(&si->lock);
+
/*
* Insert entry into the connection hash.
*/
@@ -594,11 +536,16 @@
/*
* sfe_ipv4_remove_sfe_ipv4_connection()
* Remove a sfe_ipv4_connection object from the hash.
- *
- * On entry we must be holding the lock that protects the hash table.
*/
-static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
+static bool sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
+ lockdep_assert_held(&si->lock);
+
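+ /*
+ * Two CPUs can look up the same connection under RCU and race to
+ * remove it; only the first caller succeeds and goes on to flush it.
+ */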
+ if (c->removed) {
+ DEBUG_ERROR("%px: Connection has been removed already\n", c);
+ return false;
+ }
+
/*
* Remove the connection match objects.
*/
@@ -635,15 +582,18 @@
si->all_connections_tail = c->all_connections_prev;
}
+ c->removed = true;
si->num_connections--;
+ return true;
}
/*
- * sfe_ipv4_sync_sfe_ipv4_connection()
+ * sfe_ipv4_gen_sync_sfe_ipv4_connection()
* Sync a connection.
*
* On entry to this function we expect that the lock for the connection is either
- * already held or isn't required.
+ * already held (when called from sfe_ipv4_periodic_sync()) or isn't required
+ * (when called from sfe_ipv4_flush_sfe_ipv4_connection()).
*/
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
@@ -651,6 +601,7 @@
{
struct sfe_ipv4_connection_match *original_cm;
struct sfe_ipv4_connection_match *reply_cm;
+ u32 packet_count, byte_count;
/*
* Fill in the update message.
@@ -675,13 +626,13 @@
sis->dest_td_end = reply_cm->protocol_state.tcp.end;
sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
- sis->src_new_packet_count = original_cm->rx_packet_count;
- sis->src_new_byte_count = original_cm->rx_byte_count;
- sis->dest_new_packet_count = reply_cm->rx_packet_count;
- sis->dest_new_byte_count = reply_cm->rx_byte_count;
+ sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet_count, &byte_count);
+ sis->src_new_packet_count = packet_count;
+ sis->src_new_byte_count = byte_count;
- sfe_ipv4_connection_match_update_summary_stats(original_cm);
- sfe_ipv4_connection_match_update_summary_stats(reply_cm);
+ sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet_count, &byte_count);
+ sis->dest_new_packet_count = packet_count;
+ sis->dest_new_byte_count = byte_count;
sis->src_dev = original_cm->match_dev;
sis->src_packet_count = original_cm->rx_packet_count64;
@@ -701,6 +652,34 @@
}
/*
+ * sfe_ipv4_free_sfe_ipv4_connection_rcu()
+ * Called at RCU qs state to free the connection object.
+ */
+static void sfe_ipv4_free_sfe_ipv4_connection_rcu(struct rcu_head *head)
+{
+ struct sfe_ipv4_connection *c;
+
+ /*
+ * We don't need the spin lock here as the connection has already been
+ * removed from the hash lists.
+ */
+ c = container_of(head, struct sfe_ipv4_connection, rcu);
+
+ BUG_ON(!c->removed);
+
+ DEBUG_TRACE("%px: connecton has been deleted\n", c);
+
+ /*
+ * Release our hold of the source and dest devices and free the memory
+ * for our connection objects.
+ */
+ dev_put(c->original_dev);
+ dev_put(c->reply_dev);
+ kfree(c->original_match);
+ kfree(c->reply_match);
+ kfree(c);
+}
+
+/*
* sfe_ipv4_flush_sfe_ipv4_connection()
* Flush a connection and free all associated resources.
*
@@ -713,20 +692,23 @@
struct sfe_ipv4_connection *c,
sfe_sync_reason_t reason)
{
- struct sfe_connection_sync sis;
u64 now_jiffies;
sfe_sync_rule_callback_t sync_rule_callback;
- rcu_read_lock();
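+ /*
+ * A connection is flushed at most once, and only after it has been
+ * removed from the hash tables (c->removed is set under si->lock).
+ */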
+ BUG_ON(!c->removed);
+
spin_lock_bh(&si->lock);
si->connection_flushes++;
- sync_rule_callback = rcu_dereference(si->sync_rule_callback);
spin_unlock_bh(&si->lock);
+ rcu_read_lock();
+ sync_rule_callback = rcu_dereference(si->sync_rule_callback);
+
+ /*
+ * Generate a sync message and then sync.
+ */
if (sync_rule_callback) {
- /*
- * Generate a sync message and then sync.
- */
+ struct sfe_connection_sync sis;
now_jiffies = get_jiffies_64();
sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
sync_rule_callback(&sis);
@@ -738,11 +720,7 @@
* Release our hold of the source and dest devices and free the memory
* for our connection objects.
*/
- dev_put(c->original_dev);
- dev_put(c->reply_dev);
- kfree(c->original_match);
- kfree(c->reply_match);
- kfree(c);
+ call_rcu(&c->rcu, sfe_ipv4_free_sfe_ipv4_connection_rcu);
}
/*
@@ -760,6 +738,7 @@
struct sfe_ipv4_connection_match *cm;
u8 ttl;
struct net_device *xmit_dev;
+ bool ret;
/*
* Is our packet too short to contain a valid UDP header?
@@ -786,7 +765,7 @@
src_port = udph->source;
dest_port = udph->dest;
- spin_lock_bh(&si->lock);
+ rcu_read_lock();
/*
* Look for a connection match.
@@ -794,16 +773,19 @@
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
- cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+ cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
}
#else
- cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+ cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
+ spin_lock_bh(&si->lock);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ rcu_read_unlock();
+
DEBUG_TRACE("no connection found\n");
return 0;
}
@@ -815,13 +797,19 @@
*/
if (unlikely(flush_on_find)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
+
DEBUG_TRACE("flush on find\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
return 0;
}
@@ -831,8 +819,10 @@
* through the slow path.
*/
if (unlikely(!cm->flow_accel)) {
+ spin_lock_bh(&si->lock);
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ rcu_read_unlock();
return 0;
}
#endif
@@ -843,13 +833,17 @@
ttl = iph->ttl;
if (unlikely(ttl < 2)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("ttl too low\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -859,13 +853,17 @@
*/
if (unlikely(len > cm->xmit_dev_mtu)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("larger than mtu\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -881,8 +879,9 @@
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb) {
+ if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
+ rcu_read_unlock();
return 0;
}
@@ -969,22 +968,26 @@
/*
* Update traffic stats.
*/
- cm->rx_packet_count++;
- cm->rx_byte_count += len;
+ atomic_inc(&cm->rx_packet_count);
+ atomic_add(len, &cm->rx_byte_count);
/*
* If we're not already on the active list then insert ourselves at the tail
* of the current list.
*/
if (unlikely(!cm->active)) {
- cm->active = true;
- cm->active_prev = si->active_tail;
- if (likely(si->active_tail)) {
- si->active_tail->active_next = cm;
- } else {
- si->active_head = cm;
+ spin_lock_bh(&si->lock);
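+ /*
+ * Re-check under the lock: another CPU may have added this entry to
+ * the active list after our lockless check above.
+ */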
+ if (unlikely(!cm->active)) {
+ cm->active = true;
+ cm->active_prev = si->active_tail;
+ if (likely(si->active_tail)) {
+ si->active_tail->active_next = cm;
+ } else {
+ si->active_head = cm;
+ }
+ si->active_tail = cm;
}
- si->active_tail = cm;
+ spin_unlock_bh(&si->lock);
}
xmit_dev = cm->xmit_dev;
@@ -1023,6 +1026,9 @@
DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
}
+ rcu_read_unlock();
+
+ spin_lock_bh(&si->lock);
si->packets_forwarded++;
spin_unlock_bh(&si->lock);
@@ -1142,6 +1148,7 @@
u8 ttl;
u32 flags;
struct net_device *xmit_dev;
+ bool ret;
/*
* Is our packet too short to contain a valid UDP header?
@@ -1169,7 +1176,7 @@
dest_port = tcph->dest;
flags = tcp_flag_word(tcph);
- spin_lock_bh(&si->lock);
+ rcu_read_lock();
/*
* Look for a connection match.
@@ -1177,10 +1184,10 @@
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
- cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+ cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
}
#else
- cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+ cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
/*
@@ -1189,13 +1196,19 @@
* For diagnostic purposes we differentiate this here.
*/
if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
+ spin_lock_bh(&si->lock);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ rcu_read_unlock();
+
DEBUG_TRACE("no connection found - fast flags\n");
return 0;
}
+
+ rcu_read_unlock();
+ spin_lock_bh(&si->lock);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
@@ -1212,13 +1225,19 @@
*/
if (unlikely(flush_on_find)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("flush on find\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+
+ rcu_read_unlock();
return 0;
}
@@ -1228,8 +1247,10 @@
* through the slow path.
*/
if (unlikely(!cm->flow_accel)) {
+ spin_lock_bh(&si->lock);
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ rcu_read_unlock();
return 0;
}
#endif
@@ -1239,13 +1260,18 @@
ttl = iph->ttl;
if (unlikely(ttl < 2)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("ttl too low\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+
+ rcu_read_unlock();
return 0;
}
@@ -1255,13 +1281,18 @@
*/
if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("larger than mtu\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+
+ rcu_read_unlock();
return 0;
}
@@ -1271,14 +1302,18 @@
*/
if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1303,14 +1338,18 @@
seq = ntohl(tcph->seq);
if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
seq, cm->protocol_state.tcp.max_end + 1);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1320,13 +1359,17 @@
data_offs = tcph->doff << 2;
if (unlikely(data_offs < sizeof(struct tcphdr))) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1337,13 +1380,17 @@
sack = ack;
if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("TCP option SACK size is wrong\n");
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1353,14 +1400,18 @@
data_offs += sizeof(struct iphdr);
if (unlikely(len < data_offs)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
data_offs, len);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1372,14 +1423,18 @@
if (unlikely((s32)(end - (cm->protocol_state.tcp.end
- counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("seq: %u before left edge: %u\n",
end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1388,14 +1443,18 @@
*/
if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
sack, counter_cm->protocol_state.tcp.end + 1);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1408,13 +1467,17 @@
- 1;
if (unlikely((s32)(sack - left_edge) < 0)) {
struct sfe_ipv4_connection *c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -1453,8 +1516,9 @@
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb) {
+ if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
+ rcu_read_unlock();
return 0;
}
@@ -1535,22 +1599,27 @@
/*
* Update traffic stats.
*/
- cm->rx_packet_count++;
- cm->rx_byte_count += len;
+ atomic_inc(&cm->rx_packet_count);
+ atomic_add(len, &cm->rx_byte_count);
/*
* If we're not already on the active list then insert ourselves at the tail
* of the current list.
*/
if (unlikely(!cm->active)) {
- cm->active = true;
- cm->active_prev = si->active_tail;
- if (likely(si->active_tail)) {
- si->active_tail->active_next = cm;
- } else {
- si->active_head = cm;
+ spin_lock_bh(&si->lock);
+ if (unlikely(!cm->active)) {
+ cm->active = true;
+ cm->active_prev = si->active_tail;
+ if (likely(si->active_tail)) {
+ si->active_tail->active_next = cm;
+ } else {
+ si->active_head = cm;
+ }
+ si->active_tail = cm;
}
- si->active_tail = cm;
+ spin_unlock_bh(&si->lock);
}
xmit_dev = cm->xmit_dev;
@@ -1591,6 +1660,9 @@
DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
}
+ rcu_read_unlock();
+
+ spin_lock_bh(&si->lock);
si->packets_forwarded++;
spin_unlock_bh(&si->lock);
@@ -1640,6 +1712,7 @@
struct sfe_ipv4_connection_match *cm;
struct sfe_ipv4_connection *c;
u32 pull_len = sizeof(struct icmphdr) + ihl;
+ bool ret;
/*
* Is our packet too short to contain a valid ICMP header?
@@ -1777,7 +1850,7 @@
src_ip = icmp_iph->saddr;
dest_ip = icmp_iph->daddr;
- spin_lock_bh(&si->lock);
+ rcu_read_lock();
/*
* Look for a connection match. Note that we reverse the source and destination
@@ -1786,12 +1859,14 @@
* been sent on the interface from which we received it though so that's still
* ok to use.
*/
- cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
+ cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
if (unlikely(!cm)) {
+ spin_lock_bh(&si->lock);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
+ rcu_read_unlock();
DEBUG_TRACE("no connection found\n");
return 0;
}
@@ -1801,12 +1876,16 @@
* its state.
*/
c = cm->connection;
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ spin_lock_bh(&si->lock);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+ }
+ rcu_read_unlock();
return 0;
}
@@ -2027,7 +2106,7 @@
int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
{
struct sfe_ipv4 *si = &__si;
- struct sfe_ipv4_connection *c;
+ struct sfe_ipv4_connection *c, *c_old;
struct sfe_ipv4_connection_match *original_cm;
struct sfe_ipv4_connection_match *reply_cm;
struct net_device *dest_dev;
@@ -2041,6 +2120,27 @@
return -EINVAL;
}
+ /*
+ * Allocate the various connection tracking objects.
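+ * Doing this before taking si->lock keeps the lock hold time short.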
+ */
+ c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
+ if (unlikely(!c)) {
+ return -ENOMEM;
+ }
+
+ original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
+ if (unlikely(!original_cm)) {
+ kfree(c);
+ return -ENOMEM;
+ }
+
+ reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
+ if (unlikely(!reply_cm)) {
+ kfree(original_cm);
+ kfree(c);
+ return -ENOMEM;
+ }
+
spin_lock_bh(&si->lock);
si->connection_create_requests++;
@@ -2048,13 +2148,13 @@
* Check to see if there is already a flow that matches the rule we're
* trying to create. If there is then we can't create a new one.
*/
- c = sfe_ipv4_find_sfe_ipv4_connection(si,
+ c_old = sfe_ipv4_find_sfe_ipv4_connection(si,
sic->protocol,
sic->src_ip.ip,
sic->src_port,
sic->dest_ip.ip,
sic->dest_port);
- if (c != NULL) {
+ if (c_old != NULL) {
si->connection_create_collisions++;
/*
@@ -2062,9 +2162,13 @@
* request to create the connection rule contains more
* up-to-date information. Check and update accordingly.
*/
- sfe_ipv4_update_protocol_state(c, sic);
+ sfe_ipv4_update_protocol_state(c_old, sic);
spin_unlock_bh(&si->lock);
+ kfree(reply_cm);
+ kfree(original_cm);
+ kfree(c);
+
DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
" s: %s:%pxM:%pI4:%u, d: %s:%pxM:%pI4:%u\n",
sic->mark, sic->protocol,
@@ -2074,30 +2178,6 @@
}
/*
- * Allocate the various connection tracking objects.
- */
- c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
- if (unlikely(!c)) {
- spin_unlock_bh(&si->lock);
- return -ENOMEM;
- }
-
- original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
- if (unlikely(!original_cm)) {
- spin_unlock_bh(&si->lock);
- kfree(c);
- return -ENOMEM;
- }
-
- reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
- if (unlikely(!reply_cm)) {
- spin_unlock_bh(&si->lock);
- kfree(original_cm);
- kfree(c);
- return -ENOMEM;
- }
-
- /*
* Fill in the "original" direction connection matching object.
* Note that the transmit MAC address is "dest_mac_xlate" because
* we always know both ends of a connection by their translated
@@ -2113,9 +2193,9 @@
original_cm->xlate_src_port = sic->src_port_xlate;
original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
original_cm->xlate_dest_port = sic->dest_port_xlate;
- original_cm->rx_packet_count = 0;
+ atomic_set(&original_cm->rx_packet_count, 0);
original_cm->rx_packet_count64 = 0;
- original_cm->rx_byte_count = 0;
+ atomic_set(&original_cm->rx_byte_count, 0);
original_cm->rx_byte_count64 = 0;
original_cm->xmit_dev = dest_dev;
original_cm->xmit_dev_mtu = sic->dest_mtu;
@@ -2172,10 +2252,12 @@
reply_cm->xlate_src_port = sic->dest_port;
reply_cm->xlate_dest_ip = sic->src_ip.ip;
reply_cm->xlate_dest_port = sic->src_port;
- reply_cm->rx_packet_count = 0;
+
+ atomic_set(&reply_cm->rx_packet_count, 0);
reply_cm->rx_packet_count64 = 0;
- reply_cm->rx_byte_count = 0;
+ atomic_set(&reply_cm->rx_byte_count, 0);
reply_cm->rx_byte_count64 = 0;
+
reply_cm->xmit_dev = src_dev;
reply_cm->xmit_dev_mtu = sic->src_mtu;
memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
@@ -2244,6 +2326,7 @@
c->mark = sic->mark;
c->debug_read_seq = 0;
c->last_sync_jiffies = get_jiffies_64();
+ c->removed = false;
/*
* Take hold of our source and dest devices for the duration of the connection.
@@ -2300,6 +2383,7 @@
{
struct sfe_ipv4 *si = &__si;
struct sfe_ipv4_connection *c;
+ bool ret;
spin_lock_bh(&si->lock);
si->connection_destroy_requests++;
@@ -2323,10 +2407,12 @@
/*
* Remove our connection details from the hash tables.
*/
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
spin_unlock_bh(&si->lock);
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
+ }
DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
@@ -2381,6 +2467,7 @@
{
struct sfe_ipv4 *si = &__si;
struct sfe_ipv4_connection *c;
+ bool ret;
another_round:
spin_lock_bh(&si->lock);
@@ -2397,13 +2484,15 @@
}
if (c) {
- sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+ ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
}
spin_unlock_bh(&si->lock);
if (c) {
- sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
+ if (ret) {
+ sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
+ }
goto another_round;
}
}
@@ -2589,6 +2678,7 @@
u64 dest_rx_bytes;
u64 last_sync_jiffies;
u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
+ u32 packet, byte;
#ifdef CONFIG_NF_FLOW_COOKIE
int src_flow_cookie, dst_flow_cookie;
#endif
@@ -2605,7 +2695,7 @@
/*
* If there were no connections then move to the next state.
*/
- if (!c) {
+ if (!c || c->removed) {
spin_unlock_bh(&si->lock);
ws->state++;
return true;
@@ -2623,8 +2713,8 @@
src_priority = original_cm->priority;
src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
- sfe_ipv4_connection_match_update_summary_stats(original_cm);
- sfe_ipv4_connection_match_update_summary_stats(reply_cm);
+ sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet, &byte);
+ sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet, &byte);
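+ /*
+ * The returned deltas are unused here; the calls above fold the atomic
+ * counters into the 64-bit totals read below.
+ */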
src_rx_packets = original_cm->rx_packet_count64;
src_rx_bytes = original_cm->rx_byte_count64;
@@ -2967,6 +3057,7 @@
* We've finished with our output so free the write state.
*/
kfree(ws);
+ file->private_data = NULL;
}
return 0;
@@ -3050,6 +3141,20 @@
__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
#endif /*CONFIG_NF_FLOW_COOKIE*/
+ /*
+ * sfe_ipv4_conn_match_hash_init()
+ * Initialize conn match hash lists
+ */
+static void sfe_ipv4_conn_match_hash_init(struct sfe_ipv4 *si, int len)
+{
+ struct hlist_head *hash_list = si->hlist_conn_match_hash_head;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ INIT_HLIST_HEAD(&hash_list[i]);
+ }
+}
+
/*
* sfe_ipv4_init()
*/
@@ -3060,6 +3165,8 @@
DEBUG_INFO("SFE IPv4 init\n");
+ sfe_ipv4_conn_match_hash_init(si, ARRAY_SIZE(si->hlist_conn_match_hash_head));
+
/*
* Create sys/sfe_ipv4
*/
@@ -3106,9 +3213,7 @@
timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0);
#endif
mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
-
spin_lock_init(&si->lock);
-
return 0;
exit4:
@@ -3134,7 +3239,6 @@
struct sfe_ipv4 *si = &__si;
DEBUG_INFO("SFE IPv4 exit\n");
-
/*
* Destroy all connections.
*/