Adds flowstat changes

These changes are based on changes for flowstats/QoE feature
for shortcut-fe

Tests:
1. Verified that connection stats are present in /proc/net/ip_conn_addr
   when enabled on R920 kernel and R920 running firmware with these changes.
   Also verified that the counters increment when passing routed
   traffic between WAN and LAN
diff --git a/exports/sfe_api.h b/exports/sfe_api.h
index 86eadac..998c5ba 100644
--- a/exports/sfe_api.h
+++ b/exports/sfe_api.h
@@ -398,6 +398,18 @@
 	u8 flags;			/**< Bit flags associated with the rule. */
 	u32 qos_tag;			/**< QoS tag. */
 	u32 cause;			/**< Flush cause. */
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS /* CP_LATENCY_IP */
+        u32 flows_sync_valid;   /**< Non-zero when the flowstats fields below carry data to sync now. */
+        u32 cp_fs_original; /**< Flag indicating if the connection is original (outbound). */
+        u32 tot_delta;       /**< Sum of the latency samples; used to compute the average. */
+        u32 tot_delta_square; /**< Sum of squared latency samples; used to calculate the standard deviation. */
+        u32 num_samples; /**< Number of latency samples accumulated in tot_delta. */
+        u64 fs_rx_packet_count; /**< Flowstats RX packet count copied from the connection sync record. */
+        u64 fs_rx_byte_count; /**< Flowstats RX byte count copied from the connection sync record. */
+        u64 fs_tx_packet_count; /**< Flowstats TX packet count copied from the connection sync record. */
+        u64 fs_tx_byte_count; /**< Flowstats TX byte count copied from the connection sync record. */
+        struct net_device *dest_dev; /**< Destination device copied from the connection sync record. */
+#endif /* CP_LATENCY_IP */
 };
 
 /**
diff --git a/sfe.c b/sfe.c
index 51dcb29..ed725e9 100644
--- a/sfe.c
+++ b/sfe.c
@@ -42,6 +42,11 @@
 extern int max_ipv4_conn;
 extern int max_ipv6_conn;
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+extern unsigned int cp_flowstats_enabled;
+#endif
+
+
 #define SFE_MESSAGE_VERSION 0x1
 #define sfe_ipv6_addr_copy(src, dest) memcpy((void *)(dest), (void *)(src), 16)
 #define sfe_ipv4_stopped(CTX) (rcu_dereference((CTX)->ipv4_stats_sync_cb) == NULL)
@@ -555,6 +560,23 @@
 	sync_msg->return_tx_packet_count = sis->src_new_packet_count;
 	sync_msg->return_tx_byte_count = sis->src_new_byte_count;
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS /* CP_LATENCY_IP*/
+	/*
+	 * Export flowstats only when enabled and valid, so the valid flag is never set over unfilled fields.
+	 */
+	sync_msg->flows_sync_valid = cp_flowstats_enabled && sis->flows_sync_valid;
+	if (sync_msg->flows_sync_valid) {
+		sync_msg->num_samples = sis->num_samples;
+		sync_msg->tot_delta = sis->tot_delta;
+		sync_msg->tot_delta_square = sis->tot_delta_square;
+		sync_msg->fs_rx_packet_count = sis->fs_rx_packet_count;
+		sync_msg->fs_tx_packet_count = sis->fs_tx_packet_count;
+		sync_msg->fs_rx_byte_count = sis->fs_rx_byte_count;
+		sync_msg->fs_tx_byte_count = sis->fs_tx_byte_count;
+		sync_msg->dest_dev = sis->dest_dev;
+	}
+#endif /* CP_LATENCY_IP*/
+
 	/*
 	 * Fill expiration time to extend, in unit of msec
 	 */
diff --git a/sfe.h b/sfe.h
index 9ecfed3..ad809be 100644
--- a/sfe.h
+++ b/sfe.h
@@ -124,6 +124,17 @@
 	u32 dest_new_byte_count;
 	u32 reason;                     /* reason for stats sync message, i.e. destroy, flush, period sync */
 	u64 delta_jiffies;		/* Time to be added to the current timeout to keep the connection alive */
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+        u32 flows_sync_valid; /* Set when the flowstats fields below hold data that should be synced now */
+        u32 cp_fs_original;   /* Flag indicating if the connection is original (outbound) */
+        u32 tot_delta;        /* Sum of the latency samples; used to compute the average */
+        u32 tot_delta_square; /* Sum of squared latency samples; used to calculate the standard deviation */
+        u32 num_samples; /* Number of latency samples accumulated in tot_delta */
+        u64 fs_rx_packet_count; /* Packets counted on the reply-direction match (IPv4 sync path) */
+        u64 fs_rx_byte_count; /* Bytes counted on the reply-direction match (IPv4 sync path) */
+        u64 fs_tx_packet_count; /* Packets counted on the original-direction match (IPv4 sync path) */
+        u64 fs_tx_byte_count; /* Bytes counted on the original-direction match (IPv4 sync path) */
+#endif //CONFIG_NETFILTER_CP_FLOWSTATS
 };
 
 /*
diff --git a/sfe_ipv4.c b/sfe_ipv4.c
index 994bd96..ff293a5 100644
--- a/sfe_ipv4.c
+++ b/sfe_ipv4.c
@@ -117,6 +117,50 @@
 struct sfe_ipv4_msg *sfe_ipv4_sync_many_msg;
 uint32_t sfe_ipv4_sync_max_number;
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+/*
+ * sfe_ipv4_gen_sync_flowstats()
+ *	Copy the flowstats counters and latency sums of a connection into sis
+ *	(eventually used to update conntrack) and restart the accumulation.
+ *	A periodic sync of a flow younger than CP_IP_REC_TIMEOUT_SFE seconds
+ *	is skipped so that pending latency samples are not cleared.
+ */
+static void sfe_ipv4_gen_sync_flowstats(struct sfe_connection_sync *sis,
+					struct sfe_ipv4_connection_match *original_cm,
+					struct sfe_ipv4_connection_match *reply_cm)
+{
+	bool is_tcp = (original_cm->match_protocol == IPPROTO_TCP);
+	/* Modulo-2^32 age: safe across jiffies wrap and a narrow start_time. */
+	u32 age = (u32)jiffies - (u32)original_cm->connection->start_time;
+
+	if ((sis->reason == SFE_SYNC_REASON_STATS) &&
+	    (age < CP_IP_REC_TIMEOUT_SFE * (HZ))) {
+		sis->flows_sync_valid = false;
+		return;
+	}
+	sis->flows_sync_valid = true;
+
+	/* Latency is only sampled for TCP; report zeros (not garbage) otherwise. */
+	sis->tot_delta = is_tcp ? original_cm->tot_delta : 0;
+	sis->num_samples = is_tcp ? original_cm->num_samples : 0;
+	sis->tot_delta_square = is_tcp ? original_cm->tot_delta_square : 0;
+	sis->fs_rx_packet_count = reply_cm->fs_rx_packet_count;
+	sis->fs_tx_packet_count = original_cm->fs_rx_packet_count;
+	sis->fs_rx_byte_count = reply_cm->fs_rx_byte_count;
+	sis->fs_tx_byte_count = original_cm->fs_rx_byte_count;
+
+	/* Reset counts on the connections so we pick stats for next iteration */
+	original_cm->fs_rx_packet_count = reply_cm->fs_rx_packet_count = 0;
+	original_cm->fs_rx_byte_count = reply_cm->fs_rx_byte_count = 0;
+	original_cm->tot_delta = 0;
+	original_cm->num_samples = 0;
+	original_cm->tot_delta_square = 0;
+	original_cm->last_seq_num = 0;
+	original_cm->last_seq_time = 0;
+	original_cm->connection->start_time = jiffies;
+}
+#endif //CONFIG_NETFILTER_CP_FLOWSTATS
+
 /*
  * sfe_ipv4_gen_ip_csum()
  *	Generate the IP checksum for an IPv4 header.
@@ -654,6 +698,9 @@
 
 	sis->reason = reason;
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+        sfe_ipv4_gen_sync_flowstats(sis, original_cm, reply_cm);
+#endif
 	/*
 	 * Get the time increment since our last sync.
 	 */
@@ -1252,6 +1299,10 @@
 		return -ENOMEM;
 	}
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	c->start_time = jiffies;
+#endif
+
 	original_cm = (struct sfe_ipv4_connection_match *)kzalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
 	if (unlikely(!original_cm)) {
 		DEBUG_WARN("%px: memory allocation of connection match entry failed\n", msg);
@@ -1359,6 +1410,16 @@
 
 	original_cm->xmit_dev = dest_dev;
 	original_cm->xmit_dev_mtu = msg->conn_rule.return_mtu;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	original_cm->fs_rx_byte_count = 0;
+	original_cm->fs_rx_packet_count = 0;
+	original_cm->cp_fs_original = true;
+	original_cm->last_seq_num = 0;
+	original_cm->last_seq_time = 0;
+	original_cm->tot_delta = 0;
+	original_cm->tot_delta_square = 0;
+	original_cm->num_samples = 0;
+#endif
 
 	original_cm->connection = c;
 	original_cm->counter_match = reply_cm;
@@ -1569,6 +1630,16 @@
 
 	reply_cm->xmit_dev = src_dev;
 	reply_cm->xmit_dev_mtu = msg->conn_rule.flow_mtu;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	reply_cm->fs_rx_packet_count = 0;
+	reply_cm->fs_rx_byte_count = 0;
+	reply_cm->cp_fs_original = false;
+	reply_cm->last_seq_num = 0;
+	reply_cm->last_seq_time = 0;
+	reply_cm->tot_delta = 0;
+	reply_cm->tot_delta_square = 0;
+	reply_cm->num_samples = 0;
+#endif
 
 	reply_cm->connection = c;
 	reply_cm->counter_match = original_cm;
diff --git a/sfe_ipv4.h b/sfe_ipv4.h
index 49fd0ff..ea74338 100644
--- a/sfe_ipv4.h
+++ b/sfe_ipv4.h
@@ -26,6 +26,23 @@
 #include <linux/version.h>
 
 /*
+ * By default Linux IP header and transport layer header structures are
+ * unpacked, assuming that such headers should be 32-bit aligned.
+ * Unfortunately some wireless adaptors can't cope with this requirement and
+ * some CPUs can't handle misaligned accesses.  For those platforms we
+ * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
+ * When we do this the compiler will generate slightly worse code than for the
+ * aligned case (on most platforms) but will be much quicker than fixing
+ * things up in an unaligned trap handler.
+ */
+#define SFE_IPV4_UNALIGNED_IP_HEADER 1
+#if SFE_IPV4_UNALIGNED_IP_HEADER
+#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
+#else
+#define SFE_IPV4_UNALIGNED_STRUCT
+#endif
+
+/*
  * Specifies the lower bound on ACK numbers carried in the TCP header
  */
 #define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
@@ -40,6 +57,7 @@
 	u32 max_end;		/* Sequence number of the last byte to ack */
 };
 
+
 /*
  * Bit flags for IPv4 connection matching entry.
  */
@@ -129,6 +147,21 @@
 
 	struct net_device *match_dev;	/* Network device */
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	u32 cp_fs_original;     /* True on the original-direction match, false on the reply match */
+	u32 last_seq_num;     /* TCP sequence number of the packet being timed; 0 when none is pending */
+	u32 last_seq_time; /* jiffies value (stored in 32 bits) when last_seq_num was saved */
+	u32 tot_delta;        /* Sum of latency samples, in jiffies;
+				used to compute the average */
+	u32 tot_delta_square; /* Sum of squared latency samples;
+				used to calculate the standard deviation */
+	u32 num_samples; /* Number of latency samples accumulated */
+/*
+ * Packet and byte counters for flowstats; reset when a flowstats sync is generated.
+ */
+	u64 fs_rx_packet_count;
+	u64 fs_rx_byte_count;
+#endif //CONFIG_NETFILTER_CP_FLOWSTATS
 	/*
 	 * Control the operations of the match.
 	 */
@@ -240,8 +273,18 @@
 	u32 debug_read_seq;		/* sequence number for debug dump */
 	bool removed;			/* Indicates the connection is removed */
 	struct rcu_head rcu;		/* delay rcu free */
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	unsigned long start_time;	/* jiffies when the flow started or flowstats were last synced; full width so jiffies is not truncated */
+#endif
 };
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS /* CP_LATENCY_IP*/
+#define MIN_IP_PKT_SIZE 84 /* Outbound IP packets must be larger than this to start a latency sample */
+#define CP_IP_REC_TIMEOUT_SFE 15 /* In seconds: minimum flow age before a periodic flowstats sync */
+#define MSEC_IN_SEC 1000
+#define MAX_RTT_THRESHOLD ((700) * (HZ) / (MSEC_IN_SEC)) /* 700 msec in jiffies; parenthesized so it expands safely */
+#endif
+
 /*
  * IPv4 connections and hash table size information.
  */
diff --git a/sfe_ipv4_tcp.c b/sfe_ipv4_tcp.c
index f2cebe7..6dfb6e7 100644
--- a/sfe_ipv4_tcp.c
+++ b/sfe_ipv4_tcp.c
@@ -111,6 +111,71 @@
 	return true;
 }
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+extern unsigned int cp_flowstats_enabled;
+
+/***** Cradlepoint Latency Measurement Enhancement. ***/
+/*
+ * sfe_tcp_packet_latency()
+ *	Sample the TCP round trip latency of a connection.
+ * The sequence number of an outbound data packet is saved together with the
+ * current time; the first reply packet whose ACK reaches that sequence number
+ * yields one sample (the elapsed jiffies). Samples are accumulated in the
+ * original-direction match entry (sum, sum of squares, count) so the average
+ * and standard deviation can be derived later.
+ * 1) Latency is measured for outbound connections only, i.e. connections
+ *    originating from clients on the LAN side. It is expected that the
+ *    majority of our traffic is via outbound connections.
+ * 2) The calculation does not consider TCP window size changes. For outbound
+ *    connections (with 5+ ms latency) and interactive applications the window
+ *    size does not have much effect.
+ * 3) When the connection terminates the latency numbers are added to an
+ *    "IP records table" maintained in file "cp_ip_record_track.c".
+ */
+static inline void sfe_tcp_packet_latency(struct sfe_ipv4_connection_match *cm,
+					  struct sfe_ipv4_connection_match *counter_cm,
+					  struct iphdr *iph, struct tcphdr *tcph)
+{
+	u32 now = (u32)jiffies;
+	u32 delta;
+
+	/*
+	 * Start a measurement when the packet is outbound, no measurement is
+	 * pending, and the packet is not a bare ACK for data from the remote
+	 * end (it is larger than the minimum IP packet size).
+	 */
+	if ((cm->cp_fs_original) &&
+	    (cm->last_seq_num == 0) &&
+	    (ntohs(iph->tot_len) > MIN_IP_PKT_SIZE)) {
+		cm->last_seq_num = ntohl(tcph->seq);
+		cm->last_seq_time = now;
+	}
+
+	/*
+	 * Only a reply packet whose ACK matches or exceeds the saved sequence
+	 * number completes a sample. The signed 32-bit difference keeps the
+	 * comparison correct when sequence numbers wrap.
+	 */
+	if (!counter_cm->cp_fs_original || (counter_cm->last_seq_num == 0) ||
+	    ((s32)(ntohl(tcph->ack_seq) - counter_cm->last_seq_num) < 0)) {
+		return;
+	}
+
+	/* last_seq_time is 32 bits wide, so the elapsed time is taken modulo 2^32. */
+	delta = now - counter_cm->last_seq_time;
+	if (delta > MAX_RTT_THRESHOLD) {
+		/* Cap high latency (over 700 msec) to eliminate the outliers */
+		delta = MAX_RTT_THRESHOLD;
+	}
+
+	counter_cm->tot_delta += delta;
+	counter_cm->tot_delta_square += delta * delta;
+	counter_cm->num_samples++;
+	counter_cm->last_seq_num = 0;
+	counter_cm->last_seq_time = 0;
+}
+#endif
+
 /*
  * sfe_ipv4_recv_tcp()
  *	Handle TCP packet receives and forwarding.
@@ -302,6 +367,12 @@
 	}
 
 	counter_cm = cm->counter_match;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	/* Invoke latency measurement code if enabled */
+	if (cp_flowstats_enabled) {
+		sfe_tcp_packet_latency(cm, counter_cm, iph, tcph);
+	}
+#endif //CONFIG_NETFILTER_CP_FLOWSTATS
 
 	/*
 	 * Are we doing sequence number checking?
@@ -659,6 +730,11 @@
 	 */
 	atomic_inc(&cm->rx_packet_count);
 	atomic_add(len, &cm->rx_byte_count);
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	cm->fs_rx_packet_count++;
+	cm->fs_rx_byte_count += len;
+#endif
+
 
 	xmit_dev = cm->xmit_dev;
 	skb->dev = xmit_dev;
diff --git a/sfe_ipv4_udp.c b/sfe_ipv4_udp.c
index b08ce3c..92973aa 100644
--- a/sfe_ipv4_udp.c
+++ b/sfe_ipv4_udp.c
@@ -519,6 +519,11 @@
 	atomic_inc(&cm->rx_packet_count);
 	atomic_add(len, &cm->rx_byte_count);
 
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+	cm->fs_rx_packet_count++;
+	cm->fs_rx_byte_count += len;
+#endif
+
 	xmit_dev = cm->xmit_dev;
 	skb->dev = xmit_dev;