Patch v4.4.100 to Cradlepoint Current
Change-Id: Ia3c8d927e2d4445cc67793de5468074e521507a3
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e4199d..9f1bd5a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -39,6 +39,9 @@
#include <linux/if_vlan.h>
#include "vlan.h"
#include "vlanproc.h"
+#ifdef HNDCTF
+#include <ctf/hndctf.h>
+#endif /* HNDCTF */
#define DRV_VERSION "1.8"
@@ -118,6 +121,10 @@
if (vlan_id)
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+#ifdef HNDCTF
+ (void)ctf_dev_vlan_delete(kcih, real_dev, vlan_id);
+#endif /* HNDCTF */
+
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
}
@@ -275,6 +282,10 @@
if (err < 0)
goto out_free_newdev;
+#ifdef HNDCTF
+ (void)ctf_dev_vlan_add(kcih, real_dev, vlan_id, new_dev);
+#endif /* HNDCTF */
+
return 0;
out_free_newdev:
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e2ed698..a9b0713 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -64,6 +64,39 @@
return true;
}
+/* Cradlepoint: Used externally to account for VLAN traffic
+ * when a normal, fully constructed skb may not be available. */
+void vlan_ext_account_rx_packet(const struct net_device *vlan_dev,
+ bool is_multicast,
+ unsigned int len)
+{
+ struct vlan_pcpu_stats *rx_stats;
+
+ rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
+
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->rx_packets++;
+ rx_stats->rx_bytes += len;
+ if (is_multicast)
+ rx_stats->rx_multicast++;
+ u64_stats_update_end(&rx_stats->syncp);
+}
+EXPORT_SYMBOL(vlan_ext_account_rx_packet);
+
+void vlan_ext_account_tx_packet(const struct net_device *vlan_dev,
+ unsigned int len)
+{
+ struct vlan_pcpu_stats *tx_stats;
+
+ tx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->tx_packets++;
+ tx_stats->tx_bytes += len;
+ u64_stats_update_end(&tx_stats->syncp);
+}
+EXPORT_SYMBOL(vlan_ext_account_tx_packet);
+
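+/* Usage sketch (hypothetical caller, not part of this patch): a fastpath
+ * that bypasses vlan_do_receive() can keep the VLAN device's per-cpu
+ * counters consistent by calling the helpers above directly, e.g.:
+ *
+ *	rcu_read_lock();
+ *	vdev = __vlan_find_dev_deep_rcu(real_dev, htons(ETH_P_8021Q), vid);
+ *	if (vdev)
+ *		vlan_ext_account_rx_packet(vdev,
+ *			is_multicast_ether_addr(eth_hdr(skb)->h_dest),
+ *			skb->len);
+ *	rcu_read_unlock();
+ */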
/* Must be invoked with rcu_read_lock. */
struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
__be16 vlan_proto, u16 vlan_id)
@@ -108,6 +141,12 @@
}
EXPORT_SYMBOL(vlan_dev_vlan_id);
+u16 vlan_dev_vlan_flags(const struct net_device *dev)
+{
+ return vlan_dev_priv(dev)->flags;
+}
+EXPORT_SYMBOL(vlan_dev_vlan_flags);
+
__be16 vlan_dev_vlan_proto(const struct net_device *dev)
{
return vlan_dev_priv(dev)->vlan_proto;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2c8095a..f3377b5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -22,6 +22,14 @@
#include <asm/uaccess.h>
#include "br_private.h"
+#ifdef CTF_ESDK_VERSION
+#include <typedefs.h>
+#include <bcmdefs.h>
+#ifdef HNDCTF
+#include <ctf/hndctf.h>
+#endif /* HNDCTF */
+#endif /* CTF_ESDK_VERSION */
+
#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM)
@@ -42,6 +50,33 @@
u16 vid = 0;
rcu_read_lock();
+
+#ifdef CTF_ESDK_VERSION
+#ifdef HNDCTF
+ /* For Broadstream iQoS inbound traffic.
+ * Inbound traffic needs qdisc rules applied on the bridge interface, so CTF
+ * must use the bridge device's dev_queue_xmit to transmit the packet.
+ * Add a fastpath here to forward the packet from the bridge to eth0/1/2
+ * directly if this packet is cached in a CTF IP entry.
+ */
+ if (CTF_IS_PKTTOBR(skb)) {
+ const struct net_device_ops *ops = NULL;
+ struct net_device *tmpdev = skb->dev;
+ int rc = -1;
+
+ ops = ((struct net_device *)(skb->ctf_ipc_txif))->netdev_ops;
+ if (ops) {
+ skb->dev = (struct net_device *)(skb->ctf_ipc_txif);
+ rc = ops->ndo_start_xmit(skb, (struct net_device *)(skb->ctf_ipc_txif));
+ if (rc == NETDEV_TX_OK)
+ return rc;
+ skb->dev = tmpdev;
+ }
+ }
+#endif /* HNDCTF */
+#endif /* CTF_ESDK_VERSION */
+
+
nf_ops = rcu_dereference(nf_br_ops);
if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
rcu_read_unlock();
@@ -196,6 +231,15 @@
struct net_bridge *br = netdev_priv(dev);
struct sockaddr *addr = p;
+ /* Allow the user to reset the MAC address to the unsticky mode. */
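+ /* e.g. "ip link set dev br0 address 00:00:00:00:00:00" drops the
+ * sticky address and lets br_stp_recalculate_bridge_id() derive the
+ * address from the member ports again. */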
+ if (is_zero_ether_addr(addr->sa_data)) {
+ spin_lock_bh(&br->lock);
+ dev->addr_assign_type = NET_ADDR_PERM;
+ br_stp_recalculate_bridge_id(br);
+ spin_unlock_bh(&br->lock);
+ return 0;
+ }
+
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 09442e0..1312aa8 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -26,6 +26,82 @@
#include <linux/if_vlan.h>
#include <net/switchdev.h>
#include "br_private.h"
+#ifdef HNDCTF
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <typedefs.h>
+#include <osl.h>
+#include <ctf/hndctf.h>
+
+static void
+br_brc_init(ctf_brc_t *brc, unsigned char *ea, struct net_device *rxdev)
+{
+ memset(brc, 0, sizeof(ctf_brc_t));
+
+ memcpy(brc->dhost.octet, ea, ETH_ALEN);
+
+ if (rxdev->priv_flags & IFF_802_1Q_VLAN) {
+ brc->txifp = (void *)vlan_dev_real_dev(rxdev);
+ brc->vid = vlan_dev_vlan_id(rxdev);
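+ /* bit 0 of the VLAN flags is VLAN_FLAG_REORDER_HDR in this kernel;
+ * when it is set, CTF is told to tag frames on transmit (CTF_ACTION_TAG) */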
+ brc->action = ((vlan_dev_vlan_flags(rxdev) & 1) ?
+ CTF_ACTION_TAG : CTF_ACTION_UNTAG);
+ } else {
+ brc->txifp = (void *)rxdev;
+ brc->action = CTF_ACTION_UNTAG;
+ }
+
+#ifdef DEBUG
+ printk("mac %02x:%02x:%02x:%02x:%02x:%02x\n",
+ brc->dhost.octet[0], brc->dhost.octet[1],
+ brc->dhost.octet[2], brc->dhost.octet[3],
+ brc->dhost.octet[4], brc->dhost.octet[5]);
+ printk("vid: %d action %x\n", brc->vid, brc->action);
+ printk("txif: %s\n", ((struct net_device *)brc->txifp)->name);
+#endif
+
+ return;
+}
+
+/*
+ * Add bridge cache entry.
+ */
+void
+br_brc_add(unsigned char *ea, struct net_device *rxdev)
+{
+ ctf_brc_t brc_entry;
+#ifndef CTF_ESDK_VERSION
+ ctf_brc_t *brcp;
+#endif
+
+ /* Add a brc entry only if the packet was received on a
+ * CTF-enabled interface
+ */
+ if (!ctf_isenabled(kcih, ((rxdev->priv_flags & IFF_802_1Q_VLAN) ?
+ vlan_dev_real_dev(rxdev) : rxdev)))
+ return;
+
+ br_brc_init(&brc_entry, ea, rxdev);
+
+#ifdef DEBUG
+ printk("%s: Adding brc entry\n", __FUNCTION__);
+#endif
+
+ /* Add the bridge cache entry */
+#ifdef CTF_ESDK_VERSION
+ ctf_brc_add(kcih, &brc_entry);
+#else
+ if ((brcp = ctf_brc_lkup(kcih, ea)) == NULL)
+ ctf_brc_add(kcih, &brc_entry);
+ else {
+ ctf_brc_release(kcih, brcp);
+ ctf_brc_update(kcih, &brc_entry);
+ }
+#endif
+
+ return;
+}
+
+#endif /* HNDCTF */
static struct kmem_cache *br_fdb_cache __read_mostly;
static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
@@ -154,6 +230,14 @@
fdb_del_external_learn(f);
hlist_del_rcu(&f->hlist);
+
+#ifdef HNDCTF
+ /* Delete the corresponding brc entry when it expires
+ * or is deleted by the user.
+ */
+ ctf_brc_delete(kcih, f->addr.addr);
+#endif /* HNDCTF */
+
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
}
@@ -308,9 +392,47 @@
if (f->added_by_external_learn)
continue;
this_timer = f->updated + delay;
- if (time_before_eq(this_timer, jiffies))
+ if (time_before_eq(this_timer, jiffies)) {
+#ifdef HNDCTF
+ ctf_brc_t *brcp;
+
+#ifdef CTF_ESDK_VERSION
+ ctf_brc_acquire(kcih);
+#endif
+
+ /* Before expiring the fdb entry, check the brc entry's
+ * live counter to make sure there have been no frames on
+ * this connection during the timeout period.
+ */
+#ifdef CTF_ESDK_VERSION
+ brcp = ctf_brc_lkup(kcih, f->addr.addr, TRUE);
+#else
+ brcp = ctf_brc_lkup(kcih, f->addr.addr);
+#endif
+ if (brcp != NULL) {
+ if (brcp->live > 0) {
+ brcp->live = 0;
+#ifdef CTF_ESDK_VERSION
+ ctf_brc_release(kcih);
+#else
+ ctf_brc_release(kcih, brcp);
+#endif
+ f->updated = jiffies;
+ continue;
+ }
+#ifdef CTF_ESDK_VERSION
+ ctf_brc_release(kcih);
+#else
+ ctf_brc_release(kcih, brcp);
+#endif
+#ifdef CTF_ESDK_VERSION
+ } else {
+ ctf_brc_release(kcih);
+#endif
+ }
+#endif /* HNDCTF */
fdb_delete(br, f);
- else if (time_before(this_timer, next_timer))
+ } else if (time_before(this_timer, next_timer))
next_timer = this_timer;
}
}
@@ -518,6 +640,15 @@
return fdb;
}
+#ifdef HNDCTF
+static void fdb_ctf_create(struct net_bridge_fdb_entry *fdb)
+{
+ /* Add a bridge cache entry for non-local hosts */
+ if (!fdb->is_local && (fdb->dst->state == BR_STATE_FORWARDING))
+ br_brc_add(fdb->addr.addr, fdb->dst->dev);
+}
+#endif /* HNDCTF */
+
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
@@ -545,6 +676,9 @@
return -ENOMEM;
fdb_add_hw_addr(br, addr);
+#ifdef HNDCTF
+ fdb_ctf_create(fdb);
+#endif /* HNDCTF */
fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
}
@@ -587,6 +721,15 @@
source->dev->name);
} else {
/* fastpath: update of existing entry */
+#ifdef HNDCTF
+ /* Add the entry if the addr is new, or
+ * update the brc entry in case the host moved from
+ * one bridge to another or to a different port under
+ * the same bridge.
+ */
+ if (source->state == BR_STATE_FORWARDING)
+ br_brc_add((unsigned char *)addr, source->dev);
+#endif /* HNDCTF */
if (unlikely(source != fdb->dst)) {
fdb->dst = source;
fdb_modified = true;
@@ -604,6 +747,9 @@
if (fdb) {
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+#ifdef HNDCTF
+ fdb_ctf_create(fdb);
+#endif /* HNDCTF */
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
}
@@ -820,6 +966,9 @@
fdb->used = jiffies;
if (modified) {
fdb->updated = jiffies;
+#ifdef HNDCTF
+ fdb_ctf_create(fdb);
+#endif /* HNDCTF */
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ec02f58..34787ef 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -28,6 +28,10 @@
#include "br_private.h"
+#ifdef HNDCTF
+#include <ctf/hndctf.h>
+#endif /* HNDCTF */
+
/*
* Determine initial path cost based on speed.
* using recommendations from 802.1d standard
@@ -277,6 +281,10 @@
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p, *n;
+#ifdef HNDCTF
+ ctf_dev_unregister(kcih, dev);
+#endif /* HNDCTF */
+
list_for_each_entry_safe(p, n, &br->port_list, list) {
del_nbp(p);
}
@@ -359,7 +367,22 @@
res = register_netdev(dev);
if (res)
- free_netdev(dev);
+ goto out_free;
+
+#ifdef HNDCTF
+ if ((ctf_dev_register(kcih, dev, TRUE) != BCME_OK) ||
+ (ctf_enable(kcih, dev, TRUE, NULL) != BCME_OK)) {
+ ctf_dev_unregister(kcih, dev);
+ unregister_netdev(dev);
+ res = -ENXIO;
+ goto out_free;
+ }
+#endif /* HNDCTF */
+
+ return 0;
+
+out_free:
+ free_netdev(dev);
return res;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a1f697e..148b35b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -21,6 +21,10 @@
#include "br_private.h"
#include "br_private_stp.h"
+#ifdef HNDCTF
+#include <ctf/hndctf.h>
+#endif /* HNDCTF */
+
static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
u32 filter_mask)
{
@@ -1080,6 +1084,17 @@
err = br_changelink(dev, tb, data);
if (err)
unregister_netdevice(dev);
+
+#ifdef HNDCTF
+ if (!err &&
+ ((ctf_dev_register(kcih, dev, TRUE) != BCME_OK) ||
+ (ctf_enable(kcih, dev, TRUE, NULL) != BCME_OK))) {
+ ctf_dev_unregister(kcih, dev);
+ unregister_netdevice(dev);
+ err = -ENXIO;
+ }
+#endif /* HNDCTF */
+
return err;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 73dfd77..ced081f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,6 +77,12 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
+#ifdef HNDCTF
+#include <typedefs.h>
+#include <bcmdefs.h>
+#include <osl.h>
+#endif
+
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
@@ -249,11 +255,25 @@
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
+
+#ifdef BCMDBG_CTRACE
+ INIT_LIST_HEAD(&skb->ctrace_list);
+ skb->func[0] = (char *)__FUNCTION__;
+ skb->line[0] = __LINE__;
+ skb->ctrace_start = 0;
+ skb->ctrace_count = 1;
+#endif /* BCMDBG_CTRACE */
+
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
+#ifdef BCMFA
+ skb->napt_idx = BCM_FA_INVALID_IDX_VAL;
+ skb->napt_flags = 0;
+#endif
+
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
@@ -758,6 +778,12 @@
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
+#ifdef PKTC
+ memset(new->pktc_cb, 0, sizeof(new->pktc_cb));
+#endif
+#ifdef CTF_PPPOE
+ memset(new->ctf_pppoe_cb, 0, sizeof(new->ctf_pppoe_cb));
+#endif
new->tstamp = old->tstamp;
/* We do not copy old->sk */
new->dev = old->dev;
@@ -776,6 +802,30 @@
memcpy(&new->headers_start, &old->headers_start,
offsetof(struct sk_buff, headers_end) -
offsetof(struct sk_buff, headers_start));
+
+#if defined(HNDCTF) || defined(CTFPOOL)
+ new->pktc_flags = old->pktc_flags;
+#endif
+#ifdef CTFMAP
+ new->ctfmap = NULL;
+#endif
+#ifdef CTFPOOL
+ new->ctfpool = NULL;
+#endif
+
+#ifdef BCMDBG_CTRACE
+ INIT_LIST_HEAD(&new->ctrace_list);
+ new->func[0] = (char *)__FUNCTION__;
+ new->line[0] = __LINE__;
+ new->ctrace_start = 0;
+ new->ctrace_count = 1;
+#endif /* BCMDBG_CTRACE */
+
+#ifdef BCMFA
+ new->napt_idx = BCM_FA_INVALID_IDX_VAL;
+ new->napt_flags = 0;
+#endif
+
CHECK_SKB_FIELD(protocol);
CHECK_SKB_FIELD(csum);
CHECK_SKB_FIELD(hash);
@@ -3350,6 +3400,9 @@
if (copy > 0) {
if (copy > len)
copy = len;
+#ifdef CONFIG_BCM47XX
+ sg->page_link = 0; /* Broadcom did this, why? */
+#endif
sg_set_buf(sg, skb->data + offset, copy);
elt++;
if ((len -= copy) == 0)
@@ -3368,6 +3421,9 @@
if (copy > len)
copy = len;
+#ifdef CONFIG_BCM47XX
+ sg[elt].page_link = 0; /* Broadcom did this, why? */
+#endif
sg_set_page(&sg[elt], skb_frag_page(frag), copy,
frag->page_offset+offset-start);
elt++;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0212591..94b8c5d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1743,6 +1743,8 @@
size += nla_total_size(4);
if (type == -1 || type == NETCONFA_MC_FORWARDING)
size += nla_total_size(4);
+ if (type == -1 || type == NETCONFA_BC_FORWARDING)
+ size += nla_total_size(4);
if (type == -1 || type == NETCONFA_PROXY_NEIGH)
size += nla_total_size(4);
if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
@@ -1783,6 +1785,10 @@
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
goto nla_put_failure;
+ if ((type == -1 || type == NETCONFA_BC_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_BC_FORWARDING,
+ IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+ goto nla_put_failure;
if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
@@ -2048,6 +2054,10 @@
if ((new_value == 0) && (old_value != 0))
rt_cache_flush(net);
+ if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
+ new_value != old_value)
+ rt_cache_flush(net);
+
if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
@@ -2160,6 +2170,7 @@
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
devinet_sysctl_forward),
DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
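+ /* Cradlepoint: exposed as /proc/sys/net/ipv4/conf/<dev>/bc_forwarding,
+ * e.g. "echo 1 > /proc/sys/net/ipv4/conf/eth0/bc_forwarding" to enable
+ * forwarding of broadcasts on that interface */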
+ DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index d9c552a..e663596 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -66,7 +66,7 @@
u8 ver;
int ret;
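+ /* Cradlepoint: only require the 4-byte base GRE header here; demanding
+ * 12 bytes dropped short packets such as keepalives, which carry no
+ * payload beyond the header. */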
- if (!pskb_may_pull(skb, 12))
+ if (!pskb_may_pull(skb, 4))
goto drop;
ver = skb->data[1]&0x7f;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 36e2697..cacc3ad 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -481,7 +481,8 @@
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key_hash(net, fl4,
- icmp_multipath_hash_skb(skb_in));
+ icmp_multipath_hash_skb(skb_in),
+ false);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3e41840..296870a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -399,19 +399,25 @@
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
- skb_pop_mac_header(skb);
- if (tunnel->collect_md) {
- __be16 flags;
- __be64 tun_id;
+ /* Cradlepoint: if it's a keepalive, handle it here */
+ if (tpi->proto == 0 && skb->len == 0) {
+ tunnel->ka_outstanding = false;
+ consume_skb(skb);
+ } else {
+ skb_pop_mac_header(skb);
+ if (tunnel->collect_md) {
+ __be16 flags;
+ __be64 tun_id;
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
- tun_id = key_to_tunnel_id(tpi->key);
- tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
- if (!tun_dst)
- return PACKET_REJECT;
+ flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tun_id = key_to_tunnel_id(tpi->key);
+ tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+ }
+
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}
-
- ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
return PACKET_REJECT;
@@ -479,6 +485,81 @@
}
}
+/* Cradlepoint: ipgre_keepalive_xmit */
+void ipgre_keepalive_xmit(unsigned long data)
+{
+ struct sk_buff *skb = NULL;
+ struct iphdr *iph;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ struct ip_tunnel *t;
+
+ t = (struct ip_tunnel*)data;
+ if (!t)
+ return;
+
+ rt = ip_route_output_gre(dev_net(t->dev), &fl4,
+ t->parms.iph.daddr,
+ t->parms.iph.saddr,
+ t->parms.o_key,
+ RT_TOS(t->parms.iph.tos),
+ t->parms.link);
+ if (IS_ERR(rt)) {
+ goto ka_check_and_reset;
+ }
+
+ skb = alloc_skb(LL_RESERVED_SPACE(t->dev) +
+ rt->dst.header_len +
+ sizeof(struct iphdr) +
+ ip_gre_calc_hlen(t->parms.o_flags),
+ GFP_ATOMIC);
+ if (!skb) {
+ ip_rt_put(rt);
+ goto ka_check_and_reset;
+ }
+
+ skb_reserve(skb, LL_RESERVED_SPACE(t->dev) +
+ rt->dst.header_len +
+ sizeof(struct iphdr));
+ ip_rt_put(rt);
+
+ skb->protocol = htons(ETH_P_IP);
+ skb->dev = t->dev;
+
+ build_header(skb, t->tun_hlen, t->parms.o_flags,
+ 0, t->parms.o_key, htonl(t->o_seqno));
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+
+ iph = ip_hdr(skb);
+ memset(iph, 0, sizeof(struct iphdr));
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->tot_len = htons(skb->len);
+ iph->protocol = IPPROTO_GRE;
+ iph->tos = t->parms.iph.tos;
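+ /* The inner addresses are intentionally swapped (daddr is our local
+ * endpoint): the peer decapsulates the keepalive and routes the inner
+ * packet straight back to us, and the receive path above clears
+ * ka_outstanding when it arrives. */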
+ iph->daddr = fl4.saddr;
+ iph->saddr = fl4.daddr;
+ iph->ttl = 64;
+ ip_send_check(iph);
+
+ skb_reset_transport_header(skb);
+
+ka_check_and_reset:
+ if (t->ka_outstanding)
+ t->ka.fail_count++;
+ else
+ t->ka.fail_count = 0;
+
+ t->ka_outstanding = true;
+
+ if (skb != NULL) {
+ dev_queue_xmit(skb);
+ }
+ mod_timer(&t->ka_timer, jiffies+HZ*t->ka.interval);
+}
+
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params,
__be16 proto)
@@ -673,29 +754,46 @@
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err;
- struct ip_tunnel_parm p;
+ int err = 0;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
- if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
- return -EINVAL;
+ /* Cradlepoint: SIOCSKATUNNEL / SIOCGKATUNNEL keepalive ioctls */
+ if (cmd == SIOCSKATUNNEL || cmd == SIOCGKATUNNEL) {
+ struct ip_tunnel_ka_parm ka_p;
+
+ if (copy_from_user(&ka_p, ifr->ifr_ifru.ifru_data, sizeof(ka_p)))
+ return -EFAULT;
+
+ err = ip_tunnel_ka_ioctl(dev, &ka_p, cmd, &ipgre_keepalive_xmit);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &ka_p, sizeof(ka_p)))
+ return -EFAULT;
+ } else {
+ struct ip_tunnel_parm p;
+
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
+ if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
+ ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ return -EINVAL;
+ }
+ p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
+ p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
+
+ err = ip_tunnel_ioctl(dev, &p, cmd);
+ if (err)
+ return err;
+
+ p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
+ p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
+
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ return -EFAULT;
}
- p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
- p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
- err = ip_tunnel_ioctl(dev, &p, cmd);
- if (err)
- return err;
-
- p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
- p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
-
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
return 0;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 09c73dd..99bf20b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -521,10 +521,20 @@
* a block of the data of the original IP data part) that will yet fit in a
* single device frame, and queue such a frame for sending.
*/
-
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
+ return ip_do_fragment2(net, sk, skb, 0, output, NULL);
+}
+EXPORT_SYMBOL(ip_do_fragment);
+
+/* Cradlepoint: a version of ip_do_fragment that allows passing a nexthop
+ * override. If 'output2' is provided, output2 is called, otherwise output.
+ * This keeps it compatible with the wrapper above (for other callers). */
+int ip_do_fragment2(struct net *net, struct sock *sk, struct sk_buff *skb, u32 nexthop,
+ int (*output)(struct net *, struct sock *, struct sk_buff *),
+ int (*output2)(struct net *, struct sock *, struct sk_buff *, u32))
+{
struct iphdr *iph;
int ptr;
struct net_device *dev;
@@ -630,7 +640,10 @@
ip_send_check(iph);
}
- err = output(net, sk, skb);
+ if (output2)
+ err = output2(net, sk, skb, nexthop);
+ else
+ err = output(net, sk, skb);
if (!err)
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
@@ -767,7 +780,10 @@
ip_send_check(iph);
- err = output(net, sk, skb2);
+ if (output2)
+ err = output2(net, sk, skb2, nexthop);
+ else
+ err = output(net, sk, skb2);
if (err)
goto fail;
@@ -782,7 +798,7 @@
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err;
}
-EXPORT_SYMBOL(ip_do_fragment);
+EXPORT_SYMBOL(ip_do_fragment2);
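+
+/* Usage sketch: callers needing the nexthop override pass output2 and a NULL
+ * output (see ipt_ROUTE.c in this patch), e.g.
+ *	ip_do_fragment2(net, sk, skb, nexthop, NULL, ip_direct_send2);
+ * while plain callers keep using the ip_do_fragment() wrapper above. */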
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
@@ -1154,6 +1170,11 @@
cork->priority = ipc->priority;
cork->tx_flags = ipc->tx_flags;
+ /* Cradlepoint: note that the sender pinned an output interface (IP_PKTINFO) */
+ if (ipc->oif) {
+ cork->flags |= IPCORK_OIF_SPECIFIED;
+ }
+
return 0;
}
@@ -1371,6 +1392,12 @@
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list);
+ /* Cradlepoint: carry the pinned-oif flag from the cork over to the skb */
+ IPCB(skb)->flags &= ~IPSKB_OIF_SPECIFIED;
+ if (cork->flags & IPCORK_OIF_SPECIFIED) {
+ IPCB(skb)->flags |= IPSKB_OIF_SPECIFIED;
+ }
+
/* move skb->data to ip header from ext header */
if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 3310ac7..1cef6a8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -124,6 +124,25 @@
return (struct rtable *)dst;
}
+/* Cradlepoint: ip_tunnel_keepalive_cfg */
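+/* (Re)arm the keepalive timer: NULL ka/ka_cb keep the currently stored
+ * values (as used from ip_tunnel_add()), while ip_tunnel_ka_ioctl() passes
+ * fresh parameters from userspace. */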
+static void ip_tunnel_keepalive_cfg(struct ip_tunnel *t,
+ struct ip_tunnel_ka *ka,
+ void (*ka_cb)(unsigned long))
+{
+ if (t->ka.enabled)
+ del_timer(&t->ka_timer);
+ if (ka)
+ t->ka = *ka;
+ if (ka_cb)
+ t->ka_cb = ka_cb;
+ if (t->ka.enabled) {
+ t->ka.fail_count = 0;
+ t->ka_outstanding = false;
+ setup_timer(&t->ka_timer, t->ka_cb, (unsigned long)t);
+ mod_timer(&t->ka_timer, jiffies+HZ*t->ka.interval);
+ }
+}
+
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
@@ -267,12 +286,20 @@
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, t);
hlist_add_head_rcu(&t->hash_node, head);
+
+ /* Cradlepoint */
+ ip_tunnel_keepalive_cfg(t, NULL, NULL);
}
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, NULL);
+
+ /* Cradlepoint */
+ if (t->ka.enabled)
+ del_timer(&t->ka_timer);
+
hlist_del_init_rcu(&t->hash_node);
}
@@ -969,6 +996,52 @@
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+/* Cradlepoint: ip_tunnel_ka_ioctl */
+int ip_tunnel_ka_ioctl(struct net_device *dev,
+ struct ip_tunnel_ka_parm *ka_p,
+ int cmd,
+ void (*cb)(unsigned long))
+{
+ int err = 0;
+ struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
+
+ BUG_ON(!itn->fb_tunnel_dev);
+ switch (cmd) {
+ case SIOCSKATUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto done;
+ t = NULL;
+ if (dev == itn->fb_tunnel_dev)
+ t = ip_tunnel_find(itn, &ka_p->ipt, itn->fb_tunnel_dev->type);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ ip_tunnel_keepalive_cfg(t, &ka_p->ka, cb);
+ err = 0;
+ break;
+
+ case SIOCGKATUNNEL:
+ t = NULL;
+ if (dev == itn->fb_tunnel_dev)
+ t = ip_tunnel_find(itn, &ka_p->ipt, itn->fb_tunnel_dev->type);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ memcpy(&ka_p->ipt, &t->parms, sizeof(struct ip_tunnel_parm));
+ memcpy(&ka_p->ka, &t->ka, sizeof(struct ip_tunnel_ka));
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_ka_ioctl);
+
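+/* Userspace usage sketch (hypothetical; the struct and ioctl names are the
+ * ones this patch assumes):
+ *
+ *	struct ip_tunnel_ka_parm ka_p = {0};
+ *	struct ifreq ifr = {0};
+ *
+ *	strncpy(ifr.ifr_name, "gre1", IFNAMSIZ);
+ *	ka_p.ka.enabled = 1;
+ *	ka_p.ka.interval = 10;			- seconds between keepalives
+ *	ifr.ifr_ifru.ifru_data = (void *)&ka_p;
+ *	ioctl(sockfd, SIOCSKATUNNEL, &ifr);	- sockfd: any AF_INET socket
+ */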
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
return __ip_tunnel_change_mtu(dev, new_mtu, true);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index a03f834..210c571 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -89,6 +89,7 @@
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
u32 orig_mark = skb->mark;
int ret;
@@ -106,7 +107,19 @@
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
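+ /* For inter-address-family states the selector family is unspecified,
+ * so the inner mode must be derived from the decapsulated packet's
+ * protocol rather than taken from x->inner_mode, or the policy check
+ * below would use the wrong address family. */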
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(tunnel->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -158,6 +171,7 @@
struct net_device *tdev; /* Device to other host */
int pkt_len = skb->len;
int err;
+ int mtu;
if (!dst) {
dev->stats.tx_carrier_errors++;
@@ -194,6 +208,23 @@
tunnel->err_count = 0;
}
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ } else {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
+
+ dst_release(dst);
+ goto tx_error;
+ }
+
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c3776ff..39758b4 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -40,6 +40,16 @@
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+
+ /* Cradlepoint - honor IP_PKTINFO output interface setting */
+ if (!fl4.flowi4_oif && (IPCB(skb)->flags & IPSKB_OIF_SPECIFIED)) {
+ struct dst_entry* dst = skb_dst(skb);
+ if (dst && dst->dev) {
+ /* "bind" packet to previously determined output interface */
+ fl4.flowi4_oif = dst->dev->ifindex;
+ }
+ }
+
fl4.flowi4_mark = skb->mark;
fl4.flowi4_flags = flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c187c60..8a36644 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -263,6 +263,12 @@
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_ROUTE
+ tristate "ROUTE target support"
+ depends on IP_NF_MANGLE
+ help
+   This option adds a ROUTE target, which enables you to set up unusual
+   routes not supported by the standard kernel routing table, such as
+   sending packets out a specific interface or to a specific gateway.
+
+   To compile it as a module, choose M here. If unsure, say N.
+
# NAT + specific targets: nf_conntrack
config IP_NF_NAT
tristate "iptables NAT support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 87b073d..e97f523 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -13,6 +13,8 @@
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
+obj-$(CONFIG_NETFILTER_CP_FLOWSTATS) += cp_ip_record_track.o
+
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
@@ -63,6 +65,7 @@
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_ROUTE) += ipt_ROUTE.o
obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
# generic ARP tables
diff --git a/net/ipv4/netfilter/cp_ip_record_track.c b/net/ipv4/netfilter/cp_ip_record_track.c
new file mode 100644
index 0000000..d07e385
--- /dev/null
+++ b/net/ipv4/netfilter/cp_ip_record_track.c
@@ -0,0 +1,646 @@
+/*
+ * Copyright 2019 CradlePoint, Inc. <www.cradlepoint.com>.
+ *
+ * cp_ip_record_track.c:
+ *
+ * Maintain the data structures that track the top 10 IP destinations by
+ * traffic volume.
+ */
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
+#include <linux/export.h>
+#include <linux/vmalloc.h>
+#include <linux/rtc.h>
+
+
+/*
+ * This file maintains the IP records for the top 10 IP addresses with the most
+ * traffic. The IP records are updated when the connection tracker invokes
+ * add_ip_record(), which is called when a TCP or UDP connection terminates or
+ * stays up for longer than 5 minutes.
+ * During an IP record update, it also checks whether the IP address being
+ * updated should be part of the 10-most-visited list, and updates the list if
+ * necessary.
+ * It also provides a /proc filesystem interface, "ip_conn_addr", so that the
+ * IP records can be accessed externally; the Status Tree implementation
+ * updates its stats from that /proc entry.
+ */
+
+static spinlock_t ip_rec_lock;
+
+
+/* IP record structure; maintained per destination IP address seen */
+typedef struct ip_rec_ {
+ int idx;
+ u_int32_t dst_ip;
+ u_int32_t packets_in;
+ u_int32_t packets_out;
+ u_int32_t num_tcp_packets;
+ u_int32_t num_udp_packets;
+ u_int64_t bytes_in;
+ u_int64_t bytes_out;
+ u_int32_t num_conn;
+ u_int32_t num_samples;
+ u_int32_t tot_delta;
+ u_int32_t tot_delta_square;
+ u_int32_t max_delta;
+ u_int32_t min_delta;
+ u_int32_t ave_lat;
+ u_int32_t duration;
+ u_int16_t app_id;
+ u_int8_t cat_id;
+ char int_name[IFNAMSIZ];
+ struct ip_rec_ *next;
+} ip_rec_t;
+
+#define NUM_IP_RECORDS 1024
+#define HASH_TABLE_SIZE 1024
+
+#define MAX_IP_REC 10
+#define MIN_DELTA_INIT 0xffffff
+#define INIT_MIN_PACKET_CNT 0x3fffffff // some large value
+
+/* Static array to store the IP records */
+static ip_rec_t ip_records[NUM_IP_RECORDS];
+
+/* Pointer based hashtable for easy search of IP address */
+static ip_rec_t *hashtable[HASH_TABLE_SIZE];
+
+/* Array to store pointers to top 10 IP records */
+static ip_rec_t *max_ip[MAX_IP_REC];
+
+/****************************************************************************
+* Storing and accessing the IP records.
+* The IP records are stored in a static array of 1024 entries initialized
+* to zero: ip_records[NUM_IP_RECORDS].
+* The index "pool_index" points to the next available entry in the records
+* table. When a new record needs to be added, we return the entry pointed to
+* by pool_index and increment the index. If we reach the end of the table, no
+* further records are added.
+* When an entry needs to be updated, we need an easy way to search through
+* the entries. We achieve this using a hash table.
+* "ip_rec_t *hashtable[HASH_TABLE_SIZE]" maintains pointers to the IP
+* records and is indexed by a hash of the destination IP. When a new entry is
+* added, the hash table is updated with a pointer to the new IP record. When
+* an IP record needs to be updated, the record is looked up via the hash
+* table.
+*
+* In this design an individual entry is never deleted. The entire records
+* table, the hash table, and the related stats are cleared at a periodic
+* interval after the top 10 records are copied into the Status Tree.
+****************************************************************************/
+
+
+/* Additional counters for detailed analysis */
+static u_int32_t tot_pkts = 0;
+static u_int32_t num_ip_addr = 0;
+
+static int pool_index = 0;
+static int num_destns = 0;
+
+/*
+ * Computes the Hash index given the IP address.
+ * The index is used later to store the pointers in an array
+ * for easy access.
+ */
+static unsigned short hashfn(unsigned int addr)
+{
+ unsigned short x1, x2, x3, x4;
+ x1 = (unsigned short) (addr & 0xff);
+ x2 = (unsigned short) ((addr>>8) & 0xff);
+ x3 = (unsigned short) ((addr>>14) & 0x3ff);
+ x4 = (unsigned short) ((addr>>22) & 0x3ff);
+ return (x1^x2^x3^x4) & (0x3ff);
+}
+
+/*
+ * Get index of the next available IP record entry
+ */
+static int get_pool_index(void)
+{
+ /* num_destns counts how many destinations were accessed in a sample period, even if we
+ * run out of the records pool.
+ */
+ num_destns++;
+ if (pool_index >= NUM_IP_RECORDS) {
+ if (num_destns == NUM_IP_RECORDS + 1) {
+ /* This ensures that the message is printed only once */
+ printk(" IP records full no entries left \n");
+ }
+ return -1;
+ }
+ pool_index++;
+ return (pool_index - 1);
+}
+
+/* Given an IP address, return a pointer to its ip_record entry.
+ * If the record exists in the record table, return the pointer;
+ * otherwise take a free index from the array, insert the pointer to the
+ * entry into the hash table, and return it.
+ * Return NULL if we have reached the maximum number of records.
+ */
+static ip_rec_t *get_record_entry(unsigned int ipaddr)
+{
+ unsigned short idx = hashfn(ipaddr);
+ short pidx;
+
+ /* check if the address exists at the index, if yes return the location */
+ ip_rec_t *record = hashtable[idx];
+
+ while (record) {
+ if (record->dst_ip == ipaddr) { // existing record
+ return record;
+ }
+ record = record->next;
+ }
+
+ /* get a free entry from the record pool table */
+ pidx = get_pool_index();
+ if (pidx == -1) {
+ return NULL;
+ }
+
+ /* This is a new record: initialize it and link it into the hash chain */
+ record = &ip_records[pidx];
+ record->next = hashtable[idx];
+ record->idx = idx;
+ record->max_delta = 0;
+ record->min_delta = MIN_DELTA_INIT;
+
+ /* publish the new record in the hash table */
+ hashtable[idx] = record;
+
+ /* return the pointer */
+ return record;
+}
+
+
+/*
+ * Routine to store and update the top 10 IP records (based on packet counts).
+ * Called every time an IP record is updated (from add_ip_record()).
+ * Compare the packet count for the new record with the top 10 list;
+ * if the new record's count exceeds the smallest entry's count, replace that
+ * entry with the new record.
+ */
+void update_max_ip(ip_rec_t *ip_rec)
+{
+ unsigned int i, min = INIT_MIN_PACKET_CNT;
+ unsigned int min_idx = 0;
+
+ /* From the top 10 list, locate the IP record with the fewest packets */
+ for (i = 0; i < MAX_IP_REC; i++) {
+ if (max_ip[i] == NULL) {
+ // The table is not full yet, simply add this record.
+ max_ip[i] = ip_rec;
+ return;
+ }
+ /* if this entry is already in the list simply return */
+ if (ip_rec->dst_ip == max_ip[i]->dst_ip) {
+ return;
+ }
+ if ((max_ip[i]->packets_in + max_ip[i]->packets_out) < min) {
+ min = max_ip[i]->packets_in + max_ip[i]->packets_out;
+ min_idx = i;
+ }
+ }
+
+ /* if the new record has more packets than the min entry identified,
+ * replace the min entry with the new entry.
+ */
+ if ((ip_rec->packets_in + ip_rec->packets_out) >
+ (max_ip[min_idx]->packets_in + max_ip[min_idx]->packets_out)) {
+ max_ip[min_idx] = ip_rec;
+ }
+}
+
+
+#ifdef CONFIG_NF_CONNTRACK_APPID
+/*
+ * store_app_cat_id
+ * Extract the last identified app-id and cat-id from the conntrack structure
+ * and store them in the IP address record.
+ */
+static void store_app_cat_id(struct nf_conn *ct, ip_rec_t * record)
+{
+ int i;
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+ if (i == 0 && ct->appid[i].value == 0) {
+ return;
+ }
+
+ if (i > 0 && ct->appid[i].app_id == 0) {
+ // Find that last APPID in the chain
+ record->cat_id = ct->appid[i-1].cat_id;
+ record->app_id = ct->appid[i-1].app_id;
+ return;
+ }
+
+ if (i == MAX_CT_APPID_VALS - 1) {
+ // All APPID vals filled
+ record->cat_id = ct->appid[i].cat_id;
+ record->app_id = ct->appid[i].app_id;
+ return;
+ }
+ }
+}
+#endif
+
+
+/*
+ * Called when a connection terminates or stays up too long (5 minutes).
+ * Updates the IP records table entry with the info gathered in the nf_conn
+ * structure.
+ * conn_active indicates whether the connection terminated or the update is
+ * due to the timeout for long-duration flows.
+ */
+void add_ip_record(struct nf_conn *ct, int conn_active)
+{
+ struct nf_conntrack_tuple *tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ unsigned int dst_ip = tuple->dst.u3.ip;
+ ip_rec_t * record;
+
+ spin_lock_bh(&ip_rec_lock);
+
+ /* If the record already exists, get the pointer;
+ * otherwise get a pointer to a new entry.
+ * If no record is available, return without adding any data.
+ */
+ record = get_record_entry(dst_ip);
+
+ if (record == NULL) {
+ spin_unlock_bh(&ip_rec_lock);
+ return;
+ }
+
+ /* Update the statistics for this record. */
+ tot_pkts += (ct->packets_in + ct->packets_out);
+
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == IPPROTO_TCP) {
+ record->num_tcp_packets += (ct->packets_in + ct->packets_out);
+ record->num_samples += ct->proto.tcp.num_samples;
+ record->tot_delta += ct->proto.tcp.tot_delta;
+ record->tot_delta_square += ct->proto.tcp.tot_delta_square;
+
+ if (record->max_delta < ct->proto.tcp.max_delta) {
+ record->max_delta = ct->proto.tcp.max_delta;
+ }
+ if (record->min_delta > ct->proto.tcp.min_delta) {
+ record->min_delta = ct->proto.tcp.min_delta;
+ }
+ } else if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == IPPROTO_UDP){
+ record->num_udp_packets += (ct->packets_in + ct->packets_out);
+ }
+
+ record->dst_ip = dst_ip;
+ if (!conn_active) {
+ /* connection terminated, so update the connection count */
+ record->num_conn++;
+ }
+
+ if (ct->int_name[0]) {
+ strlcpy(record->int_name, ct->int_name, IFNAMSIZ);
+ }
+ record->duration += (jiffies - ct->start_time);
+ record->packets_in += ct->packets_in;
+ record->packets_out += ct->packets_out;
+ record->bytes_in += ct->bytes_in;
+ record->bytes_out += ct->bytes_out;
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ store_app_cat_id(ct, record);
+#endif /* CONFIG_NF_CONNTRACK_APPID */
+ // Update the top 10 list
+ update_max_ip(record);
+
+ spin_unlock_bh(&ip_rec_lock);
+}
+EXPORT_SYMBOL(add_ip_record);
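+
+/* Usage sketch (hypothetical conntrack call site; the stats fields used on
+ * struct nf_conn are ones this patch assumes):
+ *
+ *	if (cp_flowstats_enabled)
+ *		add_ip_record(ct, 0);	- 0: the connection terminated
+ */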
+
+unsigned int cp_flowstats_enabled = 0;
+
+/*
+ * Clear the IP records table.
+ * Enable/disable latency measurements.
+ *
+ * To enable: echo e > /proc/net/ip_conn_addr
+ * To disable: echo d > /proc/net/ip_conn_addr
+ * To clear: echo f > /proc/net/ip_conn_addr
+ */
+static ssize_t ipaddr_seq_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ char inc;
+
+ if (copy_from_user(&inc, buf, 1))
+ return 0;
+
+ /* Enable/disable latency calculations. */
+ if (inc == 'e') {
+ cp_flowstats_enabled = 1;
+ }
+ if (inc == 'd') {
+ cp_flowstats_enabled = 0;
+ }
+
+
+ /* Flush all the records and clear the stats */
+ if (inc == 'f') {
+ /* Clear out all the ip_records table, as well as top 10 ip records. */
+ spin_lock_bh(&ip_rec_lock);
+ memset(hashtable, 0, sizeof(hashtable));
+ memset(ip_records, 0, sizeof(ip_records));
+ memset(max_ip, 0, sizeof(max_ip));
+
+ tot_pkts = 0;
+ num_ip_addr = 0;
+ pool_index = 0;
+ num_destns = 0;
+
+ spin_unlock_bh(&ip_rec_lock);
+ }
+ return count;
+}
+
+
+#define MSEC_IN_SEC 1000
+#define MAX_IP_ADDR_STR 17
+#define TIME_STR_LEN 50
+#define UTC_OFFSET 7
+#define SEC_PER_MIN 60
+#define MIN_PER_HOUR 60
+
+/*
+ * Prints the IP records in CSV format; if used with console logging it can
+ * generate a continuous time series for the IP records.
+ */
+static int ct_ipcsv_show(struct seq_file *s, void *v)
+{
+ int idx;
+ u_int32_t ip;
+ char ip_str[MAX_IP_ADDR_STR + 1];
+ char time_str[TIME_STR_LEN];
+ u_int32_t latency = 0;
+ u_int32_t variance = 0;
+ u_int32_t samples = 0;
+ ip_rec_t *record;
+ struct timeval time;
+ u_int32_t local_time;
+ struct rtc_time tm;
+ u_int32_t tick = (MSEC_IN_SEC)/(HZ);
+ u_int32_t average = 0, avg_square = 0, variance_ms = 0;
+
+
+ do_gettimeofday(&time);
+ local_time = (u32)(time.tv_sec - (UTC_OFFSET * SEC_PER_MIN * MIN_PER_HOUR));
+ rtc_time_to_tm(local_time, &tm);
+
+ snprintf(time_str, TIME_STR_LEN, "%04d-%02d-%02d %02d:%02d:%02d",
+ tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+
+ spin_lock_bh(&ip_rec_lock);
+ for (idx = 0; idx < MAX_IP_REC; idx++) {
+ samples = 0; latency = 0;
+ if (max_ip[idx] == NULL) {
+ continue;
+ }
+ record = max_ip[idx];
+ ip = ntohl(record->dst_ip);
+ if (record->num_samples) {
+ average = (record->tot_delta)/ record->num_samples;
+ latency = (record->tot_delta *tick)/ record->num_samples;
+ avg_square = average * average;
+ variance = ((record->tot_delta_square)/(record->num_samples) - avg_square);
+ variance_ms = variance * tick * tick;
+ samples = record->num_samples;
+ }
+
+ snprintf(ip_str, MAX_IP_ADDR_STR, "%u.%u.%u.%u ",
+ ((ip>>24) & 0xff), ((ip >> 16) & 0xff),
+ ((ip >> 8) & 0xff), (ip & 0xff));
+ seq_printf(s, "%s, %s, %u, %u, %u, %u, %u, %u, %u, %u, %llu, %llu, %u, %u\n",
+ time_str, ip_str, record->num_conn,
+ samples, record->packets_in, record->packets_out,
+ latency, variance_ms, record->num_tcp_packets,
+ record->num_udp_packets, record->bytes_in,
+ record->bytes_out, record->app_id, record->cat_id);
+ }
+ seq_printf(s, " \n" );
+
+ seq_printf(s, " cp_flowstats_enabled = %d \n", cp_flowstats_enabled);
+
+ spin_unlock_bh(&ip_rec_lock);
+
+ return 0;
+}
+
+
+/*
+ * Print all the entries of the IP records table.
+ */
+static int show_iprecords_table(struct seq_file *s, void *v)
+{
+ int idx;
+ u_int32_t ip;
+ char ip_str[MAX_IP_ADDR_STR + 1];
+ u_int32_t latency = 0;
+ u_int32_t variance = 0;
+ u_int32_t samples = 0;
+ ip_rec_t *record;
+ u_int32_t tick = (MSEC_IN_SEC)/(HZ);
+ u_int32_t average = 0, avg_square = 0, variance_ms = 0;
+ u_int64_t latency_tot_sqr_ms = 0;
+
+ seq_printf(s, " Idx IP Connections Samples PkIn PkOut Latency SumSquare TCPpkt UDPpkt BytesIn BytesOut Intf AppId CatId \n");
+
+ spin_lock_bh(&ip_rec_lock);
+ for (idx = 0; idx < NUM_IP_RECORDS; idx++) {
+ samples = 0; latency = 0;
+ average = 0; avg_square = 0; variance = 0; variance_ms = 0;
+ latency_tot_sqr_ms = 0;
+
+ record = &ip_records[idx];
+ if (record->dst_ip == 0) {
+ continue;
+ }
+
+ ip = ntohl(record->dst_ip);
+ if (record->num_samples) {
+ average = (record->tot_delta)/ record->num_samples;
+ latency = (record->tot_delta *tick)/ record->num_samples;
+ avg_square = average * average;
+ variance = ((record->tot_delta_square)/(record->num_samples) - avg_square);
+ variance_ms = variance * tick * tick;
+ latency_tot_sqr_ms = record->tot_delta_square * tick * tick;
+ samples = record->num_samples;
+ }
+
+ snprintf(ip_str, MAX_IP_ADDR_STR, "%u.%u.%u.%u ",
+ ((ip>>24) & 0xff), (ip >> 16) & 0xff, (ip >> 8) & 0xff, ip & 0xff);
+ seq_printf(s, "%3u %4u %16s %5u %5u %8u %8u %6u %10llu %7u %7u %10llu %10llu %15s %7u %6u\n",
+ idx, hashfn(record->dst_ip), ip_str, record->num_conn,
+ samples, record->packets_in, record->packets_out,
+ latency, latency_tot_sqr_ms, record->num_tcp_packets,
+ record->num_udp_packets, record->bytes_in,
+ record->bytes_out,
+ record->int_name[0] ? (record->int_name) : "null",
+ record->app_id, record->cat_id);
+
+ }
+ spin_unlock_bh(&ip_rec_lock);
+ return 0;
+}
+
+
+/*
+ * Print the contents if the IP records for the top 10 IP addresses.
+ */
+static int ct_ipaddr_show(struct seq_file *s, void *v)
+{
+ int idx;
+ u_int32_t ip;
+ char ip_str[MAX_IP_ADDR_STR + 1];
+ u_int32_t latency = 0;
+ u_int32_t variance = 0;
+ u_int32_t samples = 0;
+ ip_rec_t *record;
+ u_int32_t tick = (MSEC_IN_SEC)/(HZ);
+ u_int32_t average = 0, avg_square = 0, variance_ms = 0;
+ u_int64_t latency_tot_sqr_ms = 0;
+ int details_enabled = false; // option to print the entire table
+
+
+ spin_lock_bh(&ip_rec_lock);
+
+
+ seq_printf(s, " Idx IP Connections Samples PkIn PkOut Latency SumSquare TCPpkt UDPpkt BytesIn BytesOut Intf AppId CatId \n");
+ for (idx = 0; idx < MAX_IP_REC; idx++) {
+ samples = 0; latency = 0;
+ average = 0; avg_square = 0; variance = 0; variance_ms = 0;
+ latency_tot_sqr_ms = 0;
+ if (max_ip[idx] == NULL) {
+ continue;
+ }
+ record = max_ip[idx];
+ ip = ntohl(record->dst_ip);
+ if (record->num_samples) {
+ average = (record->tot_delta)/ record->num_samples;
+ latency = (record->tot_delta *tick)/ record->num_samples;
+ avg_square = average * average;
+ variance = ((record->tot_delta_square)/(record->num_samples) - avg_square);
+ variance_ms = variance * tick * tick;
+ latency_tot_sqr_ms = record->tot_delta_square * tick * tick;
+ samples = record->num_samples;
+ }
+
+ snprintf(ip_str, MAX_IP_ADDR_STR, "%u.%u.%u.%u ",
+ ((ip>>24) & 0xff), (ip >> 16) & 0xff, (ip >> 8) & 0xff, ip & 0xff);
+ seq_printf(s, "%3d %16s %5u %5u %8u %8u %6u %10llu %7u %7u %10llu %10llu %15s %7d %6d\n",
+ idx, ip_str, record->num_conn,
+ samples, record->packets_in, record->packets_out,
+ latency, latency_tot_sqr_ms, record->num_tcp_packets,
+ record->num_udp_packets, record->bytes_in,
+ record->bytes_out,
+ record->int_name[0] ? (record->int_name) : "null",
+ record->app_id, record->cat_id);
+ }
+
+ seq_printf(s, " Total Packets %u, Total number of addresses %d\n", tot_pkts, num_destns);
+ spin_unlock_bh(&ip_rec_lock);
+
+ if (details_enabled) {
+ show_iprecords_table(s, v);
+ }
+ return 0;
+}
+
+static int ct_ipaddr_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ct_ipaddr_show, NULL);
+}
+
+
+static int ct_ipcsv_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ct_ipcsv_show, NULL);
+}
+
+
+/* Temporary: generates CSV-style records */
+static const struct file_operations ct_ipcsv_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_ipcsv_open,
+ .read = seq_read,
+ .write = ipaddr_seq_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+
+static const struct file_operations ct_ipaddr_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_ipaddr_open,
+ .read = seq_read,
+ .write = ipaddr_seq_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+
+/*
+ * Called during initialization.
+ * Clear the table entries and related structures.
+ * Create the proc entries.
+ * Called from __net_init ip_conntrack_net_init() when the related conntrack
+ * structures are created.
+ */
+int conn_track_ip_addr_track(struct net *net)
+{
+ struct proc_dir_entry *proc;
+
+ memset(hashtable, 0, sizeof(hashtable));
+ memset(max_ip, 0, sizeof(max_ip));
+ memset(ip_records, 0, sizeof(ip_records));
+ tot_pkts = 0;
+ num_ip_addr = 0;
+ pool_index = 0;
+ num_destns = 0;
+
+ /* Create the proc entry to read the IP records table */
+ proc = proc_create("ip_conn_addr", 0440, net->proc_net, &ct_ipaddr_file_ops);
+
+ if (!proc)
+ printk(" Failed to create ip_conn_addr \n");
+ else
+ printk(" ip_conn_addr created successfully \n");
+
+
+ /* This is temporary; create a proc file in CSV format so that it can be
+ * used for interim data analysis.
+ */
+ proc = proc_create("ip_conn_csv", 0440, net->proc_net, &ct_ipcsv_file_ops);
+
+ if (!proc)
+ printk(" Failed to create ip_conn_csv \n");
+ else
+ printk(" ip_conn_csv created successfully \n");
+
+
+ spin_lock_init(&ip_rec_lock);
+
+ return 0;
+}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a399c54..25e57b0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -82,9 +82,14 @@
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
- if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ if (ipinfo->flags & IPT_F_NO_DEF_MATCH)
+ return true;
+
+ if (FWINV(ipinfo->smsk.s_addr &&
+ (ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
IPT_INV_SRCIP) ||
- FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ FWINV(ipinfo->dmsk.s_addr &&
+ (ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
IPT_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
@@ -135,6 +140,29 @@
return true;
}
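+/* Cradlepoint: mark rules whose IP header part matches everything (no
+ * addresses, interface masks, protocol, fragment or inversion constraints)
+ * so ip_packet_match() can accept them without the per-field checks above. */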
+static void
+ip_checkdefault(struct ipt_ip *ip)
+{
+ static const char iface_mask[IFNAMSIZ] = {};
+
+ if (ip->invflags || ip->flags & IPT_F_FRAG)
+ return;
+
+ if (memcmp(ip->iniface_mask, iface_mask, IFNAMSIZ) != 0)
+ return;
+
+ if (memcmp(ip->outiface_mask, iface_mask, IFNAMSIZ) != 0)
+ return;
+
+ if (ip->smsk.s_addr || ip->dmsk.s_addr)
+ return;
+
+ if (ip->proto)
+ return;
+
+ ip->flags |= IPT_F_NO_DEF_MATCH;
+}
+
static bool
ip_checkentry(const struct ipt_ip *ip)
{
@@ -664,6 +692,8 @@
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ ip_checkdefault(&e->ip);
+
e->counters.pcnt = xt_percpu_counter_alloc();
if (IS_ERR_VALUE(e->counters.pcnt))
return -ENOMEM;
@@ -948,6 +978,7 @@
const struct xt_table_info *private = table->private;
int ret = 0;
const void *loc_cpu_entry;
+ u8 flags;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -975,6 +1006,14 @@
goto free_counters;
}
+ flags = e->ip.flags & IPT_F_MASK;
+ if (copy_to_user(userptr + off
+ + offsetof(struct ipt_entry, ip.flags),
+ &flags, sizeof(flags)) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
for (i = sizeof(struct ipt_entry);
i < e->target_offset;
i += m->u.match_size) {
diff --git a/net/ipv4/netfilter/ipt_ROUTE.c b/net/ipv4/netfilter/ipt_ROUTE.c
new file mode 100644
index 0000000..3d39454
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ROUTE.c
@@ -0,0 +1,561 @@
+/*
+ * This implements the ROUTE target, which enables you to setup unusual
+ * routes not supported by the standard kernel routing table.
+ *
+ * Copyright (C) 2002 Cedric de Launois <delaunois@info.ucl.ac.be>
+ *
+ * v 1.11 2004/11/23
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <linux/version.h>
+#include <linux/if_arp.h>
+#include <net/ip.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/icmp.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ROUTE.h>
+
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+
+#if 1
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NIPQUAD(addr) \
+ ((unsigned char *)&addr)[0], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[3]
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cedric de Launois <delaunois@info.ucl.ac.be>");
+MODULE_DESCRIPTION("iptables ROUTE target module");
+
+/* Cradlepoint:
+ * FIXME: This has gotten horribly ugly, with us essentially duplicating ip_output.c to
+ * allow bypassing most of the networking stack. We could probably vastly simplify this
+ * by adding a path in ip_output itself for us to use.
+ * /Cradlepoint */
+
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+ const struct dst_entry *dst;
+
+ if (skb->dev != NULL)
+ return dev_net(skb->dev);
+ dst = skb_dst(skb);
+ if (dst != NULL && dst->dev != NULL)
+ return dev_net(dst->dev);
+#endif
+ return &init_net;
+}
+
+/* Stolen from ip_exceeds_mtu */
+static bool ip_direct_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0))
+ return false;
+
+ /* original fragment exceeds mtu and DF is set */
+ if (unlikely(IPCB(skb)->frag_max_size > mtu))
+ return true;
+
+ if (skb->ignore_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+/* Try to route the packet according to the routing keys specified in
+ * route_info. Keys are :
+ * - ifindex :
+ * 0 if no oif preferred,
+ * otherwise set to the index of the desired oif
+ * - route_info->gw :
+ * 0 if no gateway specified,
+ * otherwise set to the next host to which the pkt must be routed
+ * If success, skb->dev is the output device to which the packet must
+ * be sent and skb->dst is not NULL
+ *
+ * NOTE: This will also decrement packet TTL and get packet ready for
+ * forwarding.
+ *
+ * RETURN: -1 if an error occured
+ * 1 if the packet was successfully routed to the
+ * desired destination
+ * 0 if the kernel routing table could not route the packet
+ * according to the keys specified
+ */
+static int route(struct sk_buff *skb,
+ unsigned int ifindex,
+ const struct ipt_route_target_info *route_info)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct net *net = pick_net(skb);
+ struct flowi4 fl4 = {
+ .daddr = iph->daddr,
+ .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
+ .flowi4_oif = ifindex,
+ .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_scope = RT_SCOPE_UNIVERSE,
+ .flowi4_flags = FLOWI_FLAG_KNOWN_NH,
+ };
+ struct rtable *rt;
+ u32 mtu;
+
+ /* The destination address may be overloaded by the target */
+ if (route_info->gw)
+ fl4.daddr = route_info->gw;
+
+ /* Trying to route the packet using the standard routing table. */
+ rt = __ip_route_output_key_hash(net, &fl4, -1, true);
+ if (IS_ERR(rt)) {
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: couldn't route pkt (err: %ld)\n", PTR_ERR(rt));
+ return -1;
+ }
+
+ /* Drop old route. */
+ skb_dst_drop(skb);
+
+ /* Check for DF and fragmentation and handle any ICMP responses needed
+ * before we start messing with the skb. Stolen from ip_forward. */
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (ip_direct_exceeds_mtu(skb, mtu)) {
+ /* Need to create an input route so the ICMP response goes the right place */
+ if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev) != 0) {
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: couldn't input route ICMP response\n");
+ } else {
+ IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ }
+ ip_rt_put(rt);
+ return -1;
+ }
+
+ /* Success if no oif was specified, or if the oif corresponds to the
+ * one desired; we know we will forward */
+ if (!ifindex || rt->dst.dev->ifindex == ifindex) {
+ skb_dst_set(skb, &rt->dst);
+ skb->dev = rt->dst.dev;
+ skb->protocol = htons(ETH_P_IP);
+
+ /* We are about to mangle packet. Copy it! */
+ if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) {
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: couldn't copy SKB for mangling\n");
+ return -1;
+ }
+
+ /* Decrease ttl after skb cow done */
+ ip_decrease_ttl(iph);
+
+ return 1;
+ }
+
+ /* The interface selected by the routing table is not the one
+ * specified by the user. This may happen because the dst address
+ * is one of our own addresses.
+ */
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n",
+ NIPQUAD(route_info->gw), ifindex, rt->dst.dev->ifindex);
+
+ ip_rt_put(rt);
+ return 0;
+}
+
+
+/* Stolen from ip_finish_output2
+ * PRE : skb->dev is set to the device we are leaving by
+ * skb->dst is not NULL
+ * POST: the packet is sent with the link layer header pushed
+ * the packet is destroyed
+ */
+static int ip_direct_send2(struct net *net, struct sock *sk, struct sk_buff *skb, u32 nexthop)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = (struct rtable *)dst;
+ struct net_device *dev = dst->dev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+ struct neighbour *neigh;
+
+ if (rt->rt_type == RTN_MULTICAST) {
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
+ } else if (rt->rt_type == RTN_BROADCAST)
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
+ if (!skb2) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ rcu_read_lock_bh();
+ if (!nexthop)
+ nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
+ neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+ if (!IS_ERR(neigh)) {
+ int res = dst_neigh_output(dst, neigh, skb);
+
+ rcu_read_unlock_bh();
+ return res;
+ }
+ rcu_read_unlock_bh();
+
+ net_dbg_ratelimited("ipt_ROUTE (%s): No header cache and no neighbour!\n",
+ __func__);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* Stolen from ip_fragment */
+static int ip_direct_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu, u32 nexthop,
+ int (*output)(struct net *, struct sock *, struct sk_buff *, u32 nexthop))
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ if ((iph->frag_off & htons(IP_DF)) == 0)
+ return ip_do_fragment2(net, sk, skb, nexthop, NULL, output);
+
+ if (unlikely(!skb->ignore_df ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment2(net, sk, skb, nexthop, NULL, output);
+}
+
+/* Stolen from ip_finish_output_gso */
+static int ip_direct_output_gso(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu,
+ u32 nexthop)
+{
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ /* common case: locally created skb or seglen is <= mtu */
+ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+ skb_gso_network_seglen(skb) <= mtu)
+ return ip_direct_send2(net, sk, skb, nexthop);
+
+ /* Slowpath - GSO segment length is exceeding the dst MTU.
+ *
+ * This can happen in two cases:
+ * 1) TCP GRO packet, DF bit not set
+ * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
+ * from host network stack.
+ */
+ features = netif_skb_features(skb);
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = ip_direct_fragment(net, sk, segs, mtu, nexthop, ip_direct_send2);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
+/* Stolen from ip_finish_output */
+static int ip_direct_send(struct net *net, struct sock *sk, struct sk_buff *skb, u32 nexthop)
+{
+ unsigned int mtu;
+
+ mtu = ip_skb_dst_mtu(skb);
+ if (skb_is_gso(skb))
+ return ip_direct_output_gso(net, sk, skb, mtu, nexthop);
+
+ if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ return ip_direct_fragment(net, sk, skb, mtu, nexthop, ip_direct_send2);
+
+ return ip_direct_send2(net, sk, skb, nexthop);
+}
+
+
+/* PRE : skb->dev is set to the device we are leaving through
+ * POST: - the packet is directly sent to the skb->dev device, without
+ * pushing the link layer header.
+ * - the packet is destroyed
+ */
+static inline int dev_direct_send(struct sk_buff *skb)
+{
+ return dev_queue_xmit(skb);
+}
+
+
+static unsigned int route_oif(const struct ipt_route_target_info *route_info,
+ struct sk_buff *skb)
+{
+ unsigned int ifindex = 0;
+ struct net_device *dev_out = NULL;
+
+ /* The user specified the interface name to use;
+ * resolve it to an interface index.
+ */
+ if ((dev_out = dev_get_by_name(&init_net, route_info->oif))) {
+ ifindex = dev_out->ifindex;
+ } else {
+ /* Unknown interface name : packet dropped */
+ if (net_ratelimit())
+ DEBUGP(KERN_DEBUG "ipt_ROUTE: oif interface %s not found\n", route_info->oif);
+ return NF_DROP;
+ }
+
+ /* Trying the standard way of routing packets */
+ switch (route(skb, ifindex, route_info)) {
+ case 1:
+ dev_put(dev_out);
+ if (route_info->flags & IPT_ROUTE_CONTINUE)
+ return XT_CONTINUE;
+
+ ip_direct_send(pick_net(skb), NULL, skb, route_info->gw);
+ return NF_STOLEN;
+
+ case 0:
+ /* Failed to send to oif. Trying the hard way */
+ if (route_info->flags & IPT_ROUTE_CONTINUE) {
+ dev_put(dev_out);
+ return NF_DROP;
+ }
+
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: forcing the use of %i\n",
+ ifindex);
+
+ /* We have to force the use of an interface.
+ * This interface must be a tunnel interface since
+ * otherwise we can't guess the hw address for
+ * the packet. For a tunnel interface, no hw address
+ * is needed.
+ */
+ if ((dev_out->type != ARPHRD_TUNNEL)
+ && (dev_out->type != ARPHRD_IPGRE)) {
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: can't guess the hw addr !\n");
+ dev_put(dev_out);
+ return NF_DROP;
+ }
+
+ /* Send the packet. This will also free the skb.
+ * Do not go through the POST_ROUTING hook because
+ * skb->dst is not set and because it will probably
+ * get confused by the destination IP address.
+ */
+ skb->dev = dev_out;
+ dev_direct_send(skb);
+ dev_put(dev_out);
+ return NF_STOLEN;
+
+ default:
+ /* Unexpected error */
+ dev_put(dev_out);
+ return NF_DROP;
+ }
+}
+
+
+static unsigned int route_iif(const struct ipt_route_target_info *route_info,
+ struct sk_buff *skb)
+{
+ struct net_device *dev_in = NULL;
+
+ /* Getting the current interface index. */
+ if (!(dev_in = dev_get_by_name(&init_net, route_info->iif))) {
+ if (net_ratelimit())
+ DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif);
+ return NF_DROP;
+ }
+
+ skb_dst_drop(skb);
+ skb->dev = dev_in;
+
+ netif_rx(skb);
+ dev_put(dev_in);
+ return NF_STOLEN;
+}
+
+
+static unsigned int route_gw(const struct ipt_route_target_info *route_info,
+ struct sk_buff *skb)
+{
+ if (route(skb, 0, route_info) != 1)
+ return NF_DROP;
+
+ if (route_info->flags & IPT_ROUTE_CONTINUE)
+ return XT_CONTINUE;
+
+ ip_direct_send(pick_net(skb), NULL, skb, route_info->gw);
+ return NF_STOLEN;
+}
+
+
+static unsigned int ipt_route_target(struct sk_buff *skb,
+ const struct xt_action_param *par)
+{
+ const struct ipt_route_target_info *route_info = par->targinfo;
+ unsigned int res;
+
+ /* If we are at the PREROUTING or INPUT hook,
+ * the TTL has not yet been decreased by the IP stack.
+ */
+ if (par->hooknum == NF_INET_PRE_ROUTING ||
+ par->hooknum == NF_INET_LOCAL_IN) {
+
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (iph->ttl <= 1) {
+ struct net *net = pick_net(skb);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_scope = ((iph->tos & RTO_ONLINK) ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
+ fl4.daddr = iph->daddr;
+ fl4.saddr = iph->saddr;
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt)) {
+ return NF_DROP;
+ }
+
+ if (skb->dev == rt->dst.dev) {
+ /* Drop old route. */
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
+
+ /* this will traverse the normal stack, and
+ * thus run conntrack on the icmp packet */
+ icmp_send(skb, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_TTL, 0);
+ } else
+ ip_rt_put(rt);
+
+ return NF_DROP;
+ }
+ }
+
+ if ((route_info->flags & IPT_ROUTE_TEE)) {
+ /*
+ * Copy the skb, and route the copy. Will later return
+ * XT_CONTINUE for the original skb, which should continue
+ * on its way as if nothing happened. The copy should be
+ * independently delivered to the ROUTE --gw.
+ */
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (!skb) {
+ if (net_ratelimit())
+ DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n");
+ return XT_CONTINUE;
+ }
+ }
+
+ if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+ }
+
+ if (route_info->oif[0] != '\0') {
+ res = route_oif(route_info, skb);
+ } else if (route_info->iif[0] != '\0') {
+ res = route_iif(route_info, skb);
+ } else if (route_info->gw) {
+ res = route_gw(route_info, skb);
+ } else {
+ if (net_ratelimit())
+ DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter!\n");
+ res = XT_CONTINUE;
+ }
+
+ if ((route_info->flags & IPT_ROUTE_TEE))
+ res = XT_CONTINUE;
+
+ return res;
+}
+
+static struct xt_target xt_route_reg = {
+ .name = "ROUTE",
+ .target = ipt_route_target,
+ .family = NFPROTO_IPV4,
+ .targetsize = sizeof(struct ipt_route_target_info),
+ .table = "mangle",
+ .hooks = 1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN |
+ 1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return xt_register_target(&xt_route_reg);
+}
+
+
+static void __exit fini(void)
+{
+ xt_unregister_target(&xt_route_reg);
+}
+
+module_init(init);
+module_exit(fini);
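
The route() helper above follows a three-way return convention: 1 means the packet was routed out the requested interface, 0 means the routing table chose a different interface, and -1 means the lookup failed (or an ICMP error was already generated). route_oif() folds those into netfilter verdicts, force-sending on tunnel-type devices when the table disagrees. A minimal user-space model of that mapping, purely illustrative and with plain ints standing in for the kernel types:

    /* Illustrative only: not part of the patch. Models the verdict
     * mapping in route_oif() above. route_rc: 1 = routed out the
     * desired oif, 0 = routed elsewhere, -1 = routing failed.
     * is_tunnel models dev_out->type being ARPHRD_TUNNEL/ARPHRD_IPGRE;
     * keep_going models the IPT_ROUTE_CONTINUE flag. */
    #include <assert.h>

    enum verdict { DROP, STOLEN, CONTINUE };

    static enum verdict map_verdict(int route_rc, int keep_going, int is_tunnel)
    {
        switch (route_rc) {
        case 1:
            return keep_going ? CONTINUE : STOLEN;
        case 0:
            if (keep_going)
                return DROP;              /* can't continue on a forced path */
            return is_tunnel ? STOLEN : DROP; /* hw addr can't be guessed */
        default:
            return DROP;
        }
    }

    int main(void)
    {
        assert(map_verdict(1, 0, 0) == STOLEN);
        assert(map_verdict(1, 1, 0) == CONTINUE);
        assert(map_verdict(0, 0, 1) == STOLEN);
        assert(map_verdict(0, 0, 0) == DROP);
        assert(map_verdict(-1, 0, 1) == DROP);
        return 0;
    }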
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f0dfe92..4118eb7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -121,6 +121,13 @@
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
int ret = 0;
+// CRADLEPOINT START
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ int i;
+ uint16_t app_id;
+ uint8_t cat_id;
+#endif /* CONFIG_NF_CONNTRACK_APPID */
+// CRADLEPOINT END
NF_CT_ASSERT(ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
@@ -180,6 +187,35 @@
ct_show_secctx(s, ct);
+// CRADLEPOINT START
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ cat_id = app_id = 0;
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+ if (i == 0 && ct->appid[i].value == 0) {
+ // No APPID was detected for this CT
+ break;
+ }
+
+ if (i > 0 && ct->appid[i].app_id == 0) {
+ // Found the last APPID in the chain
+ cat_id = ct->appid[i-1].cat_id;
+ app_id = ct->appid[i-1].app_id;
+ break;
+ }
+
+ if (i == MAX_CT_APPID_VALS - 1) {
+ // All APPID vals filled
+ cat_id = ct->appid[i].cat_id;
+ app_id = ct->appid[i].app_id;
+ break;
+ }
+ }
+ seq_printf(s, "catid=%u appid=%u ", cat_id, app_id);
+#endif /* CONFIG_NF_CONNTRACK_APPID */
+
+ seq_printf(s, "dscp=%u ", ct->dscp);
+// CRADLEPOINT END
+
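
The loop above scans the per-conntrack appid array for the last populated entry: a zero app_id past slot 0 terminates the chain, and a fully populated array yields the final slot. A small user-space model of the same walk (MAX_CT_APPID_VALS and the field layout are assumptions mirroring the patch; the first-slot check in the kernel code reads appid[0].value, presumably a union view of the same entry, while the sketch simply checks app_id):

    /* Illustrative only: not part of the patch. */
    #include <assert.h>
    #include <stdint.h>

    #define MAX_CT_APPID_VALS 4

    struct appid { uint16_t app_id; uint8_t cat_id; };

    /* Returns the last populated entry, or a zeroed one if none. */
    static struct appid last_appid(const struct appid *v)
    {
        struct appid out = { 0, 0 };
        int i;

        for (i = 0; i < MAX_CT_APPID_VALS; i++) {
            if (i == 0 && v[0].app_id == 0)
                break;                    /* nothing detected */
            if (i > 0 && v[i].app_id == 0) {
                out = v[i - 1];           /* chain ended one slot back */
                break;
            }
            if (i == MAX_CT_APPID_VALS - 1) {
                out = v[i];               /* all slots filled */
                break;
            }
        }
        return out;
    }

    int main(void)
    {
        struct appid chain[MAX_CT_APPID_VALS] = { {7, 1}, {9, 2}, {0, 0}, {0, 0} };
        assert(last_appid(chain).app_id == 9);
        assert(last_appid(chain).cat_id == 2);
        return 0;
    }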
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
if (seq_has_overflowed(s))
@@ -436,6 +472,12 @@
net->proc_net_stat, &ct_cpu_seq_fops);
if (!proc_stat)
goto err3;
+
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ /* Initialize IP records proc entries and related structures. */
+ conn_track_ip_addr_track(net);
+#endif
+
return 0;
err3:
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3abd9d7..4ede35b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -302,6 +302,7 @@
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0294f7c..fc104d9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1788,7 +1788,7 @@
struct rtable *rth;
int err = -EINVAL;
struct net *net = dev_net(dev);
- bool do_cache;
+ bool do_cache = true;
/* IP on this device is disabled. */
@@ -1852,8 +1852,14 @@
goto no_route;
}
- if (res.type == RTN_BROADCAST)
+ if (res.type == RTN_BROADCAST) {
+ if (IN_DEV_BFORWARD(in_dev))
+ goto make_route;
+ /* do not cache if bc_forwarding is enabled */
+ if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ do_cache = false;
goto brd_input;
+ }
if (res.type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
@@ -1870,6 +1876,7 @@
if (res.type != RTN_UNICAST)
goto martian_destination;
+make_route:
err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out: return err;
@@ -1888,16 +1895,13 @@
RT_CACHE_STAT_INC(in_brd);
local_input:
- do_cache = false;
- if (res.fi) {
- if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
- if (rt_cache_valid(rth)) {
- skb_dst_set_noref(skb, &rth->dst);
- err = 0;
- goto out;
- }
- do_cache = true;
+ do_cache &= res.fi && !itag;
+ if (do_cache) {
+ rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
+ if (rt_cache_valid(rth)) {
+ skb_dst_set_noref(skb, &rth->dst);
+ err = 0;
+ goto out;
}
}
@@ -2148,8 +2152,9 @@
* Major route resolver routine.
*/
+/* Cradlepoint: Add "override_loopback" to allow iif routing */
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- int mp_hash)
+ int mp_hash, bool override_loopback)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2165,7 +2170,8 @@
orig_oif = fl4->flowi4_oif;
- fl4->flowi4_iif = LOOPBACK_IFINDEX;
+ if (!override_loopback)
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70fb352..812b7e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -36,6 +36,8 @@
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
@@ -930,6 +932,15 @@
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_min_snd_mss",
+ .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_min_snd_mss_min,
+ .extra2 = &tcp_min_snd_mss_max,
+ },
+ {
.procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5597120..0279318 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3134,6 +3134,7 @@
int max_rshare, max_wshare, cnt;
unsigned int i;
+ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9e8d701..5e4a30d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1267,7 +1267,7 @@
TCP_SKB_CB(skb)->seq += shifted;
tcp_skb_pcount_add(prev, pcount);
- BUG_ON(tcp_skb_pcount(skb) < pcount);
+ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1329,6 +1329,21 @@
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+ int pcount, int shiftlen)
+{
+ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+ * to make sure we never store more than 65535 * 8 bytes per skb,
+ * even if current MSS is bigger.
+ */
+ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+ return 0;
+ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+ return 0;
+ return skb_shift(to, from, shiftlen);
+}
+
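
Both guards in tcp_skb_shift() exist because tcp_gso_segs is a u16 and TCP enforces a minimum gso_size of TCP_MIN_GSO_SIZE (8 bytes), so a single skb may never describe more than 65535 segments or 65535 * 8 payload bytes. A standalone sketch of the arithmetic, illustrative only:

    /* Illustrative only: not part of the patch. */
    #include <assert.h>

    #define TCP_MIN_GSO_SIZE 8

    static int shift_allowed(unsigned int to_len, unsigned int shiftlen,
                             unsigned int to_pcount, unsigned int pcount)
    {
        if (to_len + shiftlen >= 65535u * TCP_MIN_GSO_SIZE)
            return 0;        /* byte count would exceed u16 segs * min mss */
        if (to_pcount + pcount > 65535u)
            return 0;        /* segment count would overflow a u16 */
        return 1;
    }

    int main(void)
    {
        assert(shift_allowed(1000, 1000, 2, 2));
        assert(!shift_allowed(65535u * TCP_MIN_GSO_SIZE - 4, 8, 2, 2));
        assert(!shift_allowed(1000, 1000, 65000, 1000));
        return 0;
    }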
/* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb.
*/
@@ -1340,6 +1355,7 @@
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *prev;
int mss;
+ int next_pcount;
int pcount = 0;
int len;
int in_sack;
@@ -1434,7 +1450,7 @@
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback;
- if (!skb_shift(prev, skb, len))
+ if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback;
if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
goto out;
@@ -1453,11 +1469,11 @@
goto out;
len = skb->len;
- if (skb_shift(prev, skb, len)) {
- pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ next_pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+ pcount += next_pcount;
+ tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
}
-
out:
state->fack_count += pcount;
return prev;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a5d790c..ccf1998 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2413,6 +2413,7 @@
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 64c7ce8..eb5def7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1149,6 +1149,11 @@
if (nsize < 0)
nsize = 0;
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+ return -ENOMEM;
+ }
+
if (skb_unclone(skb, gfp))
return -ENOMEM;
@@ -1315,8 +1320,7 @@
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < 48)
- mss_now = 48;
+ mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
return mss_now;
}
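
Clamping mss_now to a configurable floor (instead of the hard-coded 48) matters because the floor bounds per-byte work: with MAX_TCP_OPTION_SPACE (40 bytes) reserved for options, an MSS of 48 still leaves 8 payload bytes per segment, while an unclamped peer-driven MSS of 41 would leave one. These hunks, together with the tcp_min_snd_mss sysctl, the tcp_skb_shift() guards, and the TCPWqueueTooBig counter, appear to mirror the upstream TCP SACK/MSS hardening series. An illustrative calculation, assuming the upstream default TCP_MIN_SND_MSS = 48 (the BUILD_BUG_ON in tcp.c above enforces that it stays above MAX_TCP_OPTION_SPACE):

    /* Illustrative only: not part of the patch. */
    #include <assert.h>

    #define MAX_TCP_OPTION_SPACE 40
    #define TCP_MIN_SND_MSS      48

    /* Segments needed to carry `bytes` of payload at a given mss_now,
     * after reserving full option space in each segment. */
    static unsigned int segs_for(unsigned int bytes, unsigned int mss_now)
    {
        unsigned int payload = mss_now - MAX_TCP_OPTION_SPACE;
        return (bytes + payload - 1) / payload;
    }

    int main(void)
    {
        /* 1 MB of data: the floored MSS caps this at 131072 segments... */
        assert(segs_for(1 << 20, TCP_MIN_SND_MSS) == (1 << 20) / 8);
        /* ...whereas mss_now = 41 would mean one segment per byte. */
        assert(segs_for(1 << 20, 41) == (1 << 20));
        return 0;
    }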
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1ec12a4..7fcefb5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -117,6 +117,7 @@
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tp->tcp_header_len);
+ mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 7ee6518..eb8c815 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -79,6 +79,9 @@
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
#endif
+ /* grab flow hash before encryption */
+ skb_get_hash(skb);
+
return xfrm_output(sk, skb);
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7b0edb3..e74eb38 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -20,7 +20,7 @@
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
- int tos, int oif,
+ int tos, int oif, int mark,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -29,6 +29,7 @@
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
+ fl4->flowi4_mark = mark;
fl4->flowi4_oif = oif;
if (saddr)
fl4->saddr = saddr->a4;
@@ -43,12 +44,13 @@
}
static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
+ int mark,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+ return __xfrm4_dst_lookup(net, &fl4, tos, oif, mark, saddr, daddr);
}
static int xfrm4_get_saddr(struct net *net, int oif,
@@ -57,7 +59,7 @@
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+ dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, 0, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -71,6 +73,11 @@
return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
}
+static int xfrm4_get_mark(const struct flowi *fl)
+{
+ return fl->u.ip4.flowi4_mark;
+}
+
static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -278,6 +285,7 @@
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
.get_tos = xfrm4_get_tos,
+ .get_mark = xfrm4_get_mark,
.init_path = xfrm4_init_path,
.fill_dst = xfrm4_fill_dst,
.blackhole_route = ipv4_blackhole_route,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0a37ddc..8ec63c1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -405,6 +405,7 @@
int len;
int hlimit;
int err = 0;
+ int oif_specified = 0; /* Cradlepoint */
u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
@@ -502,6 +503,11 @@
if (!fl6.flowi6_oif)
fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
@@ -523,11 +529,12 @@
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit,
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, oif_specified);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -566,6 +573,7 @@
int hlimit;
u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ int oif_specified = 0; /* Cradlepoint */
saddr = &ipv6_hdr(skb)->daddr;
@@ -598,6 +606,11 @@
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
err = ip6_dst_lookup(net, sk, &dst, &fl6);
if (err)
goto out;
@@ -614,10 +627,11 @@
msg.type = ICMPV6_ECHO_REPLY;
tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag);
+ np->dontfrag, oif_specified);
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 71624cf..d54d08d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1186,10 +1186,11 @@
}
}
+/* Cradlepoint - add oif_specified argument to ip6_setup_cork */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct inet6_cork *v6_cork,
int hlimit, int tclass, struct ipv6_txoptions *opt,
- struct rt6_info *rt, struct flowi6 *fl6)
+ struct rt6_info *rt, struct flowi6 *fl6, int oif_specified)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
@@ -1251,6 +1252,11 @@
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
+ /* Cradlepoint */
+ if (oif_specified) {
+ cork->base.flags |= IPCORK_OIF_SPECIFIED;
+ }
+
return 0;
}
@@ -1455,6 +1461,12 @@
}
if (!skb)
goto error;
+
+ /* Cradlepoint */
+ IP6CB(skb)->flags &= ~IP6SKB_OIF_SPECIFIED;
+ if (cork->flags & IPCORK_OIF_SPECIFIED) {
+ IP6CB(skb)->flags |= IP6SKB_OIF_SPECIFIED;
+ }
/*
* Fill in the control structures
*/
@@ -1567,12 +1579,13 @@
return err;
}
+/* Cradlepoint - add oif_specified argument to ip6_append_data */
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen, int hlimit,
int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag)
+ struct rt6_info *rt, unsigned int flags, int dontfrag, int oif_specified)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1585,8 +1598,9 @@
/*
* setup for corking
*/
+ /* Cradlepoint - add oif_specified argument to ip6_setup_cork call */
err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
- tclass, opt, rt, fl6);
+ tclass, opt, rt, fl6, oif_specified);
if (err)
return err;
@@ -1752,6 +1766,7 @@
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
+/* Cradlepoint - add oif_specified argument to ip6_make_skb */
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
@@ -1759,7 +1774,7 @@
int hlimit, int tclass,
struct ipv6_txoptions *opt, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag)
+ int dontfrag, int oif_specified)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
@@ -1776,7 +1791,8 @@
cork.base.addr = 0;
cork.base.opt = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
+ /* Cradlepoint - add oif_specified argument to ip6_setup_cork call */
+ err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6, oif_specified);
if (err)
return ERR_PTR(err);
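
Taken together, the ip6_setup_cork()/ip6_append_data() changes thread a single bit from the sender down to each skb: oif_specified sets IPCORK_OIF_SPECIFIED on the cork, the skb-construction path copies it into IP6CB as IP6SKB_OIF_SPECIFIED, and the reroute hook in net/ipv6/netfilter.c (further below) uses it to pin the new route lookup to the previously chosen device. A compact user-space model of that plumbing, illustrative only (the flag values are arbitrary stand-ins):

    /* Illustrative only: not part of the patch. */
    #include <assert.h>

    #define IPCORK_OIF_SPECIFIED  0x01
    #define IP6SKB_OIF_SPECIFIED  0x01

    struct cork  { unsigned int flags; };
    struct skbcb { unsigned int flags; };

    /* ip6_setup_cork(): record that an output interface was chosen. */
    static void setup_cork(struct cork *c, int oif_specified)
    {
        c->flags = oif_specified ? IPCORK_OIF_SPECIFIED : 0;
    }

    /* skb construction: propagate the cork flag into the skb cb. */
    static void build_skb(struct skbcb *cb, const struct cork *c)
    {
        cb->flags &= ~IP6SKB_OIF_SPECIFIED;
        if (c->flags & IPCORK_OIF_SPECIFIED)
            cb->flags |= IP6SKB_OIF_SPECIFIED;
    }

    /* Reroute step: keep the previously chosen ifindex only when the
     * flag survived into the skb control block. */
    static int reroute_oif(const struct skbcb *cb, int dst_ifindex)
    {
        return (cb->flags & IP6SKB_OIF_SPECIFIED) ? dst_ifindex : 0;
    }

    int main(void)
    {
        struct cork c; struct skbcb cb = { 0 };
        setup_cork(&c, 1);
        build_skb(&cb, &c);
        assert(reroute_oif(&cb, 7) == 7);
        setup_cork(&c, 0);
        build_skb(&cb, &c);
        assert(reroute_oif(&cb, 7) == 0);
        return 0;
    }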
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 7ebb14d..d82280b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -340,6 +340,7 @@
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
@@ -357,7 +358,19 @@
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d11c468..6b0bc9e 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -31,6 +31,15 @@
};
int err;
+ /* Cradlepoint - honor IPV6_PKTINFO output interface setting */
+ if (!fl6.flowi6_oif && (IP6CB(skb)->flags & IP6SKB_OIF_SPECIFIED)) {
+ dst = skb_dst(skb);
+ if (dst && dst->dev) {
+ /* "bind" packet to previously determined output interface */
+ fl6.flowi6_oif = dst->dev->ifindex;
+ }
+ }
+
dst = ip6_route_output(net, skb->sk, &fl6);
err = dst->error;
if (err) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 22f39e0..6035140 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -97,9 +97,14 @@
#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
- if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+ if (ip6info->flags & IP6T_F_NO_DEF_MATCH)
+ return true;
+
+ if (FWINV(!ipv6_addr_any(&ip6info->smsk) &&
+ ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
&ip6info->src), IP6T_INV_SRCIP) ||
- FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+ FWINV(!ipv6_addr_any(&ip6info->dmsk) &&
+ ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
&ip6info->dst), IP6T_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
/*
@@ -165,6 +170,29 @@
return true;
}
+static void
+ip6_checkdefault(struct ip6t_ip6 *ipv6)
+{
+ static const char iface_mask[IFNAMSIZ] = {};
+
+ if (ipv6->invflags || (ipv6->flags & IP6T_F_PROTO))
+ return;
+
+ if (memcmp(ipv6->iniface_mask, iface_mask, IFNAMSIZ) != 0)
+ return;
+
+ if (memcmp(ipv6->outiface_mask, iface_mask, IFNAMSIZ) != 0)
+ return;
+
+ if (!ipv6_addr_any(&ipv6->smsk) || !ipv6_addr_any(&ipv6->dmsk))
+ return;
+
+ if (ipv6->proto)
+ return;
+
+ ipv6->flags |= IP6T_F_NO_DEF_MATCH;
+}
+
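
ip6_checkdefault() runs once per rule at check-entry time and flags rules whose IPv6 header fields are all wildcards; the IP6T_F_NO_DEF_MATCH early return added to ip6_packet_match() then skips the per-packet field comparison entirely. A simplified user-space model of the wildcard test (it treats any flag bit as disqualifying, where the kernel only checks IP6T_F_PROTO; field sizes are stand-ins):

    /* Illustrative only: not part of the patch. */
    #include <assert.h>
    #include <string.h>

    #define IFNAMSIZ 16

    struct rule {
        unsigned int invflags, flags, proto;
        unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
        unsigned char smsk[16], dmsk[16];   /* all-zero == ::/0 */
    };

    static int addr_any(const unsigned char *a)
    {
        static const unsigned char zero[16];
        return memcmp(a, zero, 16) == 0;
    }

    /* A rule is a pure wildcard when nothing constrains the header. */
    static int is_default_match(const struct rule *r)
    {
        static const unsigned char no_iface[IFNAMSIZ];

        if (r->invflags || r->flags || r->proto)
            return 0;
        if (memcmp(r->iniface_mask, no_iface, IFNAMSIZ) ||
            memcmp(r->outiface_mask, no_iface, IFNAMSIZ))
            return 0;
        return addr_any(r->smsk) && addr_any(r->dmsk);
    }

    int main(void)
    {
        struct rule r;
        memset(&r, 0, sizeof(r));
        assert(is_default_match(&r));
        r.proto = 6;                        /* -p tcp: no longer a wildcard */
        assert(!is_default_match(&r));
        return 0;
    }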
/* should be ip6 safe */
static bool
ip6_checkentry(const struct ip6t_ip6 *ipv6)
@@ -677,6 +705,8 @@
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ ip6_checkdefault(&e->ipv6);
+
e->counters.pcnt = xt_percpu_counter_alloc();
if (IS_ERR_VALUE(e->counters.pcnt))
return -ENOMEM;
@@ -960,6 +990,7 @@
const struct xt_table_info *private = table->private;
int ret = 0;
const void *loc_cpu_entry;
+ u8 flags;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -987,6 +1018,14 @@
goto free_counters;
}
+ flags = e->ipv6.flags & IP6T_F_MASK;
+ if (copy_to_user(userptr + off
+ + offsetof(struct ip6t_entry, ipv6.flags),
+ &flags, sizeof(flags)) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
for (i = sizeof(struct ip6t_entry);
i < e->target_offset;
i += m->u.match_size) {
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 590f767..084e1bf 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -109,7 +109,6 @@
static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
{
.name = "SNPT",
- .table = "mangle",
.target = ip6t_snpt_tg,
.targetsize = sizeof(struct ip6t_npt_tginfo),
.checkentry = ip6t_npt_checkentry,
@@ -120,7 +119,6 @@
},
{
.name = "DNPT",
- .table = "mangle",
.target = ip6t_dnpt_tg,
.targetsize = sizeof(struct ip6t_npt_tginfo),
.checkentry = ip6t_npt_checkentry,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1aa5848..d8e954b 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -34,6 +34,12 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
+#ifdef HNDCTF
+extern void ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum,
+ struct nf_conn *ct, enum ip_conntrack_info ci,
+ struct nf_conntrack_tuple *manip);
+#endif /* HNDCTF */
+
static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -169,7 +175,21 @@
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+ unsigned int ret;
+
+ ret = nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+
+#if defined(HNDCTF)
+ if (ret == NF_ACCEPT) {
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ ip_conntrack_ipct_add(skb, state->hook, ct, ctinfo, NULL);
+ }
+#endif /* HNDCTF */
+
+ return ret;
}
static unsigned int ipv6_conntrack_local(void *priv,
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index a830b68..5a8738b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -91,6 +91,7 @@
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
+ int oif_specified = 0; /* Cradlepoint */
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -144,6 +145,11 @@
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -172,10 +178,11 @@
hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
0, hlimit,
np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, oif_specified);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4d52a0e..40827bb 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -757,6 +757,7 @@
int dontfrag = -1;
u16 proto;
int err;
+ int oif_specified = 0; /* Cradlepoint */
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
@@ -879,6 +880,11 @@
if (inet->hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -901,9 +907,10 @@
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
lock_sock(sk);
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, raw6_getfrag, &rfv,
len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, oif_specified);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6eb1e92..f801d29 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1126,6 +1126,7 @@
int connected = 0;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+ int oif_specified = 0; /* Cradlepoint */
/* destination address check */
if (sin6) {
@@ -1297,6 +1298,11 @@
} else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
@@ -1320,10 +1326,11 @@
if (!corkreq) {
struct sk_buff *skb;
+ /* Cradlepoint - add oif_specified argument to ip6_make_skb call */
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, oif_specified);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1347,10 +1354,12 @@
if (dontfrag < 0)
dontfrag = np->dontfrag;
up->len += ulen;
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
+ oif_specified);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4d09ce6..c4f5650 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -129,6 +129,9 @@
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
+ /* grab flow hash before encryption */
+ skb_get_hash(skb);
+
return xfrm_output(sk, skb);
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c074771..f86e2ef 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -28,6 +28,7 @@
static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
+ int mark,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -59,7 +60,7 @@
struct dst_entry *dst;
struct net_device *dev;
- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
+ dst = xfrm6_dst_lookup(net, 0, oif, 0, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -74,6 +75,11 @@
return 0;
}
+static int xfrm6_get_mark(const struct flowi *fl)
+{
+ return 0;
+}
+
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -298,6 +304,7 @@
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
+ .get_mark = xfrm6_get_mark,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bcdab1c..e980402 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -501,6 +501,7 @@
int transhdrlen = 4; /* zero session-id */
int ulen = len + transhdrlen;
int err;
+ int oif_specified = 0; /* Cradlepoint */
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
@@ -602,6 +603,11 @@
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ /* Cradlepoint */
+ if (fl6.flowi6_oif) {
+ oif_specified = 1;
+ }
+
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
@@ -624,10 +630,11 @@
back_from_confirm:
lock_sock(sk);
+ /* Cradlepoint - add oif_specified argument to ip6_append_data call */
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, oif_specified);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4692782..9a0de3d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -68,6 +68,14 @@
of packets, but this mark value is kept in the conntrack session
instead of the individual packets.
+config NF_CONNTRACK_APPID
+ bool 'Connection APPID tracking support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for appid information in conntrack.
+ The appid information is gathered by the Trend Micro IPS
+ scanning engine.
+
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
@@ -124,6 +132,14 @@
If unsure, say `N'.
+config NF_CONNTRACK_CHAIN_EVENTS
+ bool "Register multiple callbacks to ct events"
+ depends on NF_CONNTRACK_EVENTS
+ help
+ Support multiple registrations.
+
+ If unsure, say `N'.
+
config NF_CONNTRACK_TIMESTAMP
bool 'Connection tracking timestamping'
depends on NETFILTER_ADVANCED
@@ -430,6 +446,18 @@
config NETFILTER_SYNPROXY
tristate
+config NETFILTER_CP_FLOWSTATS
+ tristate 'Cradlepoint flow statistics support'
+ depends on NF_CONNTRACK
+ help
+ This option adds a "flow tracking" target, which allows you to
+ track flow metrics on a per-destination-IP basis. Usage
+ data is available in a proc entry.
+
+config NETFILTER_CP_CLIENT_USAGE
+ tristate "Support for conntrack based client data usage module"
+ depends on NF_CONNTRACK && NF_CONNTRACK_CHAIN_EVENTS && NETFILTER_NETLINK_ACCT
+
endif # NF_CONNTRACK
config NF_TABLES
@@ -841,6 +869,17 @@
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_CT
+config NETFILTER_XT_TARGET_NOACCEL
+ tristate '"NOACCEL" target support'
+ depends on NF_CONNTRACK && BCM_CTF
+ depends on NETFILTER_ADVANCED
+ help
+ The NOACCEL target allows you to specify packets (and any
+ flows created by the packets) to not be accelerated by CTF.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
@@ -951,6 +990,14 @@
This option adds a "TCPOPTSTRIP" target, which allows you to strip
TCP options from TCP packets.
+config NETFILTER_XT_TARGET_USAGE
+ tristate '"USAGE" target support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a "USAGE" target, which allows you to
+ track data usage on a MAC+IP address basis. Usage
+ data is available in a proc entry.
+
# alphabetically ordered list of matches
comment "Xtables matches"
@@ -965,6 +1012,16 @@
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+config NETFILTER_XT_MATCH_APPID
+ tristate '"appid" type match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to match appid values gathered from
+ the TM IPS/AppID engine
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
config NETFILTER_XT_MATCH_BPF
tristate '"bpf" match support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7638c36..6db9cc1 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -113,6 +113,7 @@
obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NOACCEL) += xt_NOACCEL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
@@ -122,9 +123,11 @@
obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_USAGE) += xt_USAGE.o
# matches
obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_APPID) += xt_appid.o
obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 54f3d7c..162a8be 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -399,7 +399,15 @@
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBAPPID0) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBAPPID1) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBAPPID2) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBAPPID3)
+#endif
+ ))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_TIMEOUT]) {
@@ -441,6 +449,32 @@
ext->skbqueue = be16_to_cpu(nla_get_be16(
tb[IPSET_ATTR_SKBQUEUE]));
}
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ if (tb[IPSET_ATTR_SKBAPPID0]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbappid[0] = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBAPPID0]));
+ }
+ if (tb[IPSET_ATTR_SKBAPPID1]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbappid[1] = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBAPPID1]));
+ }
+ if (tb[IPSET_ATTR_SKBAPPID2]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbappid[2] = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBAPPID2]));
+ }
+ if (tb[IPSET_ATTR_SKBAPPID3]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbappid[3] = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBAPPID3]));
+ }
+#endif
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e5336ab..ca22b83 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -261,9 +261,21 @@
#define HKEY_DATALEN sizeof(struct mtype_elem)
#endif
+/* Cradlepoint
+ * We use multiple ipsets of the hash:net,iface type which
+ * have the same ip address (0.0.0.0/0). The default
+ * hash calculation used for all other ipset types wouldn't
+ * give a unique hashtable key.
+ */
+#if defined HASH_IFACE
+#define HKEY(data, initval, htable_bits) \
+(jhash((void *)(data), HKEY_DATALEN / sizeof(u8), initval) \
+ & jhash_mask(htable_bits))
+#else
#define HKEY(data, initval, htable_bits) \
(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
& jhash_mask(htable_bits))
+#endif
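
The reason for the byte-wise jhash variant: hash:net,iface sets holding the same 0.0.0.0/0 network must still land under distinct bucket keys, so the interface name bytes have to enter the hash, and the full element size need not be a multiple of 4 the way jhash2 requires. A toy demonstration with FNV-1a standing in for jhash (the struct layout is an assumption for illustration):

    /* Illustrative only: not part of the patch. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct elem { uint32_t ip; uint8_t cidr; uint8_t pad[3]; char iface[16]; };

    /* FNV-1a, a stand-in for the kernel's jhash over raw bytes. */
    static uint32_t hash_bytes(const void *p, size_t n)
    {
        const uint8_t *b = p;
        uint32_t h = 2166136261u;
        while (n--)
            h = (h ^ *b++) * 16777619u;
        return h;
    }

    int main(void)
    {
        struct elem a, b;
        memset(&a, 0, sizeof(a)); memset(&b, 0, sizeof(b));
        strcpy(a.iface, "eth0"); strcpy(b.iface, "eth1");

        /* Hashing only the leading ip+cidr bytes collides... */
        assert(hash_bytes(&a, offsetof(struct elem, iface)) ==
               hash_bytes(&b, offsetof(struct elem, iface)));
        /* ...hashing the full element separates the two sets' entries. */
        assert(hash_bytes(&a, sizeof(a)) != hash_bytes(&b, sizeof(b)));
        return 0;
    }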
#ifndef htype
#ifndef HTYPE
@@ -606,7 +618,7 @@
/* There can't be another parallel resizing, but dumping is possible */
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
- pr_debug("attempt to resize set %s from %u to %u, t %p\n",
+ pr_warn("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
n = __ipset_dereference_protected(hbucket(orig, i), 1);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 9d84b3d..76aa760 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -96,6 +96,19 @@
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ if (SET_WITH_SKBINFO(set)) {
+ int i;
+ ext.skbmark = opt->ext.skbmark;
+ ext.skbmarkmask = opt->ext.skbmarkmask;
+ ext.skbprio = opt->ext.skbprio;
+ ext.skbqueue = opt->ext.skbqueue;
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ for (i = 0; i < 4; i++) {
+ ext.skbappid[i] = opt->ext.skbappid[i];
+ }
+#endif
+ }
+
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
return -EINVAL;
@@ -245,6 +258,7 @@
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
+
static int
hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -371,6 +385,12 @@
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ [IPSET_ATTR_SKBAPPID0] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBAPPID1] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBAPPID2] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBAPPID3] = { .type = NLA_U32 },
+#endif
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c98..13b271f 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -44,15 +44,10 @@
#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
-/* IPv4 variant */
-struct hash_netiface4_elem_hashed {
- __be32 ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
-};
+#define HASH_IFACE
+
+/* IPv4 variant */
/* Member elements */
struct hash_netiface4_elem {
@@ -132,7 +127,8 @@
#define MTYPE hash_netiface4
#define HOST_MASK 32
-#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
+/* Cradlepoint - use the complete struct for hashing */
+#define HKEY_DATALEN sizeof(struct hash_netiface4_elem)
#include "ip_set_hash_gen.h"
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -164,8 +160,6 @@
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
@@ -273,14 +267,6 @@
/* IPv6 variant */
-struct hash_netiface6_elem_hashed {
- union nf_inet_addr ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
-};
-
struct hash_netiface6_elem {
union nf_inet_addr ip;
u8 physdev;
@@ -360,7 +346,8 @@
#define MTYPE hash_netiface6
#define HOST_MASK 128
-#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
+/* Cradlepoint - use the complete struct for hashing */
+#define HKEY_DATALEN sizeof(struct hash_netiface6_elem)
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -377,8 +364,6 @@
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 86a3c6f..9edef9d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -52,9 +52,38 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
+// CRADLEPOINT START
+#include <net/ip.h>
+// CRADLEPOINT END
#define NF_CONNTRACK_VERSION "0.5.0"
+#ifdef HNDCTF
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+//#include <linux/ip.h> // CRADLEPOINT: moved out of HNDCTF ifdef
+#include <linux/tcp.h>
+
+#ifdef CONFIG_IPV6
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#define IPVERSION_IS_4(ipver) ((ipver) == 4)
+#else
+#define IPVERSION_IS_4(ipver) 1
+#endif /* CONFIG_IPV6 */
+
+#include <net/ip.h>
+#include <net/route.h>
+#include <typedefs.h>
+#include <osl.h>
+#include <ctf/hndctf.h>
+#ifdef CTF_ESDK_VERSION
+#include <ctf/ctf_cfg.h>
+#endif /* CTF_ESDK_VERSION */
+#endif /* HNDCTF */
+
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr) __read_mostly;
@@ -123,6 +152,505 @@
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
+#ifdef HNDCTF
+
+unsigned int nf_ctf_disable __read_mostly = 0;
+EXPORT_SYMBOL(nf_ctf_disable);
+
+#ifdef CTF_ESDK_VERSION
+/* Returns the number of 1-bits in x */
+static int
+_popcounts(uint32 x)
+{
+ x = x - ((x >> 1) & 0x55555555);
+ x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x = (x + (x >> 16));
+ return (x + (x >> 8)) & 0x0000003F;
+}
+#endif
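
_popcounts() is the classic SWAR bit count: pairwise sums, then nibble sums masked into bytes, then byte sums folded across the halfwords. A standalone check against the GCC/Clang builtin, illustrative only:

    /* Illustrative only: not part of the patch. */
    #include <assert.h>
    #include <stdint.h>

    static int popcounts(uint32_t x)
    {
        x = x - ((x >> 1) & 0x55555555);                 /* 2-bit sums */
        x = ((x >> 2) & 0x33333333) + (x & 0x33333333);  /* 4-bit sums */
        x = (x + (x >> 4)) & 0x0F0F0F0F;                 /* byte sums  */
        x = (x + (x >> 16));                             /* fold halves */
        return (x + (x >> 8)) & 0x0000003F;              /* fold bytes */
    }

    int main(void)
    {
        uint32_t samples[] = { 0, 1, 0xFF, 0x80000000u, 0xFFFFFFFFu, 0x12345678u };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
            assert(popcounts(samples[i]) == __builtin_popcount(samples[i]));
        return 0;
    }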
+
+bool
+ip_conntrack_is_ipc_allowed(struct sk_buff *skb, u_int32_t hooknum)
+{
+ struct net_device *dev;
+
+ /* Cradlepoint [KJS] 16 May 2017: Check pointers before deref */
+ if (!skb)
+ return FALSE;
+
+ if (!CTF_ENAB(kcih))
+ return FALSE;
+
+ /* Cradlepoint: Additional ways to disable (skb or proc) */
+ if ((nf_ctf_disable != 0) || (skb->nfcache & NFC_CTF_DISALLOWED))
+ return FALSE;
+
+ if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_POST_ROUTING) {
+ dev = skb->dev;
+ /* Cradlepoint [KJS] 16 May 2017: Check pointers before deref */
+ if (!dev)
+ return FALSE;
+
+ if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ dev = vlan_dev_real_dev(dev);
+ /* Cradlepoint [KJS] 16 May 2017: Check pointers before deref */
+ if (!dev)
+ return FALSE;
+ }
+
+ /* Add an ipc entry if the packet is received on a ctf-enabled
+ * interface and is not a defragmented one.
+ */
+ if (ctf_isenabled(kcih, dev) && (skb->len <= dev->mtu))
+ skb->nfcache |= NFC_CTF_ENABLED;
+ }
+
+ /* Add the cache entries only if the device has registered and
+ * enabled ctf.
+ */
+ if (skb->nfcache & NFC_CTF_ENABLED)
+ return TRUE;
+
+ return FALSE;
+}
+
+void
+ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum,
+ struct nf_conn *ct, enum ip_conntrack_info ci,
+ struct nf_conntrack_tuple *manip)
+{
+ ctf_ipc_t ipc_entry;
+ struct hh_cache *hh;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct rtable *rt;
+ struct neighbour *n;
+ struct nf_conn_help *help;
+ enum ip_conntrack_dir dir;
+ uint8 ipver, protocol;
+#ifdef CONFIG_IPV6
+ struct ipv6hdr *ip6h = NULL;
+#endif /* CONFIG_IPV6 */
+ uint32 nud_flags;
+ unsigned int seq;
+
+ if ((skb == NULL) || (ct == NULL))
+ return;
+
+ /* Check CTF enabled */
+ if (!ip_conntrack_is_ipc_allowed(skb, hooknum))
+ return;
+ /* We only add cache entries for non-helper connections and at
+ * pre or post routing hooks.
+ */
+ help = nfct_help(ct);
+ if ((help && help->helper) || (ct->ctf_flags & CTF_FLAGS_EXCLUDED) ||
+ ((hooknum != NF_INET_PRE_ROUTING) && (hooknum != NF_INET_POST_ROUTING)))
+ return;
+
+ iph = ip_hdr(skb);
+ ipver = iph->version;
+
+ /* Support both IPv4 and IPv6 */
+ if (ipver == 4) {
+ tcph = ((struct tcphdr *)(((__u8 *)iph) + (iph->ihl << 2)));
+ protocol = iph->protocol;
+ }
+#ifdef CONFIG_IPV6
+ else if (ipver == 6) {
+ ip6h = (struct ipv6hdr *)iph;
+ tcph = (struct tcphdr *)ctf_ipc_lkup_l4proto(kcih, ip6h, &protocol);
+ if (tcph == NULL)
+ return;
+ }
+#endif /* CONFIG_IPV6 */
+ else
+ return;
+
+ /* Only TCP and UDP are supported */
+ if (protocol == IPPROTO_TCP) {
+ /* Add ipc entries for connections in established state only */
+ if ((ci != IP_CT_ESTABLISHED) && (ci != IP_CT_ESTABLISHED_REPLY))
+ return;
+
+ if (ct->proto.tcp.state >= TCP_CONNTRACK_FIN_WAIT &&
+ ct->proto.tcp.state <= TCP_CONNTRACK_TIME_WAIT)
+ return;
+ }
+ else if (protocol != IPPROTO_UDP)
+ return;
+
+ dir = CTINFO2DIR(ci);
+ if (ct->ctf_flags & (1 << dir))
+ return;
+
+ /* Do route lookup for alias address if we are doing DNAT in this
+ * direction.
+ */
+ if (skb_dst(skb) == NULL) {
+ /* Find the destination interface */
+ if (IPVERSION_IS_4(ipver)) {
+ u_int32_t daddr;
+
+ if ((manip != NULL) && (HOOK2MANIP(hooknum) == NF_NAT_MANIP_DST))
+ daddr = manip->dst.u3.ip;
+ else
+ daddr = iph->daddr;
+ ip_route_input(skb, daddr, iph->saddr, iph->tos, skb->dev);
+ }
+#ifdef CONFIG_IPV6
+ else
+ ip6_route_input(skb);
+#endif /* CONFIG_IPV6 */
+ }
+
+ /* Ensure the packet belongs to a forwarding connection and is
+ * destined to a unicast address.
+ */
+ rt = skb_rtable(skb);
+ if (rt == NULL)
+ return;
+
+ if (IPVERSION_IS_4(ipver) && (manip != NULL) && (HOOK2MANIP(hooknum) == NF_NAT_MANIP_DST))
+ n = dst_neigh_lookup(&rt->dst, &manip->dst.u3.ip);
+ else
+ n = dst_neigh_lookup_skb(&rt->dst, skb);
+
+ if (n == NULL)
+ return;
+
+ nud_flags = NUD_PERMANENT | NUD_REACHABLE | NUD_STALE | NUD_DELAY | NUD_PROBE;
+#ifdef CTF_PPPOE
+ if ((skb_dst(skb) != NULL) && (skb_dst(skb)->dev != NULL) &&
+ (skb_dst(skb)->dev->flags & IFF_POINTOPOINT))
+ nud_flags |= NUD_NOARP;
+#endif
+
+ if ((
+#ifdef CONFIG_IPV6
+ !IPVERSION_IS_4(ipver) ?
+ ((rt->dst.input != ip6_forward) ||
+ !(ipv6_addr_type(&ip6h->daddr) & IPV6_ADDR_UNICAST)) :
+#endif /* CONFIG_IPV6 */
+ ((rt->dst.input != ip_forward) ||
+ (rt->rt_type != RTN_UNICAST))) ||
+ ((n->nud_state & nud_flags) == 0)) {
+ neigh_release(n);
+ return;
+ }
+
+ memset(&ipc_entry, 0, sizeof(ipc_entry));
+
+ /* Init the neighboring sender address */
+ memcpy(ipc_entry.sa.octet, eth_hdr(skb)->h_source, ETH_ALEN);
+
+ /* If the packet is received on a bridge device then save
+ * the bridge cache entry pointer in the ip cache entry.
+ * This will be referenced on the data path to update the
+ * live counter of the brc entry whenever a received packet
+ * matches the corresponding ipc entry.
+ */
+ if ((skb->dev != NULL) && ctf_isbridge(kcih, skb->dev)) {
+#ifdef CTF_ESDK_VERSION
+ ipc_entry.brcp = ctf_brc_lkup(kcih, eth_hdr(skb)->h_source, FALSE);
+#else
+ ipc_entry.brcp = ctf_brc_lkup(kcih, eth_hdr(skb)->h_source);
+ if (ipc_entry.brcp != NULL)
+ ctf_brc_release(kcih, ipc_entry.brcp);
+#endif
+ }
+
+ hh = &n->hh;
+ if (hh->hh_len) {
+ do {
+ seq = read_seqbegin(&hh->hh_lock);
+ eth = (struct ethhdr *)(((unsigned char *)hh->hh_data) + 2);
+ memcpy(ipc_entry.dhost.octet, eth->h_dest, ETH_ALEN);
+ memcpy(ipc_entry.shost.octet, eth->h_source, ETH_ALEN);
+ } while (read_seqretry(&hh->hh_lock, seq));
+ } else {
+ do {
+ seq = read_seqbegin(&n->ha_lock);
+ memcpy(ipc_entry.dhost.octet, n->ha, ETH_ALEN);
+ } while (read_seqretry(&n->ha_lock, seq));
+ memcpy(ipc_entry.shost.octet, skb_dst(skb)->dev->dev_addr, ETH_ALEN);
+ }
+ neigh_release(n);
+
+ /* Add ctf ipc entry for this direction */
+ if (IPVERSION_IS_4(ipver)) {
+ ipc_entry.tuple.sip[0] = iph->saddr;
+ ipc_entry.tuple.dip[0] = iph->daddr;
+#ifdef CONFIG_IPV6
+ } else {
+ memcpy(ipc_entry.tuple.sip, &ip6h->saddr, sizeof(ipc_entry.tuple.sip));
+ memcpy(ipc_entry.tuple.dip, &ip6h->daddr, sizeof(ipc_entry.tuple.dip));
+#endif /* CONFIG_IPV6 */
+ }
+ ipc_entry.tuple.proto = protocol;
+ ipc_entry.tuple.sp = tcph->source;
+ ipc_entry.tuple.dp = tcph->dest;
+
+ ipc_entry.next = NULL;
+
+ /* For vlan interfaces fill the vlan id and the tag/untag actions */
+ if (skb_dst(skb)->dev->priv_flags & IFF_802_1Q_VLAN) {
+ ipc_entry.txif = (void *)vlan_dev_real_dev(skb_dst(skb)->dev);
+ ipc_entry.vid = vlan_dev_vlan_id(skb_dst(skb)->dev);
+ ipc_entry.action = ((vlan_dev_vlan_flags(skb_dst(skb)->dev) & 1) ?
+ CTF_ACTION_TAG : CTF_ACTION_UNTAG);
+ } else {
+ ipc_entry.txif = skb_dst(skb)->dev;
+ ipc_entry.action = CTF_ACTION_UNTAG;
+ }
+
+#ifdef CTF_PPPOE
+ /* For pppoe interfaces fill the session id and header add/del actions */
+ ipc_entry.pppoe_sid = -1;
+ if (skb_dst(skb)->dev->flags & IFF_POINTOPOINT) {
+ /* Transmit interface and sid will be populated by pppoe module */
+ ipc_entry.action |= CTF_ACTION_PPPOE_ADD;
+ skb->ctf_pppoe_cb[0] = 2;
+ ipc_entry.ppp_ifp = skb_dst(skb)->dev;
+ } else if ((skb->dev->flags & IFF_POINTOPOINT) && (skb->ctf_pppoe_cb[0] == 1)) {
+ ipc_entry.action |= CTF_ACTION_PPPOE_DEL;
+ ipc_entry.pppoe_sid = *(uint16 *)&skb->ctf_pppoe_cb[2];
+ ipc_entry.ppp_ifp = skb->dev;
+ }
+#endif
+
+#ifdef CTF_ESDK_VERSION
+ if (((ipc_entry.tuple.proto == IPPROTO_TCP) && (kcih->ipc_suspend & CTF_SUSPEND_TCP_MASK)) ||
+ ((ipc_entry.tuple.proto == IPPROTO_UDP) && (kcih->ipc_suspend & CTF_SUSPEND_UDP_MASK))) {
+#else
+ if (((ipc_entry.tuple.proto == IPPROTO_TCP) && (kcih->ipc_suspend & CTF_SUSPEND_TCP)) ||
+ ((ipc_entry.tuple.proto == IPPROTO_UDP) && (kcih->ipc_suspend & CTF_SUSPEND_UDP))) {
+#endif
+ /* The default action is suspend */
+ ipc_entry.action |= CTF_ACTION_SUSPEND;
+#ifdef CTF_ESDK_VERSION
+ ipc_entry.susp_cnt = ((ipc_entry.tuple.proto == IPPROTO_TCP) ?
+ _popcounts(kcih->ipc_suspend & CTF_SUSPEND_TCP_MASK) :
+ _popcounts(kcih->ipc_suspend & CTF_SUSPEND_UDP_MASK));
+#endif
+ }
+
+ /* Copy the DSCP value. ECN bits must be cleared. */
+ if (IPVERSION_IS_4(ipver))
+ ipc_entry.tos = IPV4_TOS(iph);
+#ifdef CONFIG_IPV6
+ else
+ ipc_entry.tos = IPV6_TRAFFIC_CLASS(ip6h);
+#endif /* CONFIG_IPV6 */
+ ipc_entry.tos &= IPV4_TOS_DSCP_MASK;
+ if (ipc_entry.tos)
+ ipc_entry.action |= CTF_ACTION_TOS;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ /* Initialize the mark for this connection */
+ if (ct->mark != 0) {
+ ipc_entry.mark.value = ct->mark;
+ ipc_entry.action |= CTF_ACTION_MARK;
+ }
+#endif /* CONFIG_NF_CONNTRACK_MARK */
+
+ /* Update the manip ip and port */
+ if (manip != NULL) {
+ if (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC) {
+ ipc_entry.nat.ip = manip->src.u3.ip;
+ ipc_entry.nat.port = manip->src.u.tcp.port;
+ ipc_entry.action |= CTF_ACTION_SNAT;
+ } else {
+ ipc_entry.nat.ip = manip->dst.u3.ip;
+ ipc_entry.nat.port = manip->dst.u.tcp.port;
+ ipc_entry.action |= CTF_ACTION_DNAT;
+ }
+ }
+
+ /* Do bridge cache lookup to determine outgoing interface
+ * and any vlan tagging actions if needed.
+ */
+ if (ctf_isbridge(kcih, ipc_entry.txif)) {
+ ctf_brc_t *brcp;
+
+#ifdef CTF_ESDK_VERSION
+ ctf_brc_acquire(kcih);
+
+ if ((brcp = ctf_brc_lkup(kcih, ipc_entry.dhost.octet, TRUE)) != NULL) {
+ ipc_entry.txbif = ipc_entry.txif;
+ ipc_entry.action |= brcp->action;
+ ipc_entry.txif = brcp->txifp;
+ ipc_entry.vid = brcp->vid;
+ }
+
+ ctf_brc_release(kcih);
+#else
+ brcp = ctf_brc_lkup(kcih, ipc_entry.dhost.octet);
+
+ if (brcp == NULL)
+ return;
+ else {
+ ipc_entry.action |= brcp->action;
+ ipc_entry.txif = brcp->txifp;
+ ipc_entry.vid = brcp->vid;
+ ctf_brc_release(kcih, brcp);
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (IPVERSION_IS_4(ipver))
+ printk("%s: Adding ipc entry for [%d]%u.%u.%u.%u:%u - %u.%u.%u.%u:%u\n", __FUNCTION__,
+ ipc_entry.tuple.proto,
+ NIPQUAD(ipc_entry.tuple.sip[0]), ntohs(ipc_entry.tuple.sp),
+ NIPQUAD(ipc_entry.tuple.dip[0]), ntohs(ipc_entry.tuple.dp));
+#ifdef CONFIG_IPV6
+ else
+ printk("\n%s: Adding ipc entry for [%d]\n"
+ "%08x.%08x.%08x.%08x:%u => %08x.%08x.%08x.%08x:%u\n",
+ __FUNCTION__, ipc_entry.tuple.proto,
+ ntohl(ipc_entry.tuple.sip[0]), ntohl(ipc_entry.tuple.sip[1]),
+ ntohl(ipc_entry.tuple.sip[2]), ntohl(ipc_entry.tuple.sip[3]),
+ ntohs(ipc_entry.tuple.sp),
+ ntohl(ipc_entry.tuple.dip[0]), ntohl(ipc_entry.tuple.dip[1]),
+ ntohl(ipc_entry.tuple.dip[2]), ntohl(ipc_entry.tuple.dip[3]),
+ ntohs(ipc_entry.tuple.dp));
+#endif /* CONFIG_IPV6 */
+ printk("sa %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ipc_entry.shost.octet[0], ipc_entry.shost.octet[1],
+ ipc_entry.shost.octet[2], ipc_entry.shost.octet[3],
+ ipc_entry.shost.octet[4], ipc_entry.shost.octet[5]);
+ printk("da %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ipc_entry.dhost.octet[0], ipc_entry.dhost.octet[1],
+ ipc_entry.dhost.octet[2], ipc_entry.dhost.octet[3],
+ ipc_entry.dhost.octet[4], ipc_entry.dhost.octet[5]);
+ printk("[%d] vid: %d action %x\n", hooknum, ipc_entry.vid, ipc_entry.action);
+ if (manip != NULL)
+ printk("manip_ip: %u.%u.%u.%u manip_port %u\n",
+ NIPQUAD(ipc_entry.nat.ip), ntohs(ipc_entry.nat.port));
+ printk("txif: %s\n", ((struct net_device *)ipc_entry.txif)->name);
+#endif
+
+ ctf_ipc_add(kcih, &ipc_entry, !IPVERSION_IS_4(ipver));
+
+#ifdef CTF_PPPOE
+ if (skb->ctf_pppoe_cb[0] == 2) {
+ ctf_ipc_t *ipct;
+ ipct = ctf_ipc_lkup(kcih, &ipc_entry, ipver == 6);
+ *(uint32 *)&skb->ctf_pppoe_cb[4] = (uint32)ipct;
+ if (ipct != NULL)
+ ctf_ipc_release(kcih, ipct);
+ }
+#endif
+
+ /* Update the attributes flag to indicate a CTF conn */
+ ct->ctf_flags |= (CTF_FLAGS_CACHED | (1 << dir));
+}
+
+int
+ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout)
+{
+ ctf_ipc_t *ipct;
+ struct nf_conntrack_tuple *orig, *repl;
+ ctf_ipc_t orig_ipct, repl_ipct;
+ int ipaddr_sz;
+ bool v6;
+
+ if (!CTF_ENAB(kcih))
+ return (0);
+
+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+ if ((orig->dst.protonum != IPPROTO_TCP) && (orig->dst.protonum != IPPROTO_UDP))
+ return (0);
+
+ repl = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+#ifdef CONFIG_IPV6
+ v6 = (orig->src.l3num == AF_INET6);
+ ipaddr_sz = (v6) ? sizeof(struct in6_addr) : sizeof(struct in_addr);
+#else
+ v6 = FALSE;
+ ipaddr_sz = sizeof(struct in_addr);
+#endif /* CONFIG_IPV6 */
+
+ memset(&orig_ipct, 0, sizeof(orig_ipct));
+ memcpy(orig_ipct.tuple.sip, &orig->src.u3.ip, ipaddr_sz);
+ memcpy(orig_ipct.tuple.dip, &orig->dst.u3.ip, ipaddr_sz);
+ orig_ipct.tuple.proto = orig->dst.protonum;
+ orig_ipct.tuple.sp = orig->src.u.tcp.port;
+ orig_ipct.tuple.dp = orig->dst.u.tcp.port;
+
+ memset(&repl_ipct, 0, sizeof(repl_ipct));
+ memcpy(repl_ipct.tuple.sip, &repl->src.u3.ip, ipaddr_sz);
+ memcpy(repl_ipct.tuple.dip, &repl->dst.u3.ip, ipaddr_sz);
+ repl_ipct.tuple.proto = repl->dst.protonum;
+ repl_ipct.tuple.sp = repl->src.u.tcp.port;
+ repl_ipct.tuple.dp = repl->dst.u.tcp.port;
+
+ /* If the refresh counter of the ipc entry is non-zero, packets are
+ * still flowing on this connection and we should not delete the
+ * conntrack entry.
+ */
+ if (ct_timeout) {
+ ipct = ctf_ipc_lkup(kcih, &orig_ipct, v6);
+
+ /* Postpone the deletion of the ct entry if frames are still
+ * flowing in this direction.
+ */
+ if (ipct != NULL) {
+#ifdef BCMFA
+ ctf_live(kcih, ipct, v6);
+#endif
+ if (ipct->live > 0) {
+ ipct->live = 0;
+ ctf_ipc_release(kcih, ipct);
+ ct->timeout.expires = jiffies + ct->expire_jiffies;
+ add_timer(&ct->timeout);
+ return (-1);
+ }
+ ctf_ipc_release(kcih, ipct);
+ }
+
+ ipct = ctf_ipc_lkup(kcih, &repl_ipct, v6);
+
+ if (ipct != NULL) {
+#ifdef BCMFA
+ ctf_live(kcih, ipct, v6);
+#endif
+ if (ipct->live > 0) {
+ ipct->live = 0;
+ ctf_ipc_release(kcih, ipct);
+ ct->timeout.expires = jiffies + ct->expire_jiffies;
+ add_timer(&ct->timeout);
+ return (-1);
+ }
+ ctf_ipc_release(kcih, ipct);
+ }
+ }
+
+ /* If there have been no packets on this connection for the
+ * timeout period, delete the entries.
+ */
+ ctf_ipc_delete(kcih, &orig_ipct, v6);
+
+ ctf_ipc_delete(kcih, &repl_ipct, v6);
+
+#ifdef DEBUG
+ printk("%s: Deleting the tuple %x %x %d %d %d\n",
+ __FUNCTION__, orig->src.u3.ip, orig->dst.u3.ip, orig->dst.protonum,
+ orig->src.u.tcp.port, orig->dst.u.tcp.port);
+ printk("%s: Deleting the tuple %x %x %d %d %d\n",
+ __FUNCTION__, repl->dst.u3.ip, repl->src.u3.ip, repl->dst.protonum,
+ repl->dst.u.tcp.port, repl->src.u.tcp.port);
+#endif
+
+ return (0);
+}
+#endif /* HNDCTF */
+
unsigned int nf_conntrack_hash_rnd __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
@@ -334,6 +862,9 @@
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
+#ifdef HNDCTF
+ ip_conntrack_ipct_delete(ct, 0);
+#endif /* HNDCTF */
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
@@ -421,7 +952,24 @@
static void death_by_timeout(unsigned long ul_conntrack)
{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
+ struct nf_conn *ct = (struct nf_conn *)ul_conntrack;
+
+#ifdef HNDCTF
+ /* If a negative error is returned, the entry hasn't timed out
+ * yet and deletion is postponed.
+ */
+ if (ip_conntrack_ipct_delete(ct, jiffies >= ct->timeout.expires ? 1 : 0) != 0)
+ return;
+#endif /* HNDCTF */
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ /*
+ * Add an entry to the IP records table before the connection
+ * is deleted.
+ */
+ if (cp_flowstats_enabled) {
+ add_ip_record(ct, false);
+ }
+#endif
+ nf_ct_delete(ct, 0, 0);
}
static inline bool
@@ -688,6 +1236,19 @@
nf_conntrack_event_cache(master_ct(ct) ?
IPCT_RELATED : IPCT_NEW, ct);
+
+// CRADLEPOINT START
+ /*
+ * In certain cases (ipsweep attacks) the conntrack entry needs to
+ * persist so that it can stop further ipsweep attacks. We confirm
+ * that this is a valid conntrack entry and then later DROP the
+ * packet; ~0 is the poison value set by the APPDROP target.
+ */
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ if (ct->appid[0].value == ~0)
+ return NF_DROP;
+#endif
+// CRADLEPOINT END
+
return NF_ACCEPT;
out:
@@ -786,6 +1347,10 @@
if (!ct)
return dropped;
+#ifdef HNDCTF
+ ip_conntrack_ipct_delete(ct, 0);
+#endif /* HNDCTF */
+
if (del_timer(&ct->timeout)) {
if (nf_ct_delete(ct, 0, 0)) {
dropped = 1;
@@ -921,6 +1486,9 @@
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
+// CRADLEPOINT START
+ struct iphdr *iph;
+// CRADLEPOINT END
unsigned int *timeouts;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -954,6 +1522,13 @@
return NULL;
}
+// CRADLEPOINT START
+ if (l3proto->l3proto == PF_INET) {
+ iph = ip_hdr(skb);
+ ct->dscp = (iph->tos) >> 2;
+ }
+// CRADLEPOINT END
+
if (timeout_ext)
nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
GFP_ATOMIC);
@@ -1012,6 +1587,13 @@
nf_ct_expect_put(exp);
}
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ /* Store the start time of the connection; used later to compute
+ * the duration as well as to detect long-duration flows.
+ */
+ ct->start_time = jiffies;
+#endif
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
@@ -1241,6 +1823,9 @@
/* If not in hash table, timer will not be active yet */
if (!nf_ct_is_confirmed(ct)) {
+#ifdef HNDCTF
+ ct->expire_jiffies = extra_jiffies;
+#endif /* HNDCTF */
ct->timeout.expires = extra_jiffies;
} else {
unsigned long newtime = jiffies + extra_jiffies;
@@ -1249,6 +1834,10 @@
HZ jiffies from the old timeout. Need del_timer for race
avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
+ if (newtime - ct->timeout.expires >= HZ) {
+#ifdef HNDCTF
+ ct->expire_jiffies = extra_jiffies;
+#endif /* HNDCTF */
mod_timer_pending(&ct->timeout, newtime);
+ }
}
@@ -1423,6 +2012,9 @@
unsigned int bucket = 0;
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
+#ifdef HNDCTF
+ ip_conntrack_ipct_delete(ct, 0);
+#endif /* HNDCTF */
/* Time to push up daises... */
if (del_timer(&ct->timeout))
nf_ct_delete(ct, portid, report);
@@ -1817,6 +2409,10 @@
ret = nf_conntrack_proto_pernet_init(net);
if (ret < 0)
goto err_proto;
+
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain);
+#endif
return 0;
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index f3b92ce..df3948e 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -18,6 +18,9 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+#include <linux/notifier.h>
+#endif
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
@@ -115,6 +118,52 @@
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
+{
+ unsigned long events, missed;
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ struct net *net = nf_ct_net(ct);
+ int ret = 0;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e)
+ return;
+
+ events = xchg(&e->cache, 0);
+
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
+ return;
+
+ /*
+ * We make a copy of the missed event cache without taking
+ * the lock, thus we may send missed events twice. However,
+ * this does not harm and it happens very rarely.
+ */
+ missed = e->missed;
+
+ if (!((events | missed) & e->ctmask))
+ return;
+
+ item.ct = ct;
+ item.portid = 0;
+ item.report = 0;
+
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ events | missed, &item);
+
+ if (likely(ret >= 0 && !missed))
+ return;
+
+ spin_lock_bh(&ct->lock);
+ if (ret < 0)
+ e->missed |= events;
+ else
+ e->missed &= ~missed;
+ spin_unlock_bh(&ct->lock);
+}
+#else
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
@@ -165,8 +214,15 @@
out_unlock:
rcu_read_unlock();
}
+#endif
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
+}
+#else
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *new)
{
@@ -187,8 +243,16 @@
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
}
+#endif
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain,
+ nb);
+}
+#else
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *new)
{
@@ -202,6 +266,7 @@
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called from ctnetlink_exit. */
}
+#endif
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
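For reference, a consumer of the chain-events variant registers a plain
notifier_block rather than an nf_ct_event_notifier. A minimal sketch
(the callback and block names are illustrative, not part of this patch):

    #include <linux/notifier.h>
    #include <net/netfilter/nf_conntrack_ecache.h>

    /* Illustrative consumer of the conntrack notifier chain. */
    static int example_ct_event(struct notifier_block *nb,
                                unsigned long events, void *ptr)
    {
            struct nf_ct_event *item = ptr;

            if (events & (1 << IPCT_DESTROY))
                    pr_debug("conntrack %p destroyed\n", item->ct);
            return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_ct_event,
    };

    /* registered via nf_conntrack_register_notifier(&init_net, &example_nb) */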
int nf_ct_expect_register_notifier(struct net *net,
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 3ce5c31..bc0b9ed 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -60,6 +60,29 @@
} while (cmpxchg(address, old, tmp) != old);
}
+/* cradlepoint - ability to set label and remove others */
+int nf_connlabel_replace_simple(struct nf_conn *ct, u16 bit)
+{
+ struct nf_conn_labels *labels;
+ unsigned int size, i, words32;
+ u32 *dst;
+
+ labels = nf_ct_labels_find(ct);
+ if (!labels)
+ return -ENOSPC;
+
+ size = labels->words * sizeof(long);
+ words32 = size / sizeof(u32);
+
+ dst = (u32 *) labels->bits;
+ for (i = 0; i < words32; i++)
+ replace_u32(&dst[i], 0, 0);
+
+ return nf_connlabel_set(ct, bit);
+}
+EXPORT_SYMBOL_GPL(nf_connlabel_replace_simple);
+/* end cradlepoint */
+
int nf_connlabels_replace(struct nf_conn *ct,
const u32 *data,
const u32 *mask, unsigned int words32)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 660939d..ecaa9dc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -28,6 +28,9 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+#include <linux/notifier.h>
+#endif
#include <linux/slab.h>
#include <linux/netfilter.h>
@@ -316,6 +319,52 @@
#define ctnetlink_dump_mark(a, b) (0)
#endif
+#ifdef CONFIG_NF_CONNTRACK_APPID
+static inline int
+ctnetlink_dump_appid(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ int i;
+
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+
+ if (i == 0 && ct->appid[i].value == 0) {
+ /* No APPID was detected for this CT */
+ return 0;
+ }
+
+ /* Now we need to find the last APPID in the chain */
+ if (i > 0 && ct->appid[i].app_id == 0) {
+ if (nla_put_u16(skb, CTA_APPID, htons(ct->appid[i-1].app_id)))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, CTA_APPCAT, ct->appid[i-1].cat_id))
+ goto nla_put_failure;
+
+ break;
+ }
+
+ if (i == MAX_CT_APPID_VALS - 1) {
+ // All APPID vals filled
+ if (nla_put_u16(skb, CTA_APPID, htons(ct->appid[i].app_id)))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, CTA_APPCAT, ct->appid[i].cat_id))
+ goto nla_put_failure;
+
+ break;
+ }
+
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+#else
+#define ctnetlink_dump_appid(a, b) (0)
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_SECMARK
static inline int
ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
@@ -528,6 +577,7 @@
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0 ||
+ ctnetlink_dump_appid(skb, ct) < 0 ||
ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
@@ -627,23 +677,35 @@
#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_APPID */
+ + nla_total_size(sizeof(u_int8_t)) /* CTA_APPCAT */
+#endif
;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+static int ctnetlink_conntrack_event(struct notifier_block *this,
+ unsigned long events, void *ptr)
+#else
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+#endif
{
const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
unsigned int flags = 0, group;
int err;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ struct nf_ct_event *item = (struct nf_ct_event *)ptr;
+#endif
+ struct nf_conn *ct = item->ct;
/* ignore our fake conntrack entry */
if (nf_ct_is_untracked(ct))
@@ -713,6 +775,9 @@
if (ctnetlink_dump_status(skb, ct) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_appid(skb, ct) < 0)
+ goto nla_put_failure;
+
if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
@@ -1094,6 +1159,8 @@
.len = NF_CT_LABELS_MAX_SIZE },
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
.len = NF_CT_LABELS_MAX_SIZE },
+ [CTA_APPID] = { .type = NLA_U16 },
+ [CTA_APPCAT] = { .type = NLA_U8 },
};
static int ctnetlink_flush_conntrack(struct net *net,
@@ -1697,8 +1764,18 @@
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK])
- ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+// CRADLEPOINT START
+ if (cda[CTA_MARK]) {
+ /* With CTA_MARK_MASK, only the masked bits are replaced by
+ * CTA_MARK; without it the whole mark is overwritten.
+ */
+ u32 mask = 0, mark, newmark;
+ if (cda[CTA_MARK_MASK])
+ mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+ mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ newmark = (ct->mark & mask) ^ mark;
+ if (newmark != ct->mark)
+ ct->mark = newmark;
+ }
+// CRADLEPOINT END
#endif
if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
@@ -2225,6 +2302,11 @@
if (ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ if (ctnetlink_dump_appid(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_SECMARK
if (ct->secmark && ctnetlink_dump_secctx(skb, ct) < 0)
goto nla_put_failure;
@@ -3262,9 +3344,15 @@
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+static struct notifier_block ctnl_notifier = {
+ .notifier_call = ctnetlink_conntrack_event,
+};
+#else
static struct nf_ct_event_notifier ctnl_notifier = {
.fcn = ctnetlink_conntrack_event,
};
+#endif
static struct nf_exp_event_notifier ctnl_notifier_exp = {
.fcn = ctnetlink_expect_event,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 278f3b9..a3f1d27 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -33,6 +33,11 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#ifdef HNDCTF
+#include <ctf/hndctf.h>
+extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout);
+#endif /* HNDCTF */
+
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -808,6 +813,122 @@
return tcp_pernet(net)->timeouts;
}
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS /* CP_LATENCY_IP */
+
+#define MIN_IP_PKT_SIZE 84
+#define MIN_DELTA_INIT 0xffffff
+
+/***** Cradlepoint Latency Measurement Enhancement *****/
+/*
+ * For each TCP packet on a connection, compute the round-trip latency
+ * and accumulate it over multiple samples. We track RTT by recording
+ * sequence numbers, matching the corresponding ACKs, and computing the
+ * delay between the two. The cumulative latency is stored in the
+ * conntrack structure and is later used to calculate the average latency.
+ * 1) We measure latency for outbound connections, i.e. connections
+ * originated by clients on the LAN side; the majority of our traffic
+ * is expected to be outbound.
+ * 2) The calculation does not account for TCP window size changes. For
+ * outbound connections (with 5+ ms latency) and interactive
+ * applications, the window size has little effect.
+ * 3) When the connection terminates, the latency numbers are added to
+ * an "IP records table" maintained in cp_ip_record_track.c.
+ */
+static void tcp_packet_latency(struct nf_conn *ct, const struct sk_buff *skb,
+ const struct tcphdr *th, enum ip_conntrack_dir dir,
+ enum tcp_conntrack new_state, unsigned int dataoff)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+
+ /* If these are the initial SYN and SYN-ACK packets, compute
+ * the SYN latency.
+ */
+ if (th->syn && !th->ack) {
+ ct->proto.tcp.num_sync++;
+ ct->proto.tcp.last_sync_time = jiffies;
+ }
+ if (th->syn && th->ack) {
+ ct->proto.tcp.sync_delta = jiffies - ct->proto.tcp.last_sync_time;
+ }
+ /* Start measuring latency if the following conditions are met:
+ * - the packet is outbound,
+ * - the TCP session is established,
+ * - the packet is not a bare ACK for a data packet from the remote
+ * end (i.e. it is larger than the minimum IP packet).
+ */
+ if ((dir == IP_CT_DIR_ORIGINAL) && (new_state > TCP_CONNTRACK_SYN_RECV)
+ && (new_state < TCP_CONNTRACK_CLOSE_WAIT)
+ && (ct->proto.tcp.last_seq_num == 0)
+ && (ntohs(iph->tot_len) > MIN_IP_PKT_SIZE)) {
+ ct->proto.tcp.last_seq_num = ntohl(th->seq);
+ ct->proto.tcp.last_time = jiffies;
+ ct->proto.tcp.iplen = ntohs(iph->tot_len);
+ ct->proto.tcp.doff = (th->doff);
+ }
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ ct->packets_out++;
+ ct->bytes_out += ntohs(iph->tot_len);
+ } else {
+ ct->packets_in++;
+ ct->bytes_in += ntohs(iph->tot_len);
+ }
+
+ /*
+ * If this is a reply packet whose ACK sequence number matches or
+ * exceeds the recorded one, perform the latency calculation and
+ * update the stats.
+ */
+ if ((dir == IP_CT_DIR_REPLY)
+ && (ct->proto.tcp.last_seq_num <= ntohl(th->ack_seq))
+ && (ct->proto.tcp.last_seq_num != 0)) {
+ long delta = 0, curr_time = jiffies;
+
+ if ((ct->int_name[0] == 0) && (skb->dev)) {
+ strncpy(ct->int_name, skb->dev->name, IFNAMSIZ - 1);
+ }
+ /* TODO(ppai): investigate this */
+ /* Cover the time rollover case */
+ if (curr_time >= ct->proto.tcp.last_time) {
+ delta = curr_time - ct->proto.tcp.last_time;
+ ct->proto.tcp.tot_delta += delta;
+ ct->proto.tcp.tot_delta_square += (delta * delta);
+
+ if (delta > ct->proto.tcp.max_delta) {
+ ct->proto.tcp.max_delta = delta;
+ }
+
+ if (delta < ct->proto.tcp.min_delta) {
+ ct->proto.tcp.min_delta = delta;
+ }
+
+ ct->proto.tcp.num_samples++;
+
+ if (jiffies > ct->start_time + (CP_IP_REC_TIMEOUT * HZ)) {
+ // Duration too long, add this as an IP record and restart
+ add_ip_record(ct, true);
+ ct->proto.tcp.max_delta = 0;
+ ct->proto.tcp.min_delta = MIN_DELTA_INIT;
+ ct->proto.tcp.last_time = 0;
+ ct->proto.tcp.tot_delta = 0;
+ ct->proto.tcp.tot_delta_square = 0;
+ ct->proto.tcp.num_sync = 0;
+ ct->proto.tcp.num_samples = 0;
+ ct->proto.tcp.num_packets = 0;
+ ct->int_name[0] = 0;
+ ct->start_time = jiffies;
+ ct->bytes_in = 0;
+ ct->bytes_out = 0;
+ ct->packets_in = ct->packets_out = 0;
+ }
+
+ }
+ ct->proto.tcp.last_seq_num = 0;
+ ct->proto.tcp.last_time = 0;
+ }
+}
+#endif /* CONFIG_NETFILTER_CP_FLOWSTATS */
+
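The accumulators above (tot_delta, tot_delta_square, num_samples) are
sufficient to recover the mean and variance on the reader side. A sketch
with hypothetical helper names (the real consumer is cp_ip_record_track.c,
which is not part of this diff):

    /* Hypothetical readers of the accumulated latency counters. */
    static inline unsigned long cp_latency_mean(const struct nf_conn *ct)
    {
            unsigned long n = ct->proto.tcp.num_samples;

            return n ? ct->proto.tcp.tot_delta / n : 0;
    }

    /* Variance via E[x^2] - E[x]^2 over the same samples. */
    static inline unsigned long cp_latency_var(const struct nf_conn *ct)
    {
            unsigned long n = ct->proto.tcp.num_samples, mean;

            if (!n)
                    return 0;
            mean = ct->proto.tcp.tot_delta / n;
            return ct->proto.tcp.tot_delta_square / n - mean * mean;
    }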
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
@@ -837,6 +958,13 @@
new_state = tcp_conntracks[dir][index][old_state];
tuple = &ct->tuplehash[dir].tuple;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS /* CP_LATENCY_IP */
+ /* Invoke latency code if enabled */
+ if (cp_flowstats_enabled) {
+ tcp_packet_latency(ct, skb, th, dir, new_state, dataoff);
+ }
+#endif /* CONFIG_NETFILTER_CP_FLOWSTATS */
+
switch (new_state) {
case TCP_CONNTRACK_SYN_SENT:
if (old_state < TCP_CONNTRACK_TIME_WAIT)
@@ -1033,6 +1161,18 @@
break;
}
+#ifdef HNDCTF
+ /* Remove the ipc entries on receipt of FIN or RST */
+ if (CTF_ENAB(kcih)) {
+ if (ct->ctf_flags & CTF_FLAGS_CACHED) {
+ if (th->fin || th->rst) {
+ ip_conntrack_ipct_delete(ct, 0);
+ }
+ goto in_window;
+ }
+ }
+#endif /* HNDCTF */
+
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
skb, dataoff, th, pf)) {
spin_unlock_bh(&ct->lock);
@@ -1042,6 +1182,9 @@
/* From now on we have got in-window packets */
ct->proto.tcp.last_index = index;
ct->proto.tcp.last_dir = dir;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ ct->proto.tcp.pkt_cnt++;
+#endif
pr_debug("tcp_conntracks: ");
nf_ct_dump_tuple(tuple);
@@ -1165,6 +1308,19 @@
/* tcp_packet will set them */
ct->proto.tcp.last_index = TCP_NONE_SET;
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ /* Reset latency calc related fields */
+ ct->proto.tcp.last_seq_num = 0;
+ ct->proto.tcp.max_delta = 0;
+ ct->proto.tcp.min_delta = MIN_DELTA_INIT;
+ ct->proto.tcp.last_time = 0;
+ ct->proto.tcp.tot_delta = 0;
+ ct->proto.tcp.tot_delta_square = 0;
+ ct->proto.tcp.num_sync = 0;
+ ct->proto.tcp.num_samples = 0;
+ ct->proto.tcp.num_packets = 0;
+ ct->int_name[0] = 0;
+#endif
pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 478f92f..9d9644f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -86,6 +86,11 @@
unsigned int hooknum,
unsigned int *timeouts)
{
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ const struct iphdr *iph = ip_hdr(skb);
+#endif
+
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -98,6 +103,33 @@
nf_ct_refresh_acct(ct, ctinfo, skb,
timeouts[UDP_CT_UNREPLIED]);
}
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ // Update stats and perform the timeout check.
+ if (iph->saddr == ct->src_ip) {
+ ct->packets_out++;
+ ct->bytes_out += ntohs(iph->tot_len);
+
+ } else {
+ ct->packets_in++;
+ ct->bytes_in += ntohs(iph->tot_len);
+ /* Save the interface name if not already saved.
+ * For UDP flows we store the input interface of the
+ * response packet.
+ */
+ if ((ct->int_name[0] == 0) && (skb->dev)) {
+ strncpy(ct->int_name, skb->dev->name, IFNAMSIZ - 1);
+ }
+ }
+
+ if (cp_flowstats_enabled && (jiffies > ct->start_time + (CP_IP_REC_TIMEOUT * HZ))) {
+ // Duration too long, add this as an IP record and restart
+ add_ip_record(ct, true);
+ ct->start_time = jiffies;
+ ct->packets_in = ct->packets_out = 0;
+ ct->bytes_in = ct->bytes_out = 0;
+ ct->int_name[0] = 0;
+ }
+#endif
return NF_ACCEPT;
}
@@ -105,6 +137,13 @@
static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
+#ifdef CONFIG_NETFILTER_CP_FLOWSTATS
+ /* Save the source IP address so later packets can be classified as inbound or outbound */
+ struct iphdr *iph = ip_hdr(skb);
+ ct->src_ip = iph->saddr;
+ ct->int_name[0] = 0;
+#endif
return true;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1fb3cac..c29073c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -204,6 +204,13 @@
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
int ret = 0;
+// CRADLEPOINT START
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ int i;
+ uint16_t app_id;
+ uint8_t cat_id;
+#endif /* CONFIG_NF_CONNTRACK_APPID */
+// CRADLEPOINT END
NF_CT_ASSERT(ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
@@ -264,6 +271,35 @@
ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
+// CRADLEPOINT START
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ cat_id = app_id = 0;
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+ if (i == 0 && ct->appid[i].value == 0) {
+ // No APPID was detected for this CT
+ break;
+ }
+
+ if (i > 0 && ct->appid[i].app_id == 0) {
+ // Find the last APPID in the chain
+ cat_id = ct->appid[i-1].cat_id;
+ app_id = ct->appid[i-1].app_id;
+ break;
+ }
+
+ if (i == MAX_CT_APPID_VALS - 1) {
+ // All APPID vals filled
+ cat_id = ct->appid[i].cat_id;
+ app_id = ct->appid[i].app_id;
+ break;
+ }
+ }
+ seq_printf(s, "catid=%u appid=%u ", cat_id, app_id);
+#endif /* CONFIG_NF_CONNTRACK_APPID */
+
+ seq_printf(s, "dscp=%u ", ct->dscp);
+// CRADLEPOINT END
+
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
if (seq_has_overflowed(s))
@@ -479,6 +515,15 @@
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef HNDCTF
+ {
+ .procname = "nf_ctf_disable",
+ .data = &nf_ctf_disable,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{ }
};
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 44516c9..75e054b 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,6 +30,16 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+#ifdef HNDCTF
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <typedefs.h>
+#include <osl.h>
+#include <ctf/hndctf.h>
+
+#define NFC_CTF_ENABLED (1 << 31)
+#endif /* HNDCTF */
+
static DEFINE_SPINLOCK(nf_nat_lock);
static DEFINE_MUTEX(nf_nat_proto_mutex);
@@ -129,6 +139,12 @@
return reciprocal_scale(hash, net->ct.nat_htable_size);
}
+#ifdef HNDCTF
+extern void ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum,
+ struct nf_conn *ct, enum ip_conntrack_info ci,
+ struct nf_conntrack_tuple *manip);
+#endif /* HNDCTF */
+
/* Is this tuple already taken? (not by us) */
int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
@@ -498,6 +514,9 @@
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+#ifdef HNDCTF
+ ip_conntrack_ipct_add(skb, hooknum, ct, ctinfo, &target);
+#endif /* HNDCTF */
l3proto = __nf_nat_l3proto_find(target.src.l3num);
l4proto = __nf_nat_l4proto_find(target.src.l3num,
diff --git a/net/netfilter/xt_NOACCEL.c b/net/netfilter/xt_NOACCEL.c
new file mode 100644
index 0000000..24e19be
--- /dev/null
+++ b/net/netfilter/xt_NOACCEL.c
@@ -0,0 +1,49 @@
+/* This module disables CTF hardware acceleration on any conntrack
+ * entry created by the matching packet.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+
+MODULE_DESCRIPTION("Xtables: Disables CTF acceleration for packets");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NOACCEL");
+MODULE_ALIAS("ip6t_NOACCEL");
+
+static unsigned int
+noaccel_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct)
+ ct->ctf_flags |= CTF_FLAGS_EXCLUDED;
+
+ skb->nfcache |= NFC_CTF_DISALLOWED;
+
+ return XT_CONTINUE;
+}
+
+static struct xt_target noaccel_tg_reg __read_mostly = {
+ .name = "NOACCEL",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = noaccel_tg,
+ .me = THIS_MODULE,
+};
+
+static int __init noaccel_tg_init(void)
+{
+ return xt_register_target(&noaccel_tg_reg);
+}
+
+static void __exit noaccel_tg_exit(void)
+{
+ xt_unregister_target(&noaccel_tg_reg);
+}
+
+module_init(noaccel_tg_init);
+module_exit(noaccel_tg_exit);
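How the exclusion takes effect is up to the CTF side, which is not part of
this diff; presumably the CTF add path bails out on either marker, roughly
along these lines (an assumed sketch, inferred from the flag names only):

    /* Assumed consumer-side check (illustrative, not from this patch). */
    static bool ctf_may_accelerate(const struct nf_conn *ct,
                                   const struct sk_buff *skb)
    {
            return !(ct->ctf_flags & CTF_FLAGS_EXCLUDED) &&
                   !(skb->nfcache & NFC_CTF_DISALLOWED);
    }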
diff --git a/net/netfilter/xt_USAGE.c b/net/netfilter/xt_USAGE.c
new file mode 100644
index 0000000..ad22f35
--- /dev/null
+++ b/net/netfilter/xt_USAGE.c
@@ -0,0 +1,511 @@
+/* Kernel module to track data usage by IP addresses. */
+
+/* * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/seq_file.h>
+#include <linux/time.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/spinlock.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_usage.h>
+#include <linux/netfilter/xt_mac.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/proc_fs.h>
+
+/* #define USAGE_DEBUG 1 */
+/* #define USAGE_TRACE 1 */
+
+#ifdef USAGE_DEBUG
+#define USAGE_DBG printk
+#else
+#define USAGE_DBG(format, args...)
+#endif
+
+#ifdef USAGE_TRACE
+#define USAGE_T printk
+#else
+#define USAGE_T(format, args...)
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Will Grover <wgrover@cradlepoint.com>");
+MODULE_DESCRIPTION("Xtables: MAC address based usage statistics");
+MODULE_ALIAS("ipt_USAGE");
+MODULE_ALIAS("USAGE");
+
+struct xt_usage_stats {
+ u_int64_t bytes;
+ u_int64_t packets;
+};
+
+struct xt_usage_entry {
+ struct hlist_node node;
+ unsigned int key;
+
+ union nf_inet_addr addr;
+ unsigned char family;
+ unsigned char mac[ETH_ALEN];
+
+ struct xt_usage_stats download;
+ struct xt_usage_stats upload;
+
+ unsigned long first_time;
+ unsigned long last_time;
+};
+
+struct xt_usage_htable {
+ u_int8_t reset;
+ u_int8_t usecount;
+
+ struct proc_dir_entry *pde;
+ char *name;
+};
+
+static unsigned int jhash_rnd __read_mostly;
+static bool rnd_inited __read_mostly;
+
+static int xt_usage_count __read_mostly;
+static struct hlist_head xt_usage_hash[USAGE_HASH_BITS] __read_mostly;
+static DEFINE_SPINLOCK(hash_lock);
+
+static struct proc_dir_entry *ipt_usage_procdir;
+static const struct file_operations usage_file_ops;
+
+static inline unsigned int hash_ip_key(const union nf_inet_addr addr, const unsigned char family)
+{
+ if (family == NFPROTO_IPV4)
+ return jhash((const char *)&addr.ip, IP_ALEN, jhash_rnd) & (USAGE_HASH_BITS - 1);
+
+ if (family == NFPROTO_IPV6)
+ return jhash((const char *)&addr.ip6, IP6_ALEN, jhash_rnd) & (USAGE_HASH_BITS - 1);
+
+ printk(KERN_ERR "(%s:%s) Unknown family %d", __FILE__, __func__, family);
+ return 0;
+}
+
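Note that masking with USAGE_HASH_BITS - 1 only distributes keys evenly if
USAGE_HASH_BITS is a power of two. A compile-time guard would make that
assumption explicit (a sketch; usage_tg_init() would be one place for it):

    /* Reject non-power-of-two table sizes at build time. */
    BUILD_BUG_ON(USAGE_HASH_BITS & (USAGE_HASH_BITS - 1));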
+static struct xt_usage_entry *find_entry(const struct sk_buff *skb, const unsigned char family, const unsigned char direction)
+{
+ struct xt_usage_entry *obj = NULL;
+ union nf_inet_addr addr = { 0 };
+ unsigned char *mac = direction ? eth_hdr(skb)->h_source : NULL;
+ unsigned int key = 0;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (family == NFPROTO_IPV4)
+ addr.ip = direction ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
+
+ if (family == NFPROTO_IPV6)
+ memcpy(&addr.ip6, direction ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr, IP6_ALEN);
+
+ key = hash_ip_key(addr, family);
+ if (hlist_empty(&xt_usage_hash[key]))
+ return NULL;
+
+ hlist_for_each_entry(obj, &xt_usage_hash[key], node) {
+ USAGE_DBG("(%s:%s) obj key=%u ip=%pI4 ip6=%pI6 mac=%pM\n", __FILE__, __func__, key, &obj->addr.ip, &obj->addr.ip6, obj->mac);
+ /* Find the first matching IP, which will always be the most recently updated entry due to hlist_add_head */
+ if ((family == NFPROTO_IPV4 && (obj->addr.ip == addr.ip)) || (family == NFPROTO_IPV6 && !memcmp(obj->addr.ip6, addr.ip6, IP6_ALEN))) {
+ if ((mac && !memcmp(mac, obj->mac, ETH_ALEN)) || mac == NULL)
+ return obj;
+ }
+ }
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return NULL;
+}
+
+static struct xt_usage_entry *add_entry(const struct sk_buff *skb, const unsigned char family)
+{
+ struct xt_usage_entry *obj = NULL;
+ struct timespec now;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (unlikely(xt_usage_count >= IPT_USAGE_MAXENTRY)) {
+ printk(KERN_ERR "%s: %s usage limit exceeded.\n", __FILE__, __func__);
+ return NULL;
+ }
+
+ obj = kzalloc(sizeof(struct xt_usage_entry), GFP_ATOMIC);
+ if (unlikely(obj == NULL)) {
+ printk(KERN_ERR "%s: %s error allocating usage entry.\n", __FILE__, __func__);
+ return NULL;
+ }
+
+ get_monotonic_boottime(&now);
+ obj->last_time = obj->first_time = (unsigned long) now.tv_sec;
+
+ obj->upload.bytes = skb->len;
+ obj->upload.packets = 1;
+
+ memcpy(&obj->mac, eth_hdr(skb)->h_source, ETH_ALEN);
+
+ obj->family = family;
+ if (obj->family == NFPROTO_IPV4)
+ obj->addr.ip = ip_hdr(skb)->saddr;
+
+ if (obj->family == NFPROTO_IPV6)
+ memcpy(&obj->addr.ip6, &ipv6_hdr(skb)->saddr, IP6_ALEN);
+
+ obj->key = hash_ip_key(obj->addr, obj->family);
+
+ hlist_add_head(&obj->node, &xt_usage_hash[obj->key]);
+ xt_usage_count++;
+
+ USAGE_DBG("(%s:%s) IP=%pI4 IP6=%pI6 MAC=%pM first_time=%lu last_time=%lu\n", __FILE__, __func__, &obj->addr.ip, &obj->addr.ip6, obj->mac, obj->first_time, obj->last_time);
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return obj;
+}
+
+static unsigned int usage_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_usage_tginfo *info = par->targinfo;
+ struct xt_usage_entry *obj = NULL, *first;
+ struct timespec now;
+ unsigned char direction = (info->src == 1);
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
+ return XT_CONTINUE;
+
+ spin_lock_bh(&hash_lock);
+
+ obj = find_entry(skb, par->family, direction);
+ if (obj != NULL) {
+ if (obj->family == NFPROTO_IPV4)
+ USAGE_DBG("(%s:%s) IP=%pI4 MAC=%pM first_time=%lu last_time=%lu\n", __FILE__, __func__,
+ &obj->addr.ip, obj->mac, obj->first_time, obj->last_time);
+ else
+ USAGE_DBG("(%s:%s) IP=%pI6 MAC=%pM first_time=%lu last_time=%lu\n", __FILE__, __func__,
+ &obj->addr.ip6, obj->mac, obj->first_time, obj->last_time);
+
+ get_monotonic_boottime(&now);
+ obj->last_time = (unsigned long) now.tv_sec;
+
+ if (direction) {
+ obj->upload.bytes += skb->len;
+ obj->upload.packets++;
+
+ /* Edge case: Make sure that most recent ip & mac are at top of hlist */
+ first = hlist_entry_safe((&xt_usage_hash[obj->key])->first, struct xt_usage_entry, node);
+ if (memcmp(first->mac, obj->mac, ETH_ALEN)) {
+ hlist_del(&obj->node);
+ hlist_add_head(&obj->node, &xt_usage_hash[obj->key]);
+ }
+ } else {
+ obj->download.bytes += skb->len;
+ obj->download.packets++;
+ }
+ } else if (direction) {
+ obj = add_entry(skb, par->family);
+ }
+
+ spin_unlock_bh(&hash_lock);
+
+ if (obj == NULL) {
+ USAGE_DBG("(%s:%s) Anomaly detected IP=%pI4 MAC=%pM\n", __FILE__, __func__,
+ direction ? &ip_hdr(skb)->saddr : &ip_hdr(skb)->daddr, direction ? eth_hdr(skb)->h_source : NULL);
+ }
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return XT_CONTINUE;
+}
+
+static int xt_usage_check(const struct xt_tgchk_param *par)
+{
+ struct xt_usage_tginfo *info = par->targinfo;
+ struct xt_usage_htable *priv;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (unlikely(!rnd_inited)) {
+ get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+ rnd_inited = true;
+ }
+
+ if (!info->priv) {
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv == NULL)
+ return -ENOMEM;
+
+ priv->usecount = 0;
+ priv->reset = info->readreset == 1;
+
+ priv->name = kstrdup(info->name, GFP_KERNEL);
+ if (!priv->name) {
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ priv->pde = proc_create_data(info->name, 0600, ipt_usage_procdir, &usage_file_ops, priv);
+ if (priv->pde == NULL) {
+ kfree(priv->name);
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ info->priv = priv;
+ }
+
+ info->priv->usecount += 1;
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return 0;
+}
+
+static void xt_usage_destroy(const struct xt_tgdtor_param *par)
+{
+ struct xt_usage_tginfo *info = par->targinfo;
+ struct xt_usage_htable *priv;
+ struct xt_usage_entry *obj;
+ struct hlist_node *n;
+ int i = 0;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (info->priv != NULL) {
+ USAGE_DBG("(%s:%s) usecount=%d\n", __FILE__, __func__, info->priv->usecount);
+ priv = info->priv;
+ priv->usecount -= 1;
+ if (priv->usecount <= 0) {
+ /* if destroy is called, empty the list */
+ for (i = 0; i < USAGE_HASH_BITS; i++) {
+ spin_lock_bh(&hash_lock);
+ if (!hlist_empty(&xt_usage_hash[i])) {
+ hlist_for_each_entry_safe(obj, n, &xt_usage_hash[i], node) {
+ xt_usage_count--;
+
+ hlist_del(&obj->node);
+ kfree(obj);
+ }
+ }
+ spin_unlock_bh(&hash_lock);
+ }
+
+ if (priv->name)
+ kfree(priv->name);
+
+ if (priv->pde != NULL)
+ proc_remove(priv->pde);
+ kfree(priv);
+ info->priv = NULL;
+ }
+ }
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+}
+
+static void *seq_start(struct seq_file *s, loff_t *pos)
+{
+ unsigned int *bucket;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ spin_lock_bh(&hash_lock);
+ if (*pos >= USAGE_HASH_BITS)
+ return NULL;
+
+ bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
+ if (!bucket)
+ return ERR_PTR(-ENOMEM);
+
+ *bucket = *pos;
+
+ /* USAGE_DBG("(%s:%s) bucket %u pos = %llu\n", __FILE__, __func__, *bucket, *pos); */
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return bucket;
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ unsigned int *bucket = (unsigned int *)v;
+
+ USAGE_DBG("(%s:%s) entry\n", __FILE__, __func__);
+
+ *pos = ++(*bucket);
+ if (*pos >= USAGE_HASH_BITS) {
+ kfree(v);
+ return NULL;
+ }
+
+ /* USAGE_DBG("(%s:%s) bucket %u pos = %llu\n", __FILE__, __func__, *bucket, *pos); */
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return bucket;
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+{
+ unsigned int *bucket = (unsigned int *)v;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (!IS_ERR_OR_NULL(bucket))
+ kfree(bucket);
+ spin_unlock_bh(&hash_lock);
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+}
+
+#define USAGE_TIMEOUT 300
+static int seq_show(struct seq_file *s, void *v)
+{
+ struct xt_usage_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct xt_usage_entry *obj;
+ struct hlist_node *n;
+ struct timespec now;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (!hlist_empty(&xt_usage_hash[*bucket])) {
+ get_monotonic_boottime(&now);
+
+ hlist_for_each_entry_safe(obj, n, &xt_usage_hash[*bucket], node) {
+ USAGE_DBG("MAC = %pm, IP = %pI4, IP6 = %pI6, "
+ "Up_Bytes = %llu, Up_Packets = %llu, "
+ "Down_Bytes = %llu, Down_Packets = %llu, "
+ "Last_Time = %lu, First_Time = %lu, Connect_Time = %lu, "
+ "Timeout = %s\n",
+ obj->mac, &obj->addr.ip, &obj->addr.ip6,
+ obj->upload.bytes, obj->upload.packets,
+ obj->download.bytes, obj->download.packets,
+ obj->last_time, obj->first_time, obj->last_time - obj->first_time,
+ (USAGE_TIMEOUT + obj->last_time) < (unsigned long) now.tv_sec ? "True" : "False");
+
+ seq_printf(s, "MAC = %pM, ", obj->mac);
+
+ if (obj->family == NFPROTO_IPV4)
+ seq_printf(s, "IP = %pI4, ", &obj->addr.ip);
+
+ if (obj->family == NFPROTO_IPV6)
+ seq_printf(s, "IP = %pI6, ", &obj->addr.ip6);
+
+ seq_printf(s, "Up_Bytes = %llu, Up_Packets = %llu, ", obj->upload.bytes, obj->upload.packets);
+ seq_printf(s, "Down_Bytes = %llu, Down_Packets = %llu, ", obj->download.bytes, obj->download.packets);
+
+ seq_printf(s, "Last_Time = %lu, First_Time = %lu, Connect_Time = %lu, ",
+ obj->last_time, obj->first_time, obj->last_time - obj->first_time);
+
+ seq_printf(s, "Timeout = %s\n", (USAGE_TIMEOUT + obj->last_time) < (unsigned long) now.tv_sec ? "True" : "False");
+
+ if (htable->reset == 1) {
+ obj->upload.bytes = obj->download.bytes = 0;
+ obj->upload.packets = obj->download.packets = 0;
+
+ if ((USAGE_TIMEOUT + obj->last_time) < (unsigned long) now.tv_sec) {
+ hlist_del(&obj->node);
+ kfree(obj);
+
+ xt_usage_count--;
+ }
+ }
+ }
+ }
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return 0;
+}
+
+static const struct seq_operations usage_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+
+static int usage_open(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &usage_seq_ops);
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+ sf->private = PDE_DATA(inode);
+ }
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return ret;
+}
+
+static const struct file_operations usage_file_ops = {
+ .owner = THIS_MODULE,
+ .open = usage_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct xt_target usage_tg_reg __read_mostly = {
+ .name = "USAGE",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = usage_tg,
+ .checkentry = xt_usage_check,
+ .destroy = xt_usage_destroy,
+ .targetsize = sizeof(struct xt_usage_tginfo),
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+};
+
+static int __init usage_tg_init(void)
+{
+ int i, ret = 0;
+
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ for (i = 0; i < USAGE_HASH_BITS; i++)
+ INIT_HLIST_HEAD(&xt_usage_hash[i]);
+
+ ipt_usage_procdir = proc_mkdir("xt_usage", init_net.proc_net);
+ if (!ipt_usage_procdir) {
+ printk(KERN_ERR "%s: %s proc_mkdir failed.\n", __FILE__, __func__);
+ return -ENOMEM;
+ }
+
+ ret = xt_register_target(&usage_tg_reg);
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+
+ return ret;
+}
+
+static void __exit usage_tg_exit(void)
+{
+ USAGE_T("(%s:%s) entry\n", __FILE__, __func__);
+
+ proc_remove(ipt_usage_procdir);
+ xt_unregister_target(&usage_tg_reg);
+
+ USAGE_T("(%s:%s) exit\n", __FILE__, __func__);
+}
+
+module_init(usage_tg_init);
+module_exit(usage_tg_exit);
diff --git a/net/netfilter/xt_appid.c b/net/netfilter/xt_appid.c
new file mode 100644
index 0000000..eff3b5e
--- /dev/null
+++ b/net/netfilter/xt_appid.c
@@ -0,0 +1,123 @@
+/*
+ * xt_appid - Netfilter module to match NF appid value
+ *
+ * (C) 2014 Will Grover <wgrover@cradlepoint.com>
+ * Copyright © Cradlepoint, 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/xt_appid.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Will Grover <wgrover@cradlepoint.com>");
+MODULE_DESCRIPTION("Xtables: AppID matching operations");
+MODULE_ALIAS("ipt_appid");
+MODULE_ALIAS("ip6t_appid");
+MODULE_ALIAS("ipt_APPDROP");
+MODULE_ALIAS("ip6t_APPDROP");
+
+//#define APPID_DEBUG 1
+static unsigned int
+appdrop_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ /* If we don't have an appid we should not have arrived here anyway */
+ if ((ct == NULL) || (ct->appid[0].value == 0))
+ return XT_CONTINUE;
+
+ /* Poison the appid value so the connection is always dropped in ips_hook */
+#ifdef APPID_DEBUG
+ printk("APPDROP poisoning the connection\n");
+#endif
+ ct->appid[0].value = ~0;
+ return NF_DROP;
+}
+
+static bool
+appid_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_appid_mtinfo *info = par->matchinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int i = 0;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ /* If we don't have an appid or a ct entry yet we can't match */
+ if ((ct == NULL) || (ct->appid[0].value == 0)) {
+ return false;
+ } else {
+#ifdef APPID_DEBUG
+ printk("appid info mask is %08x\n", info->mask);
+ printk("appid info appid is %08x\n", info->appid);
+#endif
+
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+#ifdef APPID_DEBUG
+
+ printk("appid info value is %08x\n", ct->appid[i].value);
+#endif
+ if (ct->appid[i].value == 0)
+ break;
+
+ if ((ct->appid[i].value & info->mask) == info->appid)
+ return !info->invert;
+ }
+ }
+
+ return false;
+}
+
+static struct xt_target appdrop_tg_reg __read_mostly = {
+ .name = "APPDROP",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = appdrop_tg,
+ .me = THIS_MODULE,
+};
+
+static struct xt_match appid_mt_reg __read_mostly = {
+ .name = "appid",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = appid_mt,
+ .matchsize = sizeof(struct xt_appid_mtinfo),
+ .me = THIS_MODULE,
+};
+
+static int __init appid_mt_init(void)
+{
+ int ret;
+
+ ret = xt_register_target(&appdrop_tg_reg);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_match(&appid_mt_reg);
+ if (ret < 0) {
+ xt_unregister_target(&appdrop_tg_reg);
+ return ret;
+ }
+ return 0;
+}
+
+static void __exit appid_mt_exit(void)
+{
+ xt_unregister_match(&appid_mt_reg);
+ xt_unregister_target(&appdrop_tg_reg);
+}
+
+module_init(appid_mt_init);
+module_exit(appid_mt_exit);
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index bb9cbeb..da6cf89 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -33,13 +33,19 @@
if (info->options & XT_CONNLABEL_OP_SET)
return (nf_connlabel_set(ct, info->bit) == 0) ^ invert;
+ /* cradlepoint - ability to set label and remove others */
+ if (info->options & XT_CONNLABEL_OP_REPLACE)
+ return (nf_connlabel_replace_simple(ct, info->bit) == 0) ^ invert;
+ /* end cradlepoint */
+
return nf_connlabel_match(ct, info->bit) ^ invert;
}
static int connlabel_mt_check(const struct xt_mtchk_param *par)
{
const int options = XT_CONNLABEL_OP_INVERT |
- XT_CONNLABEL_OP_SET;
+ XT_CONNLABEL_OP_SET |
+ XT_CONNLABEL_OP_REPLACE; /* cradlepoint */
struct xt_connlabel_mtinfo *info = par->matchinfo;
int ret;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bea7464..2bc3bdd 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -46,12 +46,18 @@
struct nf_nat_range range;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct nf_conn_nat *nat;
ct = nf_ct_get(skb, &ctinfo);
NF_CT_ASSERT(ct != NULL &&
(ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
+ /* Cradlepoint: We always verify oif on SNAT */
+ nat = nfct_nat(ct);
+ if (nat && par->out)
+ nat->masq_index = par->out->ifindex;
+
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
}
@@ -78,12 +84,18 @@
const struct nf_nat_range *range = par->targinfo;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct nf_conn_nat *nat;
ct = nf_ct_get(skb, &ctinfo);
NF_CT_ASSERT(ct != NULL &&
(ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
+ /* Cradlepoint: We always verify oif on SNAT */
+ nat = nfct_nat(ct);
+ if (nat && par->out)
+ nat->masq_index = par->out->ifindex;
+
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
}
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 5669e5b..4024a15 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -19,6 +19,7 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <uapi/linux/netfilter/xt_set.h>
+#include <net/netfilter/nf_conntrack.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -447,7 +448,8 @@
if (info->map_set.index != IPSET_INVALID_ID) {
map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
IPSET_FLAG_MAP_SKBPRIO |
- IPSET_FLAG_MAP_SKBQUEUE);
+ IPSET_FLAG_MAP_SKBQUEUE |
+ IPSET_FLAG_MAP_APPID);
ret = match_set(info->map_set.index, skb, par, &map_opt,
info->map_set.flags & IPSET_INV_MATCH);
if (!ret)
@@ -461,6 +463,21 @@
skb->dev &&
skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+#ifdef CONFIG_NF_CONNTRACK_APPID
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_APPID) {
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int8_t i;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ for (i = 0; i < MAX_CT_APPID_VALS; i++) {
+ ct->appid[i].value = map_opt.ext.skbappid[i];
+ }
+ ct->appid_cached = 1;
+ }
+ }
+#endif
}
return XT_CONTINUE;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index acfb16f..3a447d0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1982,6 +1982,14 @@
}
EXPORT_SYMBOL(__netlink_kernel_create);
+/* Cradlepoint: CTF wants to link against this, let it */
+struct sock *
+netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
+{
+ return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
+}
+EXPORT_SYMBOL(netlink_kernel_create);
+
void
netlink_kernel_release(struct sock *sk)
{
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index daa3343..9ac3853 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -3,8 +3,9 @@
#
menuconfig NET_SCHED
- bool "QoS and/or fair queueing"
+ def_bool y
select NET_SCH_FIFO
+ select NET_SCH_FQ_CODEL
---help---
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9a1c42a..99add01 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -193,7 +193,8 @@
/* No failure allowed after this point */
police->tcfp_mtu = parm->mtu;
- if (police->tcfp_mtu == 0) {
+ /* Cradlepoint: When possible, allow unlimited MTU */
+ if (police->tcfp_mtu == 0 && P_tab) {
police->tcfp_mtu = ~0;
if (R_tab)
police->tcfp_mtu = 255 << R_tab->rate.cell_log;
@@ -273,7 +274,9 @@
return police->tcf_action;
}
- if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
+ /* Cradlepoint: When possible, allow unlimited MTU */
+ if (police->tcfp_mtu == 0 ||
+ qdisc_pkt_len(skb) <= police->tcfp_mtu) {
if (!police->rate_present) {
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6d340cd..ba6cce9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1951,7 +1951,7 @@
return err;
}
- register_qdisc(&pfifo_fast_ops);
+ register_qdisc(&fq_codel_qdisc_ops);
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d3fc8f9..1c7fc8f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -55,10 +55,13 @@
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
- u32 perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
+ u32 drop_batch_size;
+ u32 memory_limit;
struct codel_params cparams;
struct codel_stats cstats;
+ u32 memory_usage;
+ u32 drop_overmemory;
u32 drop_overlimit;
u32 new_flow_count;
@@ -66,12 +69,13 @@
struct list_head old_flows; /* list of old flows */
};
+#define HIGH_Q_MEM_AVAIL 512 /* MBytes */
+#define LOW_Q_MEM_AVAIL 128 /* MBytes */
+
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, q->perturbation);
-
- return reciprocal_scale(hash, q->flows_cnt);
+ return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -133,17 +137,21 @@
skb->next = NULL;
}
-static unsigned int fq_codel_drop(struct Qdisc *sch)
+static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
unsigned int maxbacklog = 0, idx = 0, i, len;
struct fq_codel_flow *flow;
+ unsigned int threshold;
+ unsigned int mem = 0;
- /* Queue is full! Find the fat flow and drop packet from it.
+ /* Queue is full! Find the fat flow and drop packet(s) from it.
* This might sound expensive, but with 1024 flows, we scan
* 4KB of memory, and we dont need to handle a complex tree
* in fast path (packet queue/enqueue) with many cache misses.
+ * In stress mode, we'll try to drop 64 packets from the flow,
+ * amortizing this linear lookup to one cache line per drop.
*/
for (i = 0; i < q->flows_cnt; i++) {
if (q->backlogs[i] > maxbacklog) {
@@ -151,15 +159,26 @@
idx = i;
}
}
+
+ /* Our goal is to drop half of this fat flow backlog */
+ threshold = maxbacklog >> 1;
+
flow = &q->flows[idx];
- skb = dequeue_head(flow);
- len = qdisc_pkt_len(skb);
+ len = 0;
+ i = 0;
+ do {
+ skb = dequeue_head(flow);
+ len += qdisc_pkt_len(skb);
+ mem += skb->truesize;
+ kfree_skb(skb);
+ } while (++i < max_packets && len < threshold);
+
+ flow->dropped += i;
q->backlogs[idx] -= len;
- sch->q.qlen--;
- qdisc_qstats_drop(sch);
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- flow->dropped++;
+ q->memory_usage -= mem;
+ sch->qstats.drops += i;
+ sch->qstats.backlog -= len;
+ sch->q.qlen -= i;
return idx;
}
@@ -168,16 +187,18 @@
unsigned int prev_backlog;
prev_backlog = sch->qstats.backlog;
- fq_codel_drop(sch);
+ fq_codel_drop(sch, 1U);
return prev_backlog - sch->qstats.backlog;
}
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx, prev_backlog;
+ unsigned int idx, prev_backlog, prev_qlen;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
+ unsigned int pkt_len;
+ bool memory_limited;
idx = fq_codel_classify(skb, sch, &ret);
if (idx == 0) {
@@ -198,21 +219,40 @@
list_add_tail(&flow->flowchain, &q->new_flows);
q->new_flow_count++;
flow->deficit = q->quantum;
- flow->dropped = 0;
}
- if (++sch->q.qlen <= sch->limit)
+ q->memory_usage += skb->truesize;
+ memory_limited = q->memory_usage > q->memory_limit;
+ if (++sch->q.qlen <= sch->limit && !memory_limited)
return NET_XMIT_SUCCESS;
prev_backlog = sch->qstats.backlog;
- q->drop_overlimit++;
- /* Return Congestion Notification only if we dropped a packet
- * from this flow.
- */
- if (fq_codel_drop(sch) == idx)
- return NET_XMIT_CN;
+ prev_qlen = sch->q.qlen;
- /* As we dropped a packet, better let upper stack know this */
- qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
+ /* save this packet length as it might be dropped by fq_codel_drop() */
+ pkt_len = qdisc_pkt_len(skb);
+ /* fq_codel_drop() is quite expensive, as it performs a linear search
+ * in q->backlogs[] to find a fat flow.
+ * So instead of dropping a single packet, drop half of its backlog
+ * with a 64 packets limit to not add a too big cpu spike here.
+ */
+ ret = fq_codel_drop(sch, q->drop_batch_size);
+
+ prev_qlen -= sch->q.qlen;
+ prev_backlog -= sch->qstats.backlog;
+ q->drop_overlimit += prev_qlen;
+ if (memory_limited)
+ q->drop_overmemory += prev_qlen;
+
+ /* As we dropped packet(s), better let upper stack know this.
+ * If we dropped a packet for this flow, return NET_XMIT_CN,
+ * but in this case, our parents wont increase their backlogs.
+ */
+ if (ret == idx) {
+ qdisc_tree_reduce_backlog(sch, prev_qlen - 1,
+ prev_backlog - pkt_len);
+ return NET_XMIT_CN;
+ }
+ qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog);
return NET_XMIT_SUCCESS;
}
@@ -230,6 +270,7 @@
if (flow->head) {
skb = dequeue_head(flow);
q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
+ q->memory_usage -= skb->truesize;
sch->q.qlen--;
}
return skb;
@@ -313,6 +354,7 @@
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
sch->q.qlen = 0;
+ q->memory_usage = 0;
}
static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
@@ -323,6 +365,8 @@
[TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
[TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -374,7 +418,14 @@
if (tb[TCA_FQ_CODEL_QUANTUM])
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
- while (sch->q.qlen > sch->limit) {
+ if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+
+ if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
+ q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+
+ while (sch->q.qlen > sch->limit ||
+ q->memory_usage > q->memory_limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb);
@@ -412,6 +463,20 @@
fq_codel_free(q->flows);
}
+static int fq_codel_mem_default(void)
+{
+ int mbytes;
+
+ if (totalram_pages > HIGH_Q_MEM_AVAIL * 1024 * 1024 / PAGE_SIZE)
+ mbytes = 32;
+ else if (totalram_pages > LOW_Q_MEM_AVAIL * 1024 * 1024 / PAGE_SIZE)
+ mbytes = 8;
+ else
+ mbytes = 4;
+
+ return mbytes << 20;
+}
+
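For reference, with a 4 KiB PAGE_SIZE the branch points above work out to
131072 and 32768 pages. A standalone userspace check of that arithmetic
(the constants mirror HIGH_Q_MEM_AVAIL and LOW_Q_MEM_AVAIL; the 4 KiB page
size is an assumption, not something this patch fixes):

    #include <assert.h>

    int main(void)
    {
            const unsigned long page_size = 4096;   /* assumed PAGE_SIZE */
            const unsigned long high = 512UL << 20; /* HIGH_Q_MEM_AVAIL */
            const unsigned long low = 128UL << 20;  /* LOW_Q_MEM_AVAIL */

            assert(high / page_size == 131072);
            assert(low / page_size == 32768);
            return 0;
    }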
static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -419,8 +484,9 @@
sch->limit = 10*1024;
q->flows_cnt = 1024;
+ q->memory_limit = fq_codel_mem_default();
+ q->drop_batch_size = 64;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
codel_params_init(&q->cparams, sch);
@@ -476,6 +542,10 @@
q->cparams.ecn) ||
nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
q->quantum) ||
+ nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
+ q->drop_batch_size) ||
+ nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
+ q->memory_limit) ||
nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
q->flows_cnt))
goto nla_put_failure;
@@ -504,6 +574,8 @@
st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
st.qdisc_stats.new_flow_count = q->new_flow_count;
st.qdisc_stats.ce_mark = q->cstats.ce_mark;
+ st.qdisc_stats.memory_usage = q->memory_usage;
+ st.qdisc_stats.drop_overmemory = q->drop_overmemory;
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
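The two new counters ride in the qdisc's existing xstats blob; assuming the UAPI half of this patch mirrors upstream, the userspace-visible struct in include/uapi/linux/pkt_sched.h gains fields along these lines, which `tc -s qdisc show` then reports (exact labels depend on the iproute2 version):

	#include <linux/types.h>

	/* Assumed additions to struct tc_fq_codel_qd_stats (sketch only). */
	struct tc_fq_codel_qd_stats_excerpt {
		__u32 memory_usage;    /* skb truesize bytes currently queued */
		__u32 drop_overmemory; /* drops caused by the memory limit */
	};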
@@ -629,7 +701,7 @@
.walk = fq_codel_walk,
};
-static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
+struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.cl_ops = &fq_codel_class_ops,
.id = "fq_codel",
.priv_size = sizeof(struct fq_codel_sched_data),
@@ -645,6 +717,7 @@
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
+EXPORT_SYMBOL(fq_codel_qdisc_ops);
static int __init fq_codel_module_init(void)
{
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aa47250..eab0ecd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -31,7 +31,7 @@
#include <net/dst.h>
/* Qdisc to use by default */
-const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
+const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops;
EXPORT_SYMBOL(default_qdisc_ops);
/* Main transmission queue. */
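With fq_codel_qdisc_ops now non-static and exported (see the sch_fq_codel.c hunk above), other qdiscs can instantiate fq_codel children directly instead of going through default_qdisc_ops, which is exactly what the sch_mq and sch_mqprio hunks below do. A minimal kernel-context sketch (the helper name is hypothetical):

	#include <net/pkt_sched.h>
	#include <net/sch_generic.h>

	extern struct Qdisc_ops fq_codel_qdisc_ops;

	/* Create one fq_codel child under a multiqueue parent. */
	static struct Qdisc *make_fq_codel_child(struct netdev_queue *dev_queue,
						 struct Qdisc *sch, unsigned int ntx)
	{
		return qdisc_create_dflt(dev_queue, &fq_codel_qdisc_ops,
					 TC_H_MAKE(TC_H_MAJ(sch->handle),
						   TC_H_MIN(ntx + 1)));
	}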
@@ -438,139 +438,6 @@
.owner = THIS_MODULE,
};
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/* 3-band FIFO queue: old style, but should be a bit faster than
- generic prio+fifo combination.
- */
-
-#define PFIFO_FAST_BANDS 3
-
-/*
- * Private data for a pfifo_fast scheduler containing:
- * - queues for the three band
- * - bitmap indicating which of the bands contain skbs
- */
-struct pfifo_fast_priv {
- u32 bitmap;
- struct sk_buff_head q[PFIFO_FAST_BANDS];
-};
-
-/*
- * Convert a bitmap to the first band number where an skb is queued, where:
- * bitmap=0 means there are no skbs on any band.
- * bitmap=1 means there is an skb on band 0.
- * bitmap=7 means there are skbs on all 3 bands, etc.
- */
-static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-
-static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
- int band)
-{
- return priv->q + band;
-}
-
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
-{
- if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
- int band = prio2band[skb->priority & TC_PRIO_MAX];
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- struct sk_buff_head *list = band2list(priv, band);
-
- priv->bitmap |= (1 << band);
- qdisc->q.qlen++;
- return __qdisc_enqueue_tail(skb, qdisc, list);
- }
-
- return qdisc_drop(skb, qdisc);
-}
-
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
-{
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
-
- if (likely(band >= 0)) {
- struct sk_buff_head *list = band2list(priv, band);
- struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
-
- qdisc->q.qlen--;
- if (skb_queue_empty(list))
- priv->bitmap &= ~(1 << band);
-
- return skb;
- }
-
- return NULL;
-}
-
-static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
-{
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
-
- if (band >= 0) {
- struct sk_buff_head *list = band2list(priv, band);
-
- return skb_peek(list);
- }
-
- return NULL;
-}
-
-static void pfifo_fast_reset(struct Qdisc *qdisc)
-{
- int prio;
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(qdisc, band2list(priv, prio));
-
- priv->bitmap = 0;
- qdisc->qstats.backlog = 0;
- qdisc->q.qlen = 0;
-}
-
-static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
-{
- struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
-
- memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
- if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- return -1;
-}
-
-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
-{
- int prio;
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __skb_queue_head_init(band2list(priv, prio));
-
- /* Can by-pass the queue discipline */
- qdisc->flags |= TCQ_F_CAN_BYPASS;
- return 0;
-}
-
-struct Qdisc_ops pfifo_fast_ops __read_mostly = {
- .id = "pfifo_fast",
- .priv_size = sizeof(struct pfifo_fast_priv),
- .enqueue = pfifo_fast_enqueue,
- .dequeue = pfifo_fast_dequeue,
- .peek = pfifo_fast_peek,
- .init = pfifo_fast_init,
- .reset = pfifo_fast_reset,
- .dump = pfifo_fast_dump,
- .owner = THIS_MODULE,
-};
-
static struct lock_class_key qdisc_tx_busylock;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
@@ -731,7 +598,7 @@
void *_unused)
{
struct Qdisc *qdisc;
- const struct Qdisc_ops *ops = default_qdisc_ops;
+ const struct Qdisc_ops *ops = &fq_codel_qdisc_ops;
if (dev->priv_flags & IFF_NO_QUEUE)
ops = &noqueue_qdisc_ops;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d9c8432..b0e3f1b 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
- qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
+ qdisc = qdisc_create_dflt(dev_queue, &fq_codel_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
if (!qdisc)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 66bccc5..2941a84 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -122,7 +122,7 @@
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
- qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
+ qdisc = qdisc_create_dflt(dev_queue, &fq_codel_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
if (!qdisc)
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da72ed3..f0797ec 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,5 +1,7 @@
config WIRELESS_EXT
- bool
+ bool "Enable wireless extensions for external use"
+ help
+ Enable this to build in the wireless extensions (wext) interface for use by external drivers.
config WEXT_CORE
def_bool y
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1c4ad47..c79dec5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -327,6 +327,12 @@
spin_unlock(&x->lock);
+ /* cradlepoint */
+ write_seqlock(&net->xfrm.xfrm_total_stats_lock);
+ net->xfrm.total_bytes += skb->len;
+ net->xfrm.total_packets++;
+ write_sequnlock(&net->xfrm.xfrm_total_stats_lock);
+
XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
inner_mode = x->inner_mode;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff4a91f..80718d9 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -97,6 +97,12 @@
spin_unlock_bh(&x->lock);
+ /* cradlepoint */
+ write_seqlock_bh(&net->xfrm.xfrm_total_stats_lock);
+ net->xfrm.total_bytes += skb->len;
+ net->xfrm.total_packets++;
+ write_sequnlock_bh(&net->xfrm.xfrm_total_stats_lock);
+
skb_dst_force(skb);
err = x->type->output(x, skb);
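Both hunks use the same writer-side pattern, paired with the retry-loop reader in proc_total_stats (net/xfrm/xfrm_sysctl.c below), so the 64-bit counters can be read torn-free even on 32-bit machines. A simplified userspace model of the pairing (a real seqlock additionally uses memory barriers and serializes writers with a spinlock):

	/* Toy seqcount: even = stable, odd = write in progress. */
	#include <stdint.h>
	#include <stdio.h>

	static volatile unsigned int seq;
	static uint64_t total_bytes;

	static void writer_add(uint64_t len)
	{
		seq++;              /* begin write: seq goes odd */
		total_bytes += len;
		seq++;              /* end write: seq goes even */
	}

	static uint64_t reader_get(void)
	{
		unsigned int start;
		uint64_t val;

		do {
			start = seq;
			val = total_bytes;
		} while ((start & 1) || seq != start); /* retry if a write raced */

		return val;
	}

	int main(void)
	{
		writer_add(1500);
		printf("%llu\n", (unsigned long long)reader_get());
		return 0;
	}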
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0e01250..adbb453 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -117,6 +117,7 @@
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
int tos, int oif,
+ int mark,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
int family)
@@ -128,7 +129,7 @@
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
+ dst = afinfo->dst_lookup(net, tos, oif, mark, saddr, daddr);
xfrm_policy_put_afinfo(afinfo);
@@ -137,6 +138,7 @@
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
int tos, int oif,
+ int mark,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
int family)
@@ -155,7 +157,7 @@
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
+ dst = __xfrm_dst_lookup(net, tos, oif, mark, saddr, daddr, family);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
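The new mark argument has to be consumed by each family's dst_lookup callback; the corresponding afinfo changes are not shown in this section, but a plausible (hypothetical) IPv4 implementation would simply seed the flow key, roughly:

	#include <net/route.h>
	#include <net/xfrm.h>

	/* Hypothetical mark-aware lookup (the real change would live in
	 * net/ipv4/xfrm4_policy.c).
	 */
	static struct dst_entry *xfrm4_dst_lookup_sketch(struct net *net, int tos,
							 int oif, int mark,
							 const xfrm_address_t *saddr,
							 const xfrm_address_t *daddr)
	{
		struct rtable *rt;
		struct flowi4 fl4 = {
			.flowi4_oif = oif,
			.flowi4_tos = tos,
			.flowi4_mark = mark, /* new: carry the policy/skb mark */
			.daddr = daddr->a4,
		};

		if (saddr)
			fl4.saddr = saddr->a4;

		rt = __ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return ERR_CAST(rt);
		return &rt->dst;
	}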
@@ -1524,6 +1526,21 @@
return tos;
}
+static inline int xfrm_get_mark(const struct flowi *fl, int family)
+{
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ int mark;
+
+ if (!afinfo)
+ return -EINVAL;
+
+ mark = afinfo->get_mark(fl);
+
+ xfrm_policy_put_afinfo(afinfo);
+
+ return mark;
+}
+
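xfrm_get_mark() depends on a get_mark callback in struct xfrm_policy_afinfo that this section does not show; since flowi_mark sits in the family-independent part of the flow key, one hypothetical hook could serve both address families:

	#include <net/flow.h>

	/* Hypothetical afinfo->get_mark implementation. */
	static int xfrm_flowi_get_mark(const struct flowi *fl)
	{
		return fl->flowi_mark;
	}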
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
@@ -1666,6 +1683,7 @@
int nfheader_len = 0;
int trailer_len = 0;
int tos;
+ int mark;
int family = policy->selector.family;
xfrm_address_t saddr, daddr;
@@ -1676,6 +1694,8 @@
if (tos < 0)
goto put_states;
+ mark = xfrm_get_mark(fl, family);
+
dst_hold(dst);
for (; i < nx; i++) {
@@ -1711,7 +1731,7 @@
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
family = xfrm[i]->props.family;
- dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+ dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, mark,
&saddr, &daddr, family);
err = PTR_ERR(dst);
if (IS_ERR(dst))
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 05a6e3d..9bc3217 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -9,9 +9,44 @@
net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE;
net->xfrm.sysctl_larval_drop = 1;
net->xfrm.sysctl_acq_expires = 30;
+
+ /* cradlepoint */
+ net->xfrm.total_packets = 0;
+ net->xfrm.total_bytes = 0;
}
#ifdef CONFIG_SYSCTL
+
+/* cradlepoint */
+int proc_total_stats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char buf[32];
+ struct ctl_table tbl = { .data = buf, .maxlen = sizeof(buf) };
+ u64 val;
+ unsigned int seq;
+ seqlock_t *lock = &((struct net *)table->extra1)->xfrm.xfrm_total_stats_lock;
+
+ if (write)
+ return -EPERM;
+
+ do {
+ seq = read_seqbegin(lock);
+ val = *(u64 *)(table->extra2);
+ } while (read_seqretry(lock, seq));
+
+ /* Format into an on-stack table copy so concurrent readers never
+ * race on, or leak, a heap-allocated table->data.
+ */
+ snprintf(buf, sizeof(buf), "%llu", val);
+
+ return proc_dostring(&tbl, write, buffer, lenp, ppos);
+}
+
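Assuming this table is registered under the same net/core path as the existing xfrm sysctls, the counters surface as read-only proc files. A hypothetical userspace reader:

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/proc/sys/net/core/xfrm_total_bytes", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("xfrm total bytes: %s", buf);
		fclose(f);
		return 0;
	}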
static struct ctl_table xfrm_table[] = {
{
.procname = "xfrm_aevent_etime",
@@ -37,6 +72,20 @@
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ /* cradlepoint */
+ .procname = "xfrm_total_packets",
+ .maxlen = 256,
+ .mode = 0444,
+ .proc_handler = proc_total_stats
+ },
+ {
+ /* cradlepoint */
+ .procname = "xfrm_total_bytes",
+ .maxlen = 256,
+ .mode = 0444,
+ .proc_handler = proc_total_stats
+ },
{}
};
@@ -54,6 +103,12 @@
table[2].data = &net->xfrm.sysctl_larval_drop;
table[3].data = &net->xfrm.sysctl_acq_expires;
+ /* cradlepoint xfrm total counters */
+ table[4].extra1 = (void *)net;
+ table[4].extra2 = &net->xfrm.total_packets;
+ table[5].extra1 = (void *)net;
+ table[5].extra2 = &net->xfrm.total_bytes;
+
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7a5a64e..bfcc7d8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3132,6 +3132,10 @@
return -ENOMEM;
net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */
rcu_assign_pointer(net->xfrm.nlsk, nlsk);
+
+ /* cradlepoint */
+ seqlock_init(&net->xfrm.xfrm_total_stats_lock);
+
return 0;
}