| /* |
| * fast-classifier.c |
| * Shortcut forwarding engine connection manager. |
| * fast-classifier style |
| * |
| * XXX - fill in the appropriate GPL notice. |
| */ |
| #include <linux/module.h> |
| #include <linux/sysfs.h> |
| #include <linux/skbuff.h> |
| #include <net/route.h> |
| #include <linux/inetdevice.h> |
| #include <linux/netfilter_bridge.h> |
| #include <net/netfilter/nf_conntrack_acct.h> |
| #include <net/netfilter/nf_conntrack_helper.h> |
| #include <net/netfilter/nf_conntrack_zones.h> |
| #include <net/netfilter/nf_conntrack_core.h> |
| #include <net/genetlink.h> |
| #include <linux/list.h> |
| #include <linux/spinlock.h> |
| |
| #include "../shortcut-fe/sfe.h" |
| #include "../shortcut-fe/sfe_ipv4.h" |
| #include "fast-classifier-priv.h" |
| |
| /* |
| * Per-module structure. |
| */ |
| struct fast_classifier { |
| spinlock_t lock; /* Lock for SMP correctness */ |
| |
| /* |
| * Control state. |
| */ |
| struct kobject *sys_fast_classifier; /* sysfs linkage */ |
| |
| /* |
| * Callback notifiers. |
| */ |
| struct notifier_block dev_notifier; |
| /* Device notifier */ |
| struct notifier_block inet_notifier; |
| /* IP notifier */ |
| }; |
| |
| struct fast_classifier __sc; |
| |
| static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { |
| [FAST_CLASSIFIER_A_MSG] = { .type = NLA_NUL_STRING }, |
| }; |
| |
| static struct genl_family fast_classifier_gnl_family = { |
| .id = GENL_ID_GENERATE, |
| .hdrsize = 0, |
| .name = "FAST_CLASSIFIER", |
| .version = 1, |
| .maxattr = FAST_CLASSIFIER_A_MAX, |
| }; |
| |
| |
| #define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1) |
| |
| static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info); |
| |
| static struct genl_ops fast_classifier_gnl_ops_recv = { |
| .cmd = FAST_CLASSIFIER_C_RECV, |
| .flags = 0, |
| .policy = fast_classifier_genl_policy, |
| .doit = fast_classifier_recv_genl_msg, |
| .dumpit = NULL, |
| }; |
| |
/*
 * Receive hook pointer defined outside this file.  fast_classifier_init()
 * points it at fast_classifier_recv() and fast_classifier_exit() clears it.
 */
extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);

/*
 * Flag defined outside this file (described by the original author as
 * something that should be static to the TCP connection tracker).  When it is
 * non-zero we ask the forwarding engine to skip TCP sequence checks.
 */
extern int nf_ct_tcp_no_window_check;
| |
| /* |
| * fast_classifier_recv() |
| * Handle packet receives. |
| * |
| * Returns 1 if the packet is forwarded or 0 if it isn't. |
| */ |
| int fast_classifier_recv(struct sk_buff *skb) |
| { |
| struct net_device *dev; |
| #if (SFE_HOOK_ABOVE_BRIDGE) |
| struct in_device *in_dev; |
| #endif |
| |
| /* |
| * We know that for the vast majority of packets we need the transport |
| * layer header so we may as well start to fetch it now! |
| */ |
| prefetch(skb->data + 32); |
| barrier(); |
| |
| dev = skb->dev; |
| |
| #if (SFE_HOOK_ABOVE_BRIDGE) |
| /* |
| * Does our input device support IP processing? |
| */ |
| in_dev = (struct in_device *)dev->ip_ptr; |
| if (unlikely(!in_dev)) { |
| DEBUG_TRACE("no IP processing for device: %s\n", dev->name); |
| return 0; |
| } |
| |
| /* |
| * Does it have an IP address? If it doesn't then we can't do anything |
| * interesting here! |
| */ |
| if (unlikely(!in_dev->ifa_list)) { |
| DEBUG_TRACE("no IP address for device: %s\n", dev->name); |
| return 0; |
| } |
| #endif |
| |
| /* |
| * We're only interested in IP packets. |
| */ |
| if (likely(htons(ETH_P_IP) == skb->protocol)) { |
| return sfe_ipv4_recv(dev, skb); |
| } |
| |
| DEBUG_TRACE("not IP packet\n"); |
| return 0; |
| } |
| |
| /* |
| * fast_classifier_find_mac_addr() |
| * Find the MAC address for a given IPv4 address. |
| * |
| * Returns true if we find the MAC address, otherwise false. |
| * |
| * We look up the rtable entry for the address and, from its neighbour |
| * structure, obtain the hardware address. This means this function also |
| * works if the neighbours are routers too. |
| */ |
| static bool fast_classifier_find_mac_addr(uint32_t addr, uint8_t *mac_addr) |
| { |
| struct neighbour *neigh; |
| struct rtable *rt; |
| struct dst_entry *dst; |
| struct net_device *dev; |
| |
| /* |
| * Look up the rtable entry for the IP address then get the hardware |
| * address from its neighbour structure. This means this work when the |
| * neighbours are routers too. |
| */ |
| rt = ip_route_output(&init_net, addr, 0, 0, 0); |
| if (unlikely(IS_ERR(rt))) { |
| return false; |
| } |
| |
| dst = (struct dst_entry *)rt; |
| |
| rcu_read_lock(); |
| neigh = dst_get_neighbour_noref(dst); |
| if (unlikely(!neigh)) { |
| rcu_read_unlock(); |
| dst_release(dst); |
| return false; |
| } |
| |
| if (unlikely(!(neigh->nud_state & NUD_VALID))) { |
| rcu_read_unlock(); |
| dst_release(dst); |
| return false; |
| } |
| |
| dev = neigh->dev; |
| if (!dev) { |
| rcu_read_unlock(); |
| dst_release(dst); |
| return false; |
| } |
| |
| memcpy(mac_addr, neigh->ha, (size_t)dev->addr_len); |
| rcu_read_unlock(); |
| |
| dst_release(dst); |
| |
| /* |
| * We're only interested in unicast MAC addresses - if it's not a unicast |
| * address then our IP address mustn't be unicast either. |
| */ |
| if (is_multicast_ether_addr(mac_addr)) { |
| DEBUG_TRACE("MAC is non-unicast - ignoring\n"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static DEFINE_SPINLOCK(sfe_connections_lock); |
| |
| struct sfe_connection { |
| struct list_head list; |
| struct sfe_ipv4_create *sic; |
| struct nf_conn *ct; |
| }; |
| |
| static LIST_HEAD(sfe_connections); |
| |
| /* |
| * fast_classifier_recv_genl_msg() |
| * Called from user space to offload a connection |
| */ |
| static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info) |
| { |
| struct nlattr *na; |
| struct fast_classifier_msg *fc_msg; |
| struct sfe_ipv4_create *p_sic; |
| struct sfe_connection *conn; |
| unsigned long flags; |
| |
| na = info->attrs[FAST_CLASSIFIER_C_RECV]; |
| fc_msg = nla_data(na); |
| |
| DEBUG_TRACE("INFO: want to offload: %d, %d, %d, %d, %d\n", fc_msg->proto, |
| fc_msg->src_saddr, |
| fc_msg->dst_saddr, |
| fc_msg->sport, fc_msg->dport); |
| spin_lock_irqsave(&sfe_connections_lock, flags); |
| list_for_each_entry(conn, &sfe_connections, list) { |
| struct nf_conn *ct = conn->ct; |
| p_sic = conn->sic; |
| |
| DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...", |
| p_sic->protocol, p_sic->src_ip, p_sic->dest_ip, |
| p_sic->src_port, p_sic->dest_port); |
| |
| if (p_sic->protocol == fc_msg->proto && |
| p_sic->src_port == fc_msg->sport && |
| p_sic->dest_port == fc_msg->dport && |
| p_sic->src_ip == fc_msg->src_saddr && |
| p_sic->dest_ip == fc_msg->dst_saddr ) { |
| DEBUG_TRACE("FOUND, WILL OFFLOAD\n"); |
| switch (p_sic->protocol) { |
| case IPPROTO_TCP: |
| p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale; |
| p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; |
| p_sic->src_td_end = ct->proto.tcp.seen[0].td_end; |
| p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend; |
| p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; |
| p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; |
| p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end; |
| p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; |
| if (nf_ct_tcp_no_window_check |
| || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL) |
| || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { |
| p_sic->flags |= SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK; |
| } |
| |
| /* |
| * If the connection is shutting down do not manage it. |
| * state can not be SYN_SENT, SYN_RECV because connection is assured |
| * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. |
| */ |
| spin_lock(&ct->lock); |
| if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { |
| spin_unlock_bh(&ct->lock); |
| DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", |
| ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port), |
| &p_sic->dest_ip, ntohs(p_sic->dest_port)); |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| return 0; |
| } |
| spin_unlock(&ct->lock); |
| break; |
| |
| case IPPROTO_UDP: |
| break; |
| |
| default: |
| DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol); |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| return 0; |
| } |
| |
| DEBUG_TRACE("INFO: calling sfe rule creation!\n"); |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| sfe_ipv4_create_rule(p_sic); |
| return 0; |
| } |
| DEBUG_TRACE("SEARCH CONTINUES\n"); |
| } |
| |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| return 0; |
| } |
| |
| /* |
| * fast_classifier_ipv4_post_routing_hook() |
| * Called for packets about to leave the box - either locally generated or forwarded from another interface |
| */ |
| static unsigned int fast_classifier_ipv4_post_routing_hook(unsigned int hooknum, |
| struct sk_buff *skb, |
| const struct net_device *in_unused, |
| const struct net_device *out, |
| int (*okfn)(struct sk_buff *)) |
| { |
| struct sfe_ipv4_create sic; |
| struct sfe_ipv4_create *p_sic; |
| struct net_device *in; |
| struct nf_conn *ct; |
| enum ip_conntrack_info ctinfo; |
| struct net_device *src_dev; |
| struct net_device *dest_dev; |
| struct net_device *src_br_dev = NULL; |
| struct net_device *dest_br_dev = NULL; |
| struct nf_conntrack_tuple orig_tuple; |
| struct nf_conntrack_tuple reply_tuple; |
| struct sfe_connection *conn; |
| int sfe_connections_size = 0; |
| unsigned long flags; |
| |
| /* |
| * Don't process broadcast or multicast packets. |
| */ |
| if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { |
| DEBUG_TRACE("broadcast, ignoring\n"); |
| return NF_ACCEPT; |
| } |
| if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { |
| DEBUG_TRACE("multicast, ignoring\n"); |
| return NF_ACCEPT; |
| } |
| |
| /* |
| * Don't process packets that are not being forwarded. |
| */ |
| in = dev_get_by_index(&init_net, skb->skb_iif); |
| if (!in) { |
| DEBUG_TRACE("packet not forwarding\n"); |
| return NF_ACCEPT; |
| } |
| |
| /* |
| * Don't process packets with non-standard 802.3 MAC address sizes. |
| */ |
| if (unlikely(in->addr_len != ETH_ALEN)) { |
| DEBUG_TRACE("in device: %s not 802.3 hw addr len: %u, ignoring\n", |
| in->name, (unsigned)in->addr_len); |
| goto done1; |
| } |
| if (unlikely(out->addr_len != ETH_ALEN)) { |
| DEBUG_TRACE("out device: %s not 802.3 hw addr len: %u, ignoring\n", |
| out->name, (unsigned)out->addr_len); |
| goto done1; |
| } |
| |
| /* |
| * Don't process packets that aren't being tracked by conntrack. |
| */ |
| ct = nf_ct_get(skb, &ctinfo); |
| if (unlikely(!ct)) { |
| DEBUG_TRACE("no conntrack connection, ignoring\n"); |
| goto done1; |
| } |
| |
| /* |
| * Don't process untracked connections. |
| */ |
| if (unlikely(ct == &nf_conntrack_untracked)) { |
| DEBUG_TRACE("untracked connection\n"); |
| goto done1; |
| } |
| |
| /* |
| * Don't process connections that require support from a 'helper' (typically a NAT ALG). |
| */ |
| if (unlikely(nfct_help(ct))) { |
| DEBUG_TRACE("connection has helper\n"); |
| goto done1; |
| } |
| |
| /* |
| * Look up the details of our connection in conntrack. |
| * |
| * Note that the data we get from conntrack is for the "ORIGINAL" direction |
| * but our packet may actually be in the "REPLY" direction. |
| */ |
| orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
| reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
| sic.protocol = (int32_t)orig_tuple.dst.protonum; |
| |
| /* |
| * Get addressing information, non-NAT first |
| */ |
| sic.src_ip = (__be32)orig_tuple.src.u3.ip; |
| sic.dest_ip = (__be32)orig_tuple.dst.u3.ip; |
| |
| /* |
| * NAT'ed addresses - note these are as seen from the 'reply' direction |
| * When NAT does not apply to this connection these will be identical to the above. |
| */ |
| sic.src_ip_xlate = (__be32)reply_tuple.dst.u3.ip; |
| sic.dest_ip_xlate = (__be32)reply_tuple.src.u3.ip; |
| |
| sic.flags = 0; |
| |
| switch (sic.protocol) { |
| case IPPROTO_TCP: |
| sic.src_port = orig_tuple.src.u.tcp.port; |
| sic.dest_port = orig_tuple.dst.u.tcp.port; |
| sic.src_port_xlate = reply_tuple.dst.u.tcp.port; |
| sic.dest_port_xlate = reply_tuple.src.u.tcp.port; |
| |
| /* |
| * Don't try to manage a non-established connection. |
| */ |
| if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { |
| DEBUG_TRACE("non-established connection\n"); |
| goto done1; |
| } |
| |
| break; |
| |
| case IPPROTO_UDP: |
| sic.src_port = orig_tuple.src.u.udp.port; |
| sic.dest_port = orig_tuple.dst.u.udp.port; |
| sic.src_port_xlate = reply_tuple.dst.u.udp.port; |
| sic.dest_port_xlate = reply_tuple.src.u.udp.port; |
| break; |
| |
| default: |
| DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); |
| goto done1; |
| } |
| |
| /* |
| * If we already have this connection in our list, skip it |
| * XXX: this may need to be optimized |
| */ |
| DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n", |
| sic.protocol, sic.src_ip, sic.dest_ip, |
| sic.src_port, sic.dest_port); |
| spin_lock_irqsave(&sfe_connections_lock, flags); |
| list_for_each_entry(conn, &sfe_connections, list) { |
| p_sic = conn->sic; |
| DEBUG_TRACE("\t\t-> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...", |
| p_sic->protocol, p_sic->src_ip, p_sic->dest_ip, |
| p_sic->src_port, p_sic->dest_port); |
| |
| if (p_sic->protocol == sic.protocol && |
| p_sic->src_port == sic.src_port && |
| p_sic->dest_port == sic.dest_port && |
| p_sic->src_ip == sic.src_ip && |
| p_sic->dest_ip == sic.dest_ip ) { |
| DEBUG_TRACE("FOUND, SKIPPING\n"); |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| goto done1; |
| } else { |
| DEBUG_TRACE("SEARCH CONTINUES"); |
| } |
| |
| sfe_connections_size++; |
| } |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| |
| /* |
| * Get the MAC addresses that correspond to source and destination host addresses. |
| */ |
| if (!fast_classifier_find_mac_addr(sic.src_ip, sic.src_mac)) { |
| DEBUG_TRACE("failed to find MAC address for src IP: %pI4\n", &sic.src_ip); |
| goto done1; |
| } |
| |
| if (!fast_classifier_find_mac_addr(sic.src_ip_xlate, sic.src_mac_xlate)) { |
| DEBUG_TRACE("failed to find MAC address for xlate src IP: %pI4\n", &sic.src_ip_xlate); |
| goto done1; |
| } |
| |
| /* |
| * Do dest now |
| */ |
| if (!fast_classifier_find_mac_addr(sic.dest_ip, sic.dest_mac)) { |
| DEBUG_TRACE("failed to find MAC address for dest IP: %pI4\n", &sic.dest_ip); |
| goto done1; |
| } |
| |
| if (!fast_classifier_find_mac_addr(sic.dest_ip_xlate, sic.dest_mac_xlate)) { |
| DEBUG_TRACE("failed to find MAC address for xlate dest IP: %pI4\n", &sic.dest_ip_xlate); |
| goto done1; |
| } |
| |
| /* |
| * Get our device info. If we're dealing with the "reply" direction here then |
| * we'll need things swapped around. |
| */ |
| if (ctinfo < IP_CT_IS_REPLY) { |
| src_dev = in; |
| dest_dev = (struct net_device *)out; |
| } else { |
| src_dev = (struct net_device *)out; |
| dest_dev = in; |
| } |
| |
| #if (!SFE_HOOK_ABOVE_BRIDGE) |
| /* |
| * Now our devices may actually be a bridge interface. If that's |
| * the case then we need to hunt down the underlying interface. |
| */ |
| if (src_dev->priv_flags & IFF_EBRIDGE) { |
| src_br_dev = br_port_dev_get(src_dev, sic.src_mac); |
| if (!src_br_dev) { |
| DEBUG_TRACE("no port found on bridge\n"); |
| goto done1; |
| } |
| |
| src_dev = src_br_dev; |
| } |
| |
| if (dest_dev->priv_flags & IFF_EBRIDGE) { |
| dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate); |
| if (!dest_br_dev) { |
| DEBUG_TRACE("no port found on bridge\n"); |
| goto done2; |
| } |
| |
| dest_dev = dest_br_dev; |
| } |
| #else |
| /* |
| * Our devices may actually be part of a bridge interface. If that's |
| * the case then find the bridge interface instead. |
| */ |
| if (src_dev->priv_flags & IFF_BRIDGE_PORT) { |
| src_br_dev = src_dev->master; |
| if (!src_br_dev) { |
| DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); |
| goto done1; |
| } |
| |
| dev_hold(src_br_dev); |
| src_dev = src_br_dev; |
| } |
| |
| if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { |
| dest_br_dev = dest_dev->master; |
| if (!dest_br_dev) { |
| DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); |
| goto done2; |
| } |
| |
| dev_hold(dest_br_dev); |
| dest_dev = dest_br_dev; |
| } |
| #endif |
| |
| sic.src_dev = src_dev; |
| sic.dest_dev = dest_dev; |
| |
| // XXX - these MTUs need handling correctly! |
| sic.src_mtu = 1500; |
| sic.dest_mtu = 1500; |
| |
| conn = kmalloc(sizeof(struct sfe_connection), GFP_KERNEL); |
| if (conn == NULL) { |
| printk(KERN_CRIT "ERROR: no memory for sfe\n"); |
| goto done3; |
| } |
| |
| p_sic = kmalloc(sizeof(struct sfe_ipv4_create), GFP_KERNEL); |
| if (p_sic == NULL) { |
| printk(KERN_CRIT "ERROR: no memory for sfe\n"); |
| kfree(conn); |
| goto done3; |
| } |
| |
| memcpy(p_sic, &sic, sizeof(sic)); |
| conn->sic = p_sic; |
| conn->ct = ct; |
| DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", ++sfe_connections_size); |
| DEBUG_TRACE("POST_ROUTE: new offloadable connection: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n", |
| p_sic->protocol, p_sic->src_ip, p_sic->dest_ip, |
| p_sic->src_port, p_sic->dest_port); |
| spin_lock_irqsave(&sfe_connections_lock, flags); |
| list_add_tail(&(conn->list), &sfe_connections); |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| done3: |
| /* |
| * If we had bridge ports then release them too. |
| */ |
| if (dest_br_dev) { |
| dev_put(dest_br_dev); |
| } |
| |
| done2: |
| if (src_br_dev) { |
| dev_put(src_br_dev); |
| } |
| |
| done1: |
| /* |
| * Release the interface on which this skb arrived |
| */ |
| dev_put(in); |
| |
| return NF_ACCEPT; |
| } |
| |
| #ifdef CONFIG_NF_CONNTRACK_EVENTS |
| /* |
| * fast_classifier_conntrack_event() |
| * Callback event invoked when a conntrack connection's state changes. |
| */ |
| #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS |
| static int fast_classifier_conntrack_event(struct notifier_block *this, |
| unsigned int events, struct nf_ct_event *item) |
| #else |
| static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item) |
| #endif |
| { |
| struct sfe_ipv4_destroy sid; |
| struct nf_conn *ct = item->ct; |
| struct nf_conntrack_tuple orig_tuple; |
| struct sfe_connection *conn; |
| struct sfe_ipv4_create *p_sic; |
| int sfe_found_match = 0; |
| int sfe_connections_size = 0; |
| unsigned long flags; |
| |
| /* |
| * If we don't have a conntrack entry then we're done. |
| */ |
| if (unlikely(!ct)) { |
| DEBUG_WARN("no ct in conntrack event callback\n"); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * If this is an untracked connection then we can't have any state either. |
| */ |
| if (unlikely(ct == &nf_conntrack_untracked)) { |
| DEBUG_TRACE("ignoring untracked conn\n"); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * Ignore anything other than IPv4 connections. |
| */ |
| if (unlikely(nf_ct_l3num(ct) != AF_INET)) { |
| DEBUG_TRACE("ignoring non-IPv4 conn\n"); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * We're only interested in destroy events. |
| */ |
| if (unlikely(!(events & (1 << IPCT_DESTROY)))) { |
| DEBUG_TRACE("ignoring non-destroy event\n"); |
| return NOTIFY_DONE; |
| } |
| |
| orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
| sid.protocol = (int32_t)orig_tuple.dst.protonum; |
| |
| /* |
| * Extract information from the conntrack connection. We're only interested |
| * in nominal connection information (i.e. we're ignoring any NAT information). |
| */ |
| sid.src_ip = (__be32)orig_tuple.src.u3.ip; |
| sid.dest_ip = (__be32)orig_tuple.dst.u3.ip; |
| |
| switch (sid.protocol) { |
| case IPPROTO_TCP: |
| sid.src_port = orig_tuple.src.u.tcp.port; |
| sid.dest_port = orig_tuple.dst.u.tcp.port; |
| break; |
| |
| case IPPROTO_UDP: |
| sid.src_port = orig_tuple.src.u.udp.port; |
| sid.dest_port = orig_tuple.dst.u.udp.port; |
| break; |
| |
| default: |
| DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * If we already have this connection in our list, skip it |
| * XXX: this may need to be optimized |
| */ |
| DEBUG_TRACE("INFO: want to clean up: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n", |
| sid.protocol, sid.src_ip, sid.dest_ip, |
| sid.src_port, sid.dest_port); |
| spin_lock_irqsave(&sfe_connections_lock, flags); |
| list_for_each_entry(conn, &sfe_connections, list) { |
| p_sic = conn->sic; |
| DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...", |
| p_sic->protocol, p_sic->src_ip, p_sic->dest_ip, |
| p_sic->src_port, p_sic->dest_port); |
| |
| if (p_sic->protocol == sid.protocol && |
| p_sic->src_port == sid.src_port && |
| p_sic->dest_port == sid.dest_port && |
| p_sic->src_ip == sid.src_ip && |
| p_sic->dest_ip == sid.dest_ip ) { |
| sfe_found_match = 1; |
| DEBUG_TRACE("FOUND, DELETING\n"); |
| break; |
| } else { |
| DEBUG_TRACE("SEARCH CONTINUES\n"); |
| } |
| sfe_connections_size++; |
| } |
| |
| if (sfe_found_match) { |
| DEBUG_TRACE("INFO: connection over proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n", |
| p_sic->protocol, p_sic->src_ip, p_sic->dest_ip, |
| p_sic->src_port, p_sic->dest_port); |
| kfree(conn->sic); |
| list_del(&(conn->list)); |
| kfree(conn); |
| } else { |
| DEBUG_TRACE("NO MATCH FOUND IN %d ENTRIES!!\n", sfe_connections_size); |
| } |
| spin_unlock_irqrestore(&sfe_connections_lock, flags); |
| |
| sfe_ipv4_destroy_rule(&sid); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * Netfilter conntrack event system to monitor connection tracking changes |
| */ |
| #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS |
| static struct notifier_block fast_classifier_conntrack_notifier = { |
| .notifier_call = fast_classifier_conntrack_event, |
| }; |
| #else |
| static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = { |
| .fcn = fast_classifier_conntrack_event, |
| }; |
| #endif |
| #endif |
| |
| /* |
| * Structure to establish a hook into the post routing netfilter point - this |
| * will pick up local outbound and packets going from one interface to another. |
| * |
| * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. |
| * We want to examine packets after NAT translation and any ALG processing. |
| */ |
| static struct nf_hook_ops fast_classifier_ipv4_ops_post_routing[] __read_mostly = { |
| { |
| .hook = fast_classifier_ipv4_post_routing_hook, |
| .owner = THIS_MODULE, |
| .pf = PF_INET, |
| .hooknum = NF_INET_POST_ROUTING, |
| .priority = NF_IP_PRI_NAT_SRC + 1, |
| }, |
| }; |
| |
| /* |
| * fast_classifier_sync_rule() |
| * Synchronize a connection's state. |
| */ |
| static void fast_classifier_sync_rule(struct sfe_ipv4_sync *sis) |
| { |
| struct nf_conntrack_tuple_hash *h; |
| struct nf_conntrack_tuple tuple; |
| struct nf_conn *ct; |
| struct nf_conn_counter *acct; |
| |
| /* |
| * Create a tuple so as to be able to look up a connection |
| */ |
| memset(&tuple, 0, sizeof(tuple)); |
| tuple.src.u3.ip = sis->src_ip; |
| tuple.src.u.all = (__be16)sis->src_port; |
| tuple.src.l3num = AF_INET; |
| |
| tuple.dst.u3.ip = sis->dest_ip; |
| tuple.dst.dir = IP_CT_DIR_ORIGINAL; |
| tuple.dst.protonum = (uint8_t)sis->protocol; |
| tuple.dst.u.all = (__be16)sis->dest_port; |
| |
| DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", |
| (int)tuple.dst.protonum, |
| &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), |
| &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); |
| |
| /* |
| * Look up conntrack connection |
| */ |
| h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); |
| if (unlikely(!h)) { |
| DEBUG_TRACE("no connection found\n"); |
| return; |
| } |
| |
| ct = nf_ct_tuplehash_to_ctrack(h); |
| NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); |
| |
| /* |
| * Only update if this is not a fixed timeout |
| */ |
| if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { |
| ct->timeout.expires += sis->delta_jiffies; |
| } |
| |
| acct = nf_conn_acct_find(ct); |
| if (acct) { |
| spin_lock_bh(&ct->lock); |
| atomic64_add(sis->src_packet_count, &acct[IP_CT_DIR_ORIGINAL].packets); |
| atomic64_add(sis->src_byte_count, &acct[IP_CT_DIR_ORIGINAL].bytes); |
| atomic64_add(sis->dest_packet_count, &acct[IP_CT_DIR_REPLY].packets); |
| atomic64_add(sis->dest_byte_count, &acct[IP_CT_DIR_REPLY].bytes); |
| spin_unlock_bh(&ct->lock); |
| } |
| |
| switch (sis->protocol) { |
| case IPPROTO_TCP: |
| spin_lock_bh(&ct->lock); |
| if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { |
| ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; |
| } |
| if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { |
| ct->proto.tcp.seen[0].td_end = sis->src_td_end; |
| } |
| if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { |
| ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; |
| } |
| if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { |
| ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; |
| } |
| if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { |
| ct->proto.tcp.seen[1].td_end = sis->dest_td_end; |
| } |
| if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { |
| ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; |
| } |
| spin_unlock_bh(&ct->lock); |
| break; |
| } |
| |
| /* |
| * Release connection |
| */ |
| nf_ct_put(ct); |
| } |
| |
| /* |
| * fast_classifier_device_event() |
| */ |
| static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr) |
| { |
| struct net_device *dev = (struct net_device *)ptr; |
| |
| switch (event) { |
| case NETDEV_DOWN: |
| if (dev) { |
| sfe_ipv4_destroy_all_rules_for_dev(dev); |
| } |
| break; |
| } |
| |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * fast_classifier_inet_event() |
| */ |
| static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr) |
| { |
| struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; |
| return fast_classifier_device_event(this, event, dev); |
| } |
| |
| /* |
| * fast_classifier_init() |
| */ |
| static int __init fast_classifier_init(void) |
| { |
| struct fast_classifier *sc = &__sc; |
| int result = -1; |
| |
| printk(KERN_ALERT "fast-classifier: starting up\n"); |
| DEBUG_INFO("SFE CM init\n"); |
| |
| /* |
| * Create sys/fast_classifier |
| */ |
| sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL); |
| if (!sc->sys_fast_classifier) { |
| DEBUG_ERROR("failed to register fast_classifier\n"); |
| goto exit1; |
| } |
| |
| sc->dev_notifier.notifier_call = fast_classifier_device_event; |
| sc->dev_notifier.priority = 1; |
| register_netdevice_notifier(&sc->dev_notifier); |
| |
| sc->inet_notifier.notifier_call = fast_classifier_inet_event; |
| sc->inet_notifier.priority = 1; |
| register_inetaddr_notifier(&sc->inet_notifier); |
| |
| /* |
| * Register our netfilter hooks. |
| */ |
| result = nf_register_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing)); |
| if (result < 0) { |
| DEBUG_ERROR("can't register nf post routing hook: %d\n", result); |
| goto exit6; |
| } |
| |
| #ifdef CONFIG_NF_CONNTRACK_EVENTS |
| /* |
| * Register a notifier hook to get fast notifications of expired connections. |
| */ |
| result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier); |
| if (result < 0) { |
| DEBUG_ERROR("can't register nf notifier hook: %d\n", result); |
| goto exit7; |
| } |
| #endif |
| |
| spin_lock_init(&sc->lock); |
| |
| /* |
| * Hook the receive path in the network stack. |
| */ |
| BUG_ON(athrs_fast_nat_recv != NULL); |
| RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv); |
| |
| /* |
| * Hook the shortcut sync callback. |
| */ |
| sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule); |
| |
| result = genl_register_family(&fast_classifier_gnl_family); |
| if (result!= 0) |
| goto exit8; |
| |
| result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv); |
| if (result != 0) |
| goto exit9; |
| |
| printk(KERN_ALERT "fast-classifier: registered\n"); |
| |
| return 0; |
| |
| exit9: |
| genl_unregister_family(&fast_classifier_gnl_family); |
| exit8: |
| |
| #ifdef CONFIG_NF_CONNTRACK_EVENTS |
| exit7: |
| #endif |
| nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing)); |
| |
| exit6: |
| unregister_inetaddr_notifier(&sc->inet_notifier); |
| unregister_netdevice_notifier(&sc->dev_notifier); |
| kobject_put(sc->sys_fast_classifier); |
| |
| exit1: |
| return result; |
| } |
| |
| /* |
| * fast_classifier_exit() |
| */ |
| static void __exit fast_classifier_exit(void) |
| { |
| struct fast_classifier *sc = &__sc; |
| int result = -1; |
| |
| DEBUG_INFO("SFE CM exit\n"); |
| printk(KERN_ALERT "fast-classifier: shutting down\n"); |
| |
| /* |
| * Unregister our sync callback. |
| */ |
| sfe_ipv4_register_sync_rule_callback(NULL); |
| |
| /* |
| * Unregister our receive callback. |
| */ |
| RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); |
| |
| /* |
| * Wait for all callbacks to complete. |
| */ |
| rcu_barrier(); |
| |
| /* |
| * Destroy all connections. |
| */ |
| sfe_ipv4_destroy_all_rules_for_dev(NULL); |
| |
| // XXX - this is where we need to unregister with any lower level offload services. |
| |
| #ifdef CONFIG_NF_CONNTRACK_EVENTS |
| nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); |
| |
| #endif |
| nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing)); |
| |
| unregister_inetaddr_notifier(&sc->inet_notifier); |
| unregister_netdevice_notifier(&sc->dev_notifier); |
| |
| kobject_put(sc->sys_fast_classifier); |
| |
| result = genl_register_family(&fast_classifier_gnl_family); |
| if (result != 0) |
| printk(KERN_CRIT "Unable to unreigster genl_family\n"); |
| |
| result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv); |
| if (result != 0) |
| printk(KERN_CRIT "Unable to unreigster genl_ops\n"); |
| } |
| |
| module_init(fast_classifier_init) |
| module_exit(fast_classifier_exit) |
| |
| MODULE_AUTHOR("Qualcomm Atheros Inc."); |
| MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); |
| MODULE_LICENSE("GPL"); |
| |