/*
 * sfe-cm.c
 *	Shortcut forwarding engine connection manager.
 *
 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
 *
 * All Rights Reserved.
 * Qualcomm Atheros Confidential and Proprietary.
 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsondcd08fb2013-11-22 09:25:16 -060011#include <linux/module.h>
12#include <linux/sysfs.h>
13#include <linux/skbuff.h>
14#include <net/route.h>
Xiaoping Fan978b3772015-05-27 14:15:18 -070015#include <net/ip6_route.h>
16#include <net/addrconf.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060017#include <linux/inetdevice.h>
18#include <linux/netfilter_bridge.h>
Xiaoping Fan978b3772015-05-27 14:15:18 -070019#include <linux/netfilter_ipv6.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060020#include <net/netfilter/nf_conntrack_acct.h>
21#include <net/netfilter/nf_conntrack_helper.h>
22#include <net/netfilter/nf_conntrack_zones.h>
23#include <net/netfilter/nf_conntrack_core.h>
Matthew McClintockbf6b5bc2014-02-24 12:27:14 -060024#include <linux/if_bridge.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060025
26#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070027#include "sfe_cm.h"
Xiaoping Fan3f1fe512014-11-05 12:14:57 -080028#include "sfe_backport.h"
Dave Hudsondcd08fb2013-11-22 09:25:16 -060029
30/*
31 * Per-module structure.
32 */
33struct sfe_cm {
34 spinlock_t lock; /* Lock for SMP correctness */
35
36 /*
37 * Control state.
38 */
39 struct kobject *sys_sfe_cm; /* sysfs linkage */
40
41 /*
42 * Callback notifiers.
43 */
44 struct notifier_block dev_notifier;
45 /* Device notifier */
46 struct notifier_block inet_notifier;
Xiaoping Fan978b3772015-05-27 14:15:18 -070047 /* IPv4 notifier */
48 struct notifier_block inet6_notifier;
49 /* IPv6 notifier */
Dave Hudsondcd08fb2013-11-22 09:25:16 -060050};
51
52struct sfe_cm __sc;
53
54/*
55 * Expose the hook for the receive processing.
56 */
57extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);
58
59/*
60 * Expose what should be a static flag in the TCP connection tracker.
61 */
62extern int nf_ct_tcp_no_window_check;
63
64/*
65 * sfe_cm_recv()
66 * Handle packet receives.
67 *
68 * Returns 1 if the packet is forwarded or 0 if it isn't.
69 */
70int sfe_cm_recv(struct sk_buff *skb)
71{
72 struct net_device *dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -060073
74 /*
75 * We know that for the vast majority of packets we need the transport
76 * layer header so we may as well start to fetch it now!
77 */
78 prefetch(skb->data + 32);
79 barrier();
80
81 dev = skb->dev;
82
Dave Hudsondcd08fb2013-11-22 09:25:16 -060083 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -070084 * We're only interested in IPv4 and IPv6 packets.
Xiaoping Fan59176422015-05-22 15:58:10 -070085 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -060086 if (likely(htons(ETH_P_IP) == skb->protocol)) {
Xiaoping Fan978b3772015-05-27 14:15:18 -070087#if (SFE_HOOK_ABOVE_BRIDGE)
88 struct in_device *in_dev;
89
90 /*
91 * Does our input device support IP processing?
92 */
93 in_dev = (struct in_device *)dev->ip_ptr;
94 if (unlikely(!in_dev)) {
95 DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
96 return 0;
97 }
98
99 /*
100 * Does it have an IP address? If it doesn't then we can't do anything
101 * interesting here!
102 */
103 if (unlikely(!in_dev->ifa_list)) {
104 DEBUG_TRACE("no IP address for device: %s\n", dev->name);
105 return 0;
106 }
107#endif
108
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600109 return sfe_ipv4_recv(dev, skb);
110 }
111
Xiaoping Fan978b3772015-05-27 14:15:18 -0700112 if (likely(htons(ETH_P_IPV6) == skb->protocol)) {
113#if (SFE_HOOK_ABOVE_BRIDGE)
114 struct inet6_dev *in_dev;
115
116 /*
117 * Does our input device support IPv6 processing?
118 */
119 in_dev = (struct inet6_dev *)dev->ip6_ptr;
120 if (unlikely(!in_dev)) {
121 DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name);
122 return 0;
123 }
124
125 /*
126 * Does it have an IPv6 address? If it doesn't then we can't do anything
127 * interesting here!
128 */
129 if (unlikely(list_empty(&in_dev->addr_list))) {
130 DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name);
131 return 0;
132 }
133#endif
134
135 return sfe_ipv6_recv(dev, skb);
136 }
137
Matthew McClintocka8ad7962014-01-16 16:49:30 -0600138 DEBUG_TRACE("not IP packet\n");
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600139 return 0;
140}
141
142/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600143 * sfe_cm_find_dev_and_mac_addr()
Xiaoping Fan978b3772015-05-27 14:15:18 -0700144 * Find the device and MAC address for a given IPv4/IPv6 address.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600145 *
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600146 * Returns true if we find the device and MAC address, otherwise false.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600147 *
148 * We look up the rtable entry for the address and, from its neighbour
149 * structure, obtain the hardware address. This means this function also
150 * works if the neighbours are routers too.
151 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700152static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, uint8_t *mac_addr, int is_v4)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600153{
154 struct neighbour *neigh;
155 struct rtable *rt;
Xiaoping Fan978b3772015-05-27 14:15:18 -0700156 struct rt6_info *rt6;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600157 struct dst_entry *dst;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600158 struct net_device *mac_dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600159
160 /*
161 * Look up the rtable entry for the IP address then get the hardware
162 * address from its neighbour structure. This means this work when the
163 * neighbours are routers too.
164 */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700165 if (likely(is_v4)) {
166 rt = ip_route_output(&init_net, addr->ip, 0, 0, 0);
167 if (unlikely(IS_ERR(rt))) {
168 goto ret_fail;
169 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600170
Xiaoping Fan978b3772015-05-27 14:15:18 -0700171 dst = (struct dst_entry *)rt;
172 } else {
173 rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0);
174 if (!rt6) {
175 goto ret_fail;
176 }
177
178 dst = (struct dst_entry *)rt6;
179 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600180
181 rcu_read_lock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700182 neigh = dst_neigh_lookup(dst, addr);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600183 if (unlikely(!neigh)) {
184 rcu_read_unlock();
185 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700186 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600187 }
188
189 if (unlikely(!(neigh->nud_state & NUD_VALID))) {
190 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700191 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600192 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700193 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600194 }
195
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600196 mac_dev = neigh->dev;
197 if (!mac_dev) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600198 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700199 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600200 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700201 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600202 }
203
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600204 memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len);
205
206 dev_hold(mac_dev);
207 *dev = mac_dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600208 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700209 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600210 dst_release(dst);
211
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600212 return true;
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700213
214ret_fail:
215 if (is_v4) {
216 DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", &addr->ip);
217
218 } else {
219 DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr->ip6);
220 }
221
222 return false;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600223}
224
225/*
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700226 * sfe_cm_post_routing()
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600227 * Called for packets about to leave the box - either locally generated or forwarded from another interface
228 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700229static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600230{
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700231 struct sfe_connection_create sic;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600232 struct net_device *in;
233 struct nf_conn *ct;
234 enum ip_conntrack_info ctinfo;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600235 struct net_device *dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600236 struct net_device *src_dev;
237 struct net_device *dest_dev;
238 struct net_device *src_br_dev = NULL;
239 struct net_device *dest_br_dev = NULL;
240 struct nf_conntrack_tuple orig_tuple;
241 struct nf_conntrack_tuple reply_tuple;
242
243 /*
244 * Don't process broadcast or multicast packets.
245 */
246 if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
247 DEBUG_TRACE("broadcast, ignoring\n");
248 return NF_ACCEPT;
249 }
250 if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
251 DEBUG_TRACE("multicast, ignoring\n");
252 return NF_ACCEPT;
253 }
254
255 /*
256 * Don't process packets that are not being forwarded.
257 */
258 in = dev_get_by_index(&init_net, skb->skb_iif);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600259 if (!in) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600260 DEBUG_TRACE("packet not forwarding\n");
261 return NF_ACCEPT;
262 }
263
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600264 dev_put(in);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600265
266 /*
267 * Don't process packets that aren't being tracked by conntrack.
268 */
269 ct = nf_ct_get(skb, &ctinfo);
270 if (unlikely(!ct)) {
271 DEBUG_TRACE("no conntrack connection, ignoring\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600272 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600273 }
274
275 /*
276 * Don't process untracked connections.
277 */
278 if (unlikely(ct == &nf_conntrack_untracked)) {
279 DEBUG_TRACE("untracked connection\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600280 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600281 }
282
283 /*
284 * Don't process connections that require support from a 'helper' (typically a NAT ALG).
285 */
286 if (unlikely(nfct_help(ct))) {
287 DEBUG_TRACE("connection has helper\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600288 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600289 }
290
291 /*
292 * Look up the details of our connection in conntrack.
293 *
294 * Note that the data we get from conntrack is for the "ORIGINAL" direction
295 * but our packet may actually be in the "REPLY" direction.
296 */
297 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
298 reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
299 sic.protocol = (int32_t)orig_tuple.dst.protonum;
300
301 /*
302 * Get addressing information, non-NAT first
303 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700304 if (likely(is_v4)) {
305 sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
306 sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600307
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700308 if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) {
309 DEBUG_TRACE("multicast address\n");
310 return NF_ACCEPT;
311 }
312
313 /*
314 * NAT'ed addresses - note these are as seen from the 'reply' direction
315 * When NAT does not apply to this connection these will be identical to the above.
316 */
317 sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip;
318 sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip;
319 } else {
Xiaoping Fan978b3772015-05-27 14:15:18 -0700320 sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
321 sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
322
323 if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) ||
324 ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) {
325 DEBUG_TRACE("multicast address\n");
326 return NF_ACCEPT;
327 }
328
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700329 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700330 * NAT'ed addresses - note these are as seen from the 'reply' direction
331 * When NAT does not apply to this connection these will be identical to the above.
332 */
333 sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6);
334 sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6);
Matthew McClintocka11c7cd2014-08-06 16:41:30 -0500335 }
336
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600337 sic.flags = 0;
338
339 switch (sic.protocol) {
340 case IPPROTO_TCP:
341 sic.src_port = orig_tuple.src.u.tcp.port;
342 sic.dest_port = orig_tuple.dst.u.tcp.port;
343 sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
344 sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
345 sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
346 sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
347 sic.src_td_end = ct->proto.tcp.seen[0].td_end;
348 sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
349 sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
350 sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
351 sic.dest_td_end = ct->proto.tcp.seen[1].td_end;
352 sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
353 if (nf_ct_tcp_no_window_check
354 || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
355 || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700356 sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600357 }
358
359 /*
360 * Don't try to manage a non-established connection.
361 */
362 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
363 DEBUG_TRACE("non-established connection\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600364 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600365 }
366
367 /*
368 * If the connection is shutting down do not manage it.
369 * state can not be SYN_SENT, SYN_RECV because connection is assured
370 * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
371 */
372 spin_lock_bh(&ct->lock);
373 if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
374 spin_unlock_bh(&ct->lock);
375 DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
376 ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port),
377 &sic.dest_ip, ntohs(sic.dest_port));
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600378 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600379 }
380 spin_unlock_bh(&ct->lock);
381 break;
382
383 case IPPROTO_UDP:
384 sic.src_port = orig_tuple.src.u.udp.port;
385 sic.dest_port = orig_tuple.dst.u.udp.port;
386 sic.src_port_xlate = reply_tuple.dst.u.udp.port;
387 sic.dest_port_xlate = reply_tuple.src.u.udp.port;
388 break;
389
390 default:
391 DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600392 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600393 }
394
395 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600396 * Get the net device and MAC addresses that correspond to the various source and
397 * destination host addresses.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600398 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700399 if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev, sic.src_mac, is_v4)) {
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600400 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600401 }
402
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700403 if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600404 goto done1;
405 }
406
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600407 dev_put(dev);
408
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700409 if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600410 goto done1;
411 }
412
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600413 dev_put(dev);
414
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700415 if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev, sic.dest_mac_xlate, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600416 goto done1;
417 }
418
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600419#if (!SFE_HOOK_ABOVE_BRIDGE)
420 /*
421 * Now our devices may actually be a bridge interface. If that's
422 * the case then we need to hunt down the underlying interface.
423 */
424 if (src_dev->priv_flags & IFF_EBRIDGE) {
425 src_br_dev = br_port_dev_get(src_dev, sic.src_mac);
426 if (!src_br_dev) {
427 DEBUG_TRACE("no port found on bridge\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600428 goto done2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600429 }
430
431 src_dev = src_br_dev;
432 }
433
434 if (dest_dev->priv_flags & IFF_EBRIDGE) {
435 dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate);
436 if (!dest_br_dev) {
437 DEBUG_TRACE("no port found on bridge\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600438 goto done3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600439 }
440
441 dest_dev = dest_br_dev;
442 }
443#else
444 /*
445 * Our devices may actually be part of a bridge interface. If that's
446 * the case then find the bridge interface instead.
447 */
448 if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800449 src_br_dev = SFE_DEV_MASTER(src_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600450 if (!src_br_dev) {
451 DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600452 goto done2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600453 }
454
455 dev_hold(src_br_dev);
456 src_dev = src_br_dev;
457 }
458
459 if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800460 dest_br_dev = SFE_DEV_MASTER(dest_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600461 if (!dest_br_dev) {
462 DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600463 goto done3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600464 }
465
466 dev_hold(dest_br_dev);
467 dest_dev = dest_br_dev;
468 }
469#endif
470
471 sic.src_dev = src_dev;
472 sic.dest_dev = dest_dev;
473
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600474 sic.src_mtu = src_dev->mtu;
475 sic.dest_mtu = dest_dev->mtu;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600476
Xiaoping Fan978b3772015-05-27 14:15:18 -0700477 if (likely(is_v4)) {
478 sfe_ipv4_create_rule(&sic);
479 } else {
480 sfe_ipv6_create_rule(&sic);
481 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600482
483 /*
484 * If we had bridge ports then release them too.
485 */
486 if (dest_br_dev) {
487 dev_put(dest_br_dev);
488 }
489
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600490done3:
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600491 if (src_br_dev) {
492 dev_put(src_br_dev);
493 }
494
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600495done2:
496 dev_put(dest_dev);
497
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600498done1:
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600499 dev_put(src_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600500
501 return NF_ACCEPT;
502}
503
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700504/*
505 * sfe_cm_ipv4_post_routing_hook()
506 * Called for packets about to leave the box - either locally generated or forwarded from another interface
507 */
508sfe_cm_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
509{
510 return sfe_cm_post_routing(skb, true);
511}
512
Xiaoping Fan978b3772015-05-27 14:15:18 -0700513/*
514 * sfe_cm_ipv6_post_routing_hook()
515 * Called for packets about to leave the box - either locally generated or forwarded from another interface
516 */
517sfe_cm_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
518{
519 return sfe_cm_post_routing(skb, false);
520}
521
522
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600523#ifdef CONFIG_NF_CONNTRACK_EVENTS
524/*
525 * sfe_cm_conntrack_event()
526 * Callback event invoked when a conntrack connection's state changes.
527 */
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600528#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
529static int sfe_cm_conntrack_event(struct notifier_block *this,
Matthew McClintock0f5c0592014-02-12 11:17:11 -0600530 unsigned long events, void *ptr)
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600531#else
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600532static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item)
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600533#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600534{
Matthew McClintock0f5c0592014-02-12 11:17:11 -0600535#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
536 struct nf_ct_event *item = ptr;
537#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700538 struct sfe_connection_destroy sid;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600539 struct nf_conn *ct = item->ct;
540 struct nf_conntrack_tuple orig_tuple;
541
542 /*
543 * If we don't have a conntrack entry then we're done.
544 */
545 if (unlikely(!ct)) {
546 DEBUG_WARN("no ct in conntrack event callback\n");
547 return NOTIFY_DONE;
548 }
549
550 /*
551 * If this is an untracked connection then we can't have any state either.
552 */
553 if (unlikely(ct == &nf_conntrack_untracked)) {
554 DEBUG_TRACE("ignoring untracked conn\n");
555 return NOTIFY_DONE;
556 }
557
558 /*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600559 * We're only interested in destroy events.
560 */
561 if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
562 DEBUG_TRACE("ignoring non-destroy event\n");
563 return NOTIFY_DONE;
564 }
565
566 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
567 sid.protocol = (int32_t)orig_tuple.dst.protonum;
568
569 /*
570 * Extract information from the conntrack connection. We're only interested
571 * in nominal connection information (i.e. we're ignoring any NAT information).
572 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600573 switch (sid.protocol) {
574 case IPPROTO_TCP:
575 sid.src_port = orig_tuple.src.u.tcp.port;
576 sid.dest_port = orig_tuple.dst.u.tcp.port;
577 break;
578
579 case IPPROTO_UDP:
580 sid.src_port = orig_tuple.src.u.udp.port;
581 sid.dest_port = orig_tuple.dst.u.udp.port;
582 break;
583
584 default:
585 DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
586 return NOTIFY_DONE;
587 }
588
Xiaoping Fan978b3772015-05-27 14:15:18 -0700589 if (likely(nf_ct_l3num(ct) == AF_INET)) {
590 sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
591 sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600592
Xiaoping Fan978b3772015-05-27 14:15:18 -0700593 sfe_ipv4_destroy_rule(&sid);
594 } else if (likely(nf_ct_l3num(ct) == AF_INET6)) {
595 sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
596 sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
597
598 sfe_ipv6_destroy_rule(&sid);
599 } else {
600 DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n");
601 }
602
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600603 return NOTIFY_DONE;
604}
605
606/*
607 * Netfilter conntrack event system to monitor connection tracking changes
608 */
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600609#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
610static struct notifier_block sfe_cm_conntrack_notifier = {
611 .notifier_call = sfe_cm_conntrack_event,
612};
613#else
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600614static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = {
615 .fcn = sfe_cm_conntrack_event,
616};
617#endif
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600618#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600619
620/*
621 * Structure to establish a hook into the post routing netfilter point - this
622 * will pick up local outbound and packets going from one interface to another.
623 *
624 * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
625 * We want to examine packets after NAT translation and any ALG processing.
626 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700627static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600628 {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800629 .hook = __sfe_cm_ipv4_post_routing_hook,
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600630 .owner = THIS_MODULE,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700631 .pf = NFPROTO_IPV4,
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600632 .hooknum = NF_INET_POST_ROUTING,
633 .priority = NF_IP_PRI_NAT_SRC + 1,
634 },
Xiaoping Fan978b3772015-05-27 14:15:18 -0700635#ifdef SFE_SUPPORT_IPV6
636 {
637 .hook = __sfe_cm_ipv6_post_routing_hook,
638 .owner = THIS_MODULE,
639 .pf = NFPROTO_IPV6,
640 .hooknum = NF_INET_POST_ROUTING,
641 .priority = NF_IP6_PRI_NAT_SRC + 1,
642 },
643#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600644};
645
646/*
647 * sfe_cm_sync_rule()
648 * Synchronize a connection's state.
649 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700650static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600651{
652 struct nf_conntrack_tuple_hash *h;
653 struct nf_conntrack_tuple tuple;
654 struct nf_conn *ct;
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800655 SFE_NF_CONN_ACCT(acct);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600656
657 /*
658 * Create a tuple so as to be able to look up a connection
659 */
660 memset(&tuple, 0, sizeof(tuple));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600661 tuple.src.u.all = (__be16)sis->src_port;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600662 tuple.dst.dir = IP_CT_DIR_ORIGINAL;
663 tuple.dst.protonum = (uint8_t)sis->protocol;
664 tuple.dst.u.all = (__be16)sis->dest_port;
665
Xiaoping Fan978b3772015-05-27 14:15:18 -0700666 if (sis->is_v6) {
667 tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6);
668 tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6);
669 tuple.src.l3num = AF_INET6;
670
671 DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n",
672 (int)tuple.dst.protonum,
673 &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all),
674 &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all));
675 } else {
676 tuple.src.u3.ip = sis->src_ip.ip;
677 tuple.dst.u3.ip = sis->dest_ip.ip;
678 tuple.src.l3num = AF_INET;
679
680 DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
681 (int)tuple.dst.protonum,
682 &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
683 &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));
684 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600685
686 /*
687 * Look up conntrack connection
688 */
689 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
690 if (unlikely(!h)) {
691 DEBUG_TRACE("no connection found\n");
692 return;
693 }
694
695 ct = nf_ct_tuplehash_to_ctrack(h);
696 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
697
698 /*
699 * Only update if this is not a fixed timeout
700 */
701 if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700702 spin_lock_bh(&ct->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600703 ct->timeout.expires += sis->delta_jiffies;
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700704 spin_unlock_bh(&ct->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600705 }
706
707 acct = nf_conn_acct_find(ct);
708 if (acct) {
709 spin_lock_bh(&ct->lock);
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800710 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets, sis->src_packet_count);
711 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes, sis->src_byte_count);
712 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets, sis->dest_packet_count);
713 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes, sis->dest_byte_count);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600714 spin_unlock_bh(&ct->lock);
715 }
716
717 switch (sis->protocol) {
718 case IPPROTO_TCP:
719 spin_lock_bh(&ct->lock);
720 if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
721 ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
722 }
723 if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
724 ct->proto.tcp.seen[0].td_end = sis->src_td_end;
725 }
726 if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
727 ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
728 }
729 if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
730 ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
731 }
732 if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
733 ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
734 }
735 if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
736 ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
737 }
738 spin_unlock_bh(&ct->lock);
739 break;
740 }
741
742 /*
743 * Release connection
744 */
745 nf_ct_put(ct);
746}
747
748/*
749 * sfe_cm_device_event()
750 */
751static int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr)
752{
753 struct net_device *dev = (struct net_device *)ptr;
754
755 switch (event) {
756 case NETDEV_DOWN:
757 if (dev) {
758 sfe_ipv4_destroy_all_rules_for_dev(dev);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700759 sfe_ipv6_destroy_all_rules_for_dev(dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600760 }
761 break;
762 }
763
764 return NOTIFY_DONE;
765}
766
767/*
768 * sfe_cm_inet_event()
769 */
770static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
771{
772 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
773 return sfe_cm_device_event(this, event, dev);
774}
775
776/*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700777 * sfe_cm_inet6_event()
778 */
779static int sfe_cm_inet6_event(struct notifier_block *this, unsigned long event, void *ptr)
780{
781 struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev;
782 return sfe_cm_device_event(this, event, dev);
783}
784
785/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600786 * sfe_cm_init()
787 */
788static int __init sfe_cm_init(void)
789{
790 struct sfe_cm *sc = &__sc;
791 int result = -1;
792
793 DEBUG_INFO("SFE CM init\n");
794
795 /*
796 * Create sys/sfe_cm
797 */
798 sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL);
799 if (!sc->sys_sfe_cm) {
800 DEBUG_ERROR("failed to register sfe_cm\n");
801 goto exit1;
802 }
803
804 sc->dev_notifier.notifier_call = sfe_cm_device_event;
805 sc->dev_notifier.priority = 1;
806 register_netdevice_notifier(&sc->dev_notifier);
807
808 sc->inet_notifier.notifier_call = sfe_cm_inet_event;
809 sc->inet_notifier.priority = 1;
810 register_inetaddr_notifier(&sc->inet_notifier);
811
Xiaoping Fan978b3772015-05-27 14:15:18 -0700812 sc->inet6_notifier.notifier_call = sfe_cm_inet6_event;
813 sc->inet6_notifier.priority = 1;
814 register_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600815 /*
816 * Register our netfilter hooks.
817 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700818 result = nf_register_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600819 if (result < 0) {
820 DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
Xiaoping Fan59176422015-05-22 15:58:10 -0700821 goto exit2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600822 }
823
824#ifdef CONFIG_NF_CONNTRACK_EVENTS
825 /*
826 * Register a notifier hook to get fast notifications of expired connections.
827 */
828 result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
829 if (result < 0) {
830 DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
Xiaoping Fan59176422015-05-22 15:58:10 -0700831 goto exit3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600832 }
833#endif
834
835 spin_lock_init(&sc->lock);
836
837 /*
838 * Hook the receive path in the network stack.
839 */
840 BUG_ON(athrs_fast_nat_recv != NULL);
841 RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv);
842
843 /*
844 * Hook the shortcut sync callback.
845 */
846 sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700847 sfe_ipv6_register_sync_rule_callback(sfe_cm_sync_rule);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600848 return 0;
849
850#ifdef CONFIG_NF_CONNTRACK_EVENTS
Xiaoping Fan59176422015-05-22 15:58:10 -0700851exit3:
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600852#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700853 nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600854
Xiaoping Fan59176422015-05-22 15:58:10 -0700855exit2:
Xiaoping Fan978b3772015-05-27 14:15:18 -0700856 unregister_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600857 unregister_inetaddr_notifier(&sc->inet_notifier);
858 unregister_netdevice_notifier(&sc->dev_notifier);
859 kobject_put(sc->sys_sfe_cm);
860
861exit1:
862 return result;
863}
864
865/*
866 * sfe_cm_exit()
867 */
868static void __exit sfe_cm_exit(void)
869{
870 struct sfe_cm *sc = &__sc;
871
872 DEBUG_INFO("SFE CM exit\n");
873
874 /*
875 * Unregister our sync callback.
876 */
877 sfe_ipv4_register_sync_rule_callback(NULL);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700878 sfe_ipv6_register_sync_rule_callback(NULL);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600879
880 /*
881 * Unregister our receive callback.
882 */
883 RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
884
885 /*
886 * Wait for all callbacks to complete.
887 */
888 rcu_barrier();
889
890 /*
891 * Destroy all connections.
892 */
893 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700894 sfe_ipv6_destroy_all_rules_for_dev(NULL);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600895
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600896#ifdef CONFIG_NF_CONNTRACK_EVENTS
897 nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier);
898
899#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700900 nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600901
Xiaoping Fan978b3772015-05-27 14:15:18 -0700902 unregister_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600903 unregister_inetaddr_notifier(&sc->inet_notifier);
904 unregister_netdevice_notifier(&sc->dev_notifier);
905
906 kobject_put(sc->sys_sfe_cm);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600907}
908
909module_init(sfe_cm_init)
910module_exit(sfe_cm_exit)
911
912MODULE_AUTHOR("Qualcomm Atheros Inc.");
913MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
Matthew McClintocka3221942014-01-16 11:44:26 -0600914MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600915