blob: 7f24e40f17e3137319fa3b194f93636c75e0ecf5 [file] [log] [blame]
/*
 * sfe-cm.c
 *	Shortcut forwarding engine connection manager.
 *
 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
 *
 * All Rights Reserved.
 * Qualcomm Atheros Confidential and Proprietary.
 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsondcd08fb2013-11-22 09:25:16 -060011#include <linux/module.h>
12#include <linux/sysfs.h>
13#include <linux/skbuff.h>
14#include <net/route.h>
Xiaoping Fan978b3772015-05-27 14:15:18 -070015#include <net/ip6_route.h>
16#include <net/addrconf.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060017#include <linux/inetdevice.h>
18#include <linux/netfilter_bridge.h>
Xiaoping Fan978b3772015-05-27 14:15:18 -070019#include <linux/netfilter_ipv6.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060020#include <net/netfilter/nf_conntrack_acct.h>
21#include <net/netfilter/nf_conntrack_helper.h>
22#include <net/netfilter/nf_conntrack_zones.h>
23#include <net/netfilter/nf_conntrack_core.h>
Matthew McClintockbf6b5bc2014-02-24 12:27:14 -060024#include <linux/if_bridge.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060025
26#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070027#include "sfe_cm.h"
Xiaoping Fan3f1fe512014-11-05 12:14:57 -080028#include "sfe_backport.h"
Dave Hudsondcd08fb2013-11-22 09:25:16 -060029
30/*
31 * Per-module structure.
32 */
33struct sfe_cm {
34 spinlock_t lock; /* Lock for SMP correctness */
35
36 /*
37 * Control state.
38 */
39 struct kobject *sys_sfe_cm; /* sysfs linkage */
40
41 /*
42 * Callback notifiers.
43 */
44 struct notifier_block dev_notifier;
45 /* Device notifier */
46 struct notifier_block inet_notifier;
Xiaoping Fan978b3772015-05-27 14:15:18 -070047 /* IPv4 notifier */
48 struct notifier_block inet6_notifier;
49 /* IPv6 notifier */
Dave Hudsondcd08fb2013-11-22 09:25:16 -060050};
51
52struct sfe_cm __sc;
53
54/*
55 * Expose the hook for the receive processing.
56 */
57extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);
58
59/*
60 * Expose what should be a static flag in the TCP connection tracker.
61 */
62extern int nf_ct_tcp_no_window_check;
63
64/*
65 * sfe_cm_recv()
66 * Handle packet receives.
67 *
68 * Returns 1 if the packet is forwarded or 0 if it isn't.
69 */
70int sfe_cm_recv(struct sk_buff *skb)
71{
72 struct net_device *dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -060073
74 /*
75 * We know that for the vast majority of packets we need the transport
76 * layer header so we may as well start to fetch it now!
77 */
78 prefetch(skb->data + 32);
79 barrier();
80
81 dev = skb->dev;
82
Dave Hudsondcd08fb2013-11-22 09:25:16 -060083 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -070084 * We're only interested in IPv4 and IPv6 packets.
Xiaoping Fan59176422015-05-22 15:58:10 -070085 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -060086 if (likely(htons(ETH_P_IP) == skb->protocol)) {
Xiaoping Fan978b3772015-05-27 14:15:18 -070087#if (SFE_HOOK_ABOVE_BRIDGE)
88 struct in_device *in_dev;
89
90 /*
91 * Does our input device support IP processing?
92 */
93 in_dev = (struct in_device *)dev->ip_ptr;
94 if (unlikely(!in_dev)) {
95 DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
96 return 0;
97 }
98
99 /*
100 * Does it have an IP address? If it doesn't then we can't do anything
101 * interesting here!
102 */
103 if (unlikely(!in_dev->ifa_list)) {
104 DEBUG_TRACE("no IP address for device: %s\n", dev->name);
105 return 0;
106 }
107#endif
108
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600109 return sfe_ipv4_recv(dev, skb);
110 }
111
Xiaoping Fan978b3772015-05-27 14:15:18 -0700112 if (likely(htons(ETH_P_IPV6) == skb->protocol)) {
113#if (SFE_HOOK_ABOVE_BRIDGE)
114 struct inet6_dev *in_dev;
115
116 /*
117 * Does our input device support IPv6 processing?
118 */
119 in_dev = (struct inet6_dev *)dev->ip6_ptr;
120 if (unlikely(!in_dev)) {
121 DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name);
122 return 0;
123 }
124
125 /*
126 * Does it have an IPv6 address? If it doesn't then we can't do anything
127 * interesting here!
128 */
129 if (unlikely(list_empty(&in_dev->addr_list))) {
130 DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name);
131 return 0;
132 }
133#endif
134
135 return sfe_ipv6_recv(dev, skb);
136 }
137
Matthew McClintocka8ad7962014-01-16 16:49:30 -0600138 DEBUG_TRACE("not IP packet\n");
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600139 return 0;
140}
141
142/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600143 * sfe_cm_find_dev_and_mac_addr()
Xiaoping Fan978b3772015-05-27 14:15:18 -0700144 * Find the device and MAC address for a given IPv4/IPv6 address.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600145 *
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600146 * Returns true if we find the device and MAC address, otherwise false.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600147 *
148 * We look up the rtable entry for the address and, from its neighbour
149 * structure, obtain the hardware address. This means this function also
150 * works if the neighbours are routers too.
151 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700152static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, uint8_t *mac_addr, int is_v4)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600153{
154 struct neighbour *neigh;
155 struct rtable *rt;
Xiaoping Fan978b3772015-05-27 14:15:18 -0700156 struct rt6_info *rt6;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600157 struct dst_entry *dst;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600158 struct net_device *mac_dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600159
160 /*
161 * Look up the rtable entry for the IP address then get the hardware
162 * address from its neighbour structure. This means this work when the
163 * neighbours are routers too.
164 */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700165 if (likely(is_v4)) {
166 rt = ip_route_output(&init_net, addr->ip, 0, 0, 0);
167 if (unlikely(IS_ERR(rt))) {
168 goto ret_fail;
169 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600170
Xiaoping Fan978b3772015-05-27 14:15:18 -0700171 dst = (struct dst_entry *)rt;
172 } else {
173 rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0);
174 if (!rt6) {
175 goto ret_fail;
176 }
177
178 dst = (struct dst_entry *)rt6;
179 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600180
181 rcu_read_lock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700182 neigh = dst_neigh_lookup(dst, addr);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600183 if (unlikely(!neigh)) {
184 rcu_read_unlock();
185 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700186 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600187 }
188
189 if (unlikely(!(neigh->nud_state & NUD_VALID))) {
190 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700191 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600192 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700193 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600194 }
195
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600196 mac_dev = neigh->dev;
197 if (!mac_dev) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600198 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700199 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600200 dst_release(dst);
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700201 goto ret_fail;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600202 }
203
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600204 memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len);
205
206 dev_hold(mac_dev);
207 *dev = mac_dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600208 rcu_read_unlock();
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700209 neigh_release(neigh);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600210 dst_release(dst);
211
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600212 return true;
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700213
214ret_fail:
215 if (is_v4) {
216 DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", &addr->ip);
217
218 } else {
219 DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr->ip6);
220 }
221
222 return false;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600223}
224
225/*
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700226 * sfe_cm_post_routing()
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600227 * Called for packets about to leave the box - either locally generated or forwarded from another interface
228 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700229static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600230{
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700231 struct sfe_connection_create sic;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600232 struct net_device *in;
233 struct nf_conn *ct;
234 enum ip_conntrack_info ctinfo;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600235 struct net_device *dev;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600236 struct net_device *src_dev;
237 struct net_device *dest_dev;
238 struct net_device *src_br_dev = NULL;
239 struct net_device *dest_br_dev = NULL;
240 struct nf_conntrack_tuple orig_tuple;
241 struct nf_conntrack_tuple reply_tuple;
242
243 /*
244 * Don't process broadcast or multicast packets.
245 */
246 if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
247 DEBUG_TRACE("broadcast, ignoring\n");
248 return NF_ACCEPT;
249 }
250 if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
251 DEBUG_TRACE("multicast, ignoring\n");
252 return NF_ACCEPT;
253 }
254
255 /*
256 * Don't process packets that are not being forwarded.
257 */
258 in = dev_get_by_index(&init_net, skb->skb_iif);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600259 if (!in) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600260 DEBUG_TRACE("packet not forwarding\n");
261 return NF_ACCEPT;
262 }
263
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600264 dev_put(in);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600265
266 /*
267 * Don't process packets that aren't being tracked by conntrack.
268 */
269 ct = nf_ct_get(skb, &ctinfo);
270 if (unlikely(!ct)) {
271 DEBUG_TRACE("no conntrack connection, ignoring\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600272 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600273 }
274
275 /*
276 * Don't process untracked connections.
277 */
278 if (unlikely(ct == &nf_conntrack_untracked)) {
279 DEBUG_TRACE("untracked connection\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600280 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600281 }
282
283 /*
Xiaoping Fan956461e2015-06-25 17:35:13 -0700284 * Unconfirmed connection may be dropped by Linux at the final step,
285 * So we don't process unconfirmed connections.
286 */
287 if (!nf_ct_is_confirmed(ct)) {
288 DEBUG_TRACE("unconfirmed connection\n");
289 return NF_ACCEPT;
290 }
291
292 /*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600293 * Don't process connections that require support from a 'helper' (typically a NAT ALG).
294 */
295 if (unlikely(nfct_help(ct))) {
296 DEBUG_TRACE("connection has helper\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600297 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600298 }
299
300 /*
301 * Look up the details of our connection in conntrack.
302 *
303 * Note that the data we get from conntrack is for the "ORIGINAL" direction
304 * but our packet may actually be in the "REPLY" direction.
305 */
306 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
307 reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
308 sic.protocol = (int32_t)orig_tuple.dst.protonum;
309
310 /*
311 * Get addressing information, non-NAT first
312 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700313 if (likely(is_v4)) {
314 sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
315 sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600316
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700317 if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) {
318 DEBUG_TRACE("multicast address\n");
319 return NF_ACCEPT;
320 }
321
322 /*
323 * NAT'ed addresses - note these are as seen from the 'reply' direction
324 * When NAT does not apply to this connection these will be identical to the above.
325 */
326 sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip;
327 sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip;
328 } else {
Xiaoping Fan978b3772015-05-27 14:15:18 -0700329 sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
330 sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
331
332 if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) ||
333 ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) {
334 DEBUG_TRACE("multicast address\n");
335 return NF_ACCEPT;
336 }
337
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700338 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700339 * NAT'ed addresses - note these are as seen from the 'reply' direction
340 * When NAT does not apply to this connection these will be identical to the above.
341 */
342 sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6);
343 sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6);
Matthew McClintocka11c7cd2014-08-06 16:41:30 -0500344 }
345
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600346 sic.flags = 0;
347
348 switch (sic.protocol) {
349 case IPPROTO_TCP:
350 sic.src_port = orig_tuple.src.u.tcp.port;
351 sic.dest_port = orig_tuple.dst.u.tcp.port;
352 sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
353 sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
354 sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
355 sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
356 sic.src_td_end = ct->proto.tcp.seen[0].td_end;
357 sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
358 sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
359 sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
360 sic.dest_td_end = ct->proto.tcp.seen[1].td_end;
361 sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
362 if (nf_ct_tcp_no_window_check
363 || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
364 || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700365 sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600366 }
367
368 /*
369 * Don't try to manage a non-established connection.
370 */
371 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
372 DEBUG_TRACE("non-established connection\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600373 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600374 }
375
376 /*
377 * If the connection is shutting down do not manage it.
378 * state can not be SYN_SENT, SYN_RECV because connection is assured
379 * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
380 */
381 spin_lock_bh(&ct->lock);
382 if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
383 spin_unlock_bh(&ct->lock);
384 DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
385 ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port),
386 &sic.dest_ip, ntohs(sic.dest_port));
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600387 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600388 }
389 spin_unlock_bh(&ct->lock);
390 break;
391
392 case IPPROTO_UDP:
393 sic.src_port = orig_tuple.src.u.udp.port;
394 sic.dest_port = orig_tuple.dst.u.udp.port;
395 sic.src_port_xlate = reply_tuple.dst.u.udp.port;
396 sic.dest_port_xlate = reply_tuple.src.u.udp.port;
397 break;
398
399 default:
400 DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600401 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600402 }
403
404 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600405 * Get the net device and MAC addresses that correspond to the various source and
406 * destination host addresses.
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600407 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700408 if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev, sic.src_mac, is_v4)) {
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600409 return NF_ACCEPT;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600410 }
411
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700412 if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600413 goto done1;
414 }
415
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600416 dev_put(dev);
417
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700418 if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600419 goto done1;
420 }
421
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600422 dev_put(dev);
423
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700424 if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev, sic.dest_mac_xlate, is_v4)) {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600425 goto done1;
426 }
427
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600428#if (!SFE_HOOK_ABOVE_BRIDGE)
429 /*
430 * Now our devices may actually be a bridge interface. If that's
431 * the case then we need to hunt down the underlying interface.
432 */
433 if (src_dev->priv_flags & IFF_EBRIDGE) {
434 src_br_dev = br_port_dev_get(src_dev, sic.src_mac);
435 if (!src_br_dev) {
436 DEBUG_TRACE("no port found on bridge\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600437 goto done2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600438 }
439
440 src_dev = src_br_dev;
441 }
442
443 if (dest_dev->priv_flags & IFF_EBRIDGE) {
444 dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate);
445 if (!dest_br_dev) {
446 DEBUG_TRACE("no port found on bridge\n");
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600447 goto done3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600448 }
449
450 dest_dev = dest_br_dev;
451 }
452#else
453 /*
454 * Our devices may actually be part of a bridge interface. If that's
455 * the case then find the bridge interface instead.
456 */
457 if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800458 src_br_dev = SFE_DEV_MASTER(src_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600459 if (!src_br_dev) {
460 DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600461 goto done2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600462 }
463
464 dev_hold(src_br_dev);
465 src_dev = src_br_dev;
466 }
467
468 if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800469 dest_br_dev = SFE_DEV_MASTER(dest_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600470 if (!dest_br_dev) {
471 DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600472 goto done3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600473 }
474
475 dev_hold(dest_br_dev);
476 dest_dev = dest_br_dev;
477 }
478#endif
479
480 sic.src_dev = src_dev;
481 sic.dest_dev = dest_dev;
482
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600483 sic.src_mtu = src_dev->mtu;
484 sic.dest_mtu = dest_dev->mtu;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600485
Xiaoping Fan978b3772015-05-27 14:15:18 -0700486 if (likely(is_v4)) {
487 sfe_ipv4_create_rule(&sic);
488 } else {
489 sfe_ipv6_create_rule(&sic);
490 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600491
492 /*
493 * If we had bridge ports then release them too.
494 */
495 if (dest_br_dev) {
496 dev_put(dest_br_dev);
497 }
498
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600499done3:
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600500 if (src_br_dev) {
501 dev_put(src_br_dev);
502 }
503
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600504done2:
505 dev_put(dest_dev);
506
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600507done1:
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600508 dev_put(src_dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600509
510 return NF_ACCEPT;
511}
512
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700513/*
514 * sfe_cm_ipv4_post_routing_hook()
515 * Called for packets about to leave the box - either locally generated or forwarded from another interface
516 */
517sfe_cm_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
518{
519 return sfe_cm_post_routing(skb, true);
520}
521
Xiaoping Fan978b3772015-05-27 14:15:18 -0700522/*
523 * sfe_cm_ipv6_post_routing_hook()
524 * Called for packets about to leave the box - either locally generated or forwarded from another interface
525 */
526sfe_cm_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
527{
528 return sfe_cm_post_routing(skb, false);
529}
530
531
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600532#ifdef CONFIG_NF_CONNTRACK_EVENTS
533/*
534 * sfe_cm_conntrack_event()
535 * Callback event invoked when a conntrack connection's state changes.
536 */
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600537#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
538static int sfe_cm_conntrack_event(struct notifier_block *this,
Matthew McClintock0f5c0592014-02-12 11:17:11 -0600539 unsigned long events, void *ptr)
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600540#else
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600541static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item)
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600542#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600543{
Matthew McClintock0f5c0592014-02-12 11:17:11 -0600544#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
545 struct nf_ct_event *item = ptr;
546#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700547 struct sfe_connection_destroy sid;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600548 struct nf_conn *ct = item->ct;
549 struct nf_conntrack_tuple orig_tuple;
550
551 /*
552 * If we don't have a conntrack entry then we're done.
553 */
554 if (unlikely(!ct)) {
555 DEBUG_WARN("no ct in conntrack event callback\n");
556 return NOTIFY_DONE;
557 }
558
559 /*
560 * If this is an untracked connection then we can't have any state either.
561 */
562 if (unlikely(ct == &nf_conntrack_untracked)) {
563 DEBUG_TRACE("ignoring untracked conn\n");
564 return NOTIFY_DONE;
565 }
566
567 /*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600568 * We're only interested in destroy events.
569 */
570 if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
571 DEBUG_TRACE("ignoring non-destroy event\n");
572 return NOTIFY_DONE;
573 }
574
575 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
576 sid.protocol = (int32_t)orig_tuple.dst.protonum;
577
578 /*
579 * Extract information from the conntrack connection. We're only interested
580 * in nominal connection information (i.e. we're ignoring any NAT information).
581 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600582 switch (sid.protocol) {
583 case IPPROTO_TCP:
584 sid.src_port = orig_tuple.src.u.tcp.port;
585 sid.dest_port = orig_tuple.dst.u.tcp.port;
586 break;
587
588 case IPPROTO_UDP:
589 sid.src_port = orig_tuple.src.u.udp.port;
590 sid.dest_port = orig_tuple.dst.u.udp.port;
591 break;
592
593 default:
594 DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
595 return NOTIFY_DONE;
596 }
597
Xiaoping Fan978b3772015-05-27 14:15:18 -0700598 if (likely(nf_ct_l3num(ct) == AF_INET)) {
599 sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
600 sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600601
Xiaoping Fan978b3772015-05-27 14:15:18 -0700602 sfe_ipv4_destroy_rule(&sid);
603 } else if (likely(nf_ct_l3num(ct) == AF_INET6)) {
604 sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
605 sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
606
607 sfe_ipv6_destroy_rule(&sid);
608 } else {
609 DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n");
610 }
611
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600612 return NOTIFY_DONE;
613}
614
615/*
616 * Netfilter conntrack event system to monitor connection tracking changes
617 */
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600618#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
619static struct notifier_block sfe_cm_conntrack_notifier = {
620 .notifier_call = sfe_cm_conntrack_event,
621};
622#else
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600623static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = {
624 .fcn = sfe_cm_conntrack_event,
625};
626#endif
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600627#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600628
629/*
630 * Structure to establish a hook into the post routing netfilter point - this
631 * will pick up local outbound and packets going from one interface to another.
632 *
633 * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
634 * We want to examine packets after NAT translation and any ALG processing.
635 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700636static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = {
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600637 {
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800638 .hook = __sfe_cm_ipv4_post_routing_hook,
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600639 .owner = THIS_MODULE,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700640 .pf = NFPROTO_IPV4,
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600641 .hooknum = NF_INET_POST_ROUTING,
642 .priority = NF_IP_PRI_NAT_SRC + 1,
643 },
Xiaoping Fan978b3772015-05-27 14:15:18 -0700644#ifdef SFE_SUPPORT_IPV6
645 {
646 .hook = __sfe_cm_ipv6_post_routing_hook,
647 .owner = THIS_MODULE,
648 .pf = NFPROTO_IPV6,
649 .hooknum = NF_INET_POST_ROUTING,
650 .priority = NF_IP6_PRI_NAT_SRC + 1,
651 },
652#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600653};
654
655/*
656 * sfe_cm_sync_rule()
657 * Synchronize a connection's state.
658 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700659static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600660{
661 struct nf_conntrack_tuple_hash *h;
662 struct nf_conntrack_tuple tuple;
663 struct nf_conn *ct;
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800664 SFE_NF_CONN_ACCT(acct);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600665
666 /*
667 * Create a tuple so as to be able to look up a connection
668 */
669 memset(&tuple, 0, sizeof(tuple));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600670 tuple.src.u.all = (__be16)sis->src_port;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600671 tuple.dst.dir = IP_CT_DIR_ORIGINAL;
672 tuple.dst.protonum = (uint8_t)sis->protocol;
673 tuple.dst.u.all = (__be16)sis->dest_port;
674
Xiaoping Fan978b3772015-05-27 14:15:18 -0700675 if (sis->is_v6) {
676 tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6);
677 tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6);
678 tuple.src.l3num = AF_INET6;
679
680 DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n",
681 (int)tuple.dst.protonum,
682 &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all),
683 &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all));
684 } else {
685 tuple.src.u3.ip = sis->src_ip.ip;
686 tuple.dst.u3.ip = sis->dest_ip.ip;
687 tuple.src.l3num = AF_INET;
688
689 DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
690 (int)tuple.dst.protonum,
691 &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
692 &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));
693 }
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600694
695 /*
696 * Look up conntrack connection
697 */
698 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
699 if (unlikely(!h)) {
700 DEBUG_TRACE("no connection found\n");
701 return;
702 }
703
704 ct = nf_ct_tuplehash_to_ctrack(h);
705 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
706
707 /*
708 * Only update if this is not a fixed timeout
709 */
710 if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700711 spin_lock_bh(&ct->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600712 ct->timeout.expires += sis->delta_jiffies;
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700713 spin_unlock_bh(&ct->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600714 }
715
716 acct = nf_conn_acct_find(ct);
717 if (acct) {
718 spin_lock_bh(&ct->lock);
Xiaoping Fan3f1fe512014-11-05 12:14:57 -0800719 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets, sis->src_packet_count);
720 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes, sis->src_byte_count);
721 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets, sis->dest_packet_count);
722 atomic64_set(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes, sis->dest_byte_count);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600723 spin_unlock_bh(&ct->lock);
724 }
725
726 switch (sis->protocol) {
727 case IPPROTO_TCP:
728 spin_lock_bh(&ct->lock);
729 if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
730 ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
731 }
732 if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
733 ct->proto.tcp.seen[0].td_end = sis->src_td_end;
734 }
735 if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
736 ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
737 }
738 if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
739 ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
740 }
741 if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
742 ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
743 }
744 if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
745 ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
746 }
747 spin_unlock_bh(&ct->lock);
748 break;
749 }
750
751 /*
752 * Release connection
753 */
754 nf_ct_put(ct);
755}
756
757/*
758 * sfe_cm_device_event()
759 */
760static int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr)
761{
762 struct net_device *dev = (struct net_device *)ptr;
763
764 switch (event) {
765 case NETDEV_DOWN:
766 if (dev) {
767 sfe_ipv4_destroy_all_rules_for_dev(dev);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700768 sfe_ipv6_destroy_all_rules_for_dev(dev);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600769 }
770 break;
771 }
772
773 return NOTIFY_DONE;
774}
775
776/*
777 * sfe_cm_inet_event()
778 */
779static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
780{
781 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
782 return sfe_cm_device_event(this, event, dev);
783}
784
785/*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700786 * sfe_cm_inet6_event()
787 */
788static int sfe_cm_inet6_event(struct notifier_block *this, unsigned long event, void *ptr)
789{
790 struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev;
791 return sfe_cm_device_event(this, event, dev);
792}
793
794/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600795 * sfe_cm_init()
796 */
797static int __init sfe_cm_init(void)
798{
799 struct sfe_cm *sc = &__sc;
800 int result = -1;
801
802 DEBUG_INFO("SFE CM init\n");
803
804 /*
805 * Create sys/sfe_cm
806 */
807 sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL);
808 if (!sc->sys_sfe_cm) {
809 DEBUG_ERROR("failed to register sfe_cm\n");
810 goto exit1;
811 }
812
813 sc->dev_notifier.notifier_call = sfe_cm_device_event;
814 sc->dev_notifier.priority = 1;
815 register_netdevice_notifier(&sc->dev_notifier);
816
817 sc->inet_notifier.notifier_call = sfe_cm_inet_event;
818 sc->inet_notifier.priority = 1;
819 register_inetaddr_notifier(&sc->inet_notifier);
820
Xiaoping Fan978b3772015-05-27 14:15:18 -0700821 sc->inet6_notifier.notifier_call = sfe_cm_inet6_event;
822 sc->inet6_notifier.priority = 1;
823 register_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600824 /*
825 * Register our netfilter hooks.
826 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700827 result = nf_register_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600828 if (result < 0) {
829 DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
Xiaoping Fan59176422015-05-22 15:58:10 -0700830 goto exit2;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600831 }
832
833#ifdef CONFIG_NF_CONNTRACK_EVENTS
834 /*
835 * Register a notifier hook to get fast notifications of expired connections.
836 */
837 result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
838 if (result < 0) {
839 DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
Xiaoping Fan59176422015-05-22 15:58:10 -0700840 goto exit3;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600841 }
842#endif
843
844 spin_lock_init(&sc->lock);
845
846 /*
847 * Hook the receive path in the network stack.
848 */
849 BUG_ON(athrs_fast_nat_recv != NULL);
850 RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv);
851
852 /*
853 * Hook the shortcut sync callback.
854 */
855 sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700856 sfe_ipv6_register_sync_rule_callback(sfe_cm_sync_rule);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600857 return 0;
858
859#ifdef CONFIG_NF_CONNTRACK_EVENTS
Xiaoping Fan59176422015-05-22 15:58:10 -0700860exit3:
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600861#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700862 nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600863
Xiaoping Fan59176422015-05-22 15:58:10 -0700864exit2:
Xiaoping Fan978b3772015-05-27 14:15:18 -0700865 unregister_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600866 unregister_inetaddr_notifier(&sc->inet_notifier);
867 unregister_netdevice_notifier(&sc->dev_notifier);
868 kobject_put(sc->sys_sfe_cm);
869
870exit1:
871 return result;
872}
873
874/*
875 * sfe_cm_exit()
876 */
877static void __exit sfe_cm_exit(void)
878{
879 struct sfe_cm *sc = &__sc;
880
881 DEBUG_INFO("SFE CM exit\n");
882
883 /*
884 * Unregister our sync callback.
885 */
886 sfe_ipv4_register_sync_rule_callback(NULL);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700887 sfe_ipv6_register_sync_rule_callback(NULL);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600888
889 /*
890 * Unregister our receive callback.
891 */
892 RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
893
894 /*
895 * Wait for all callbacks to complete.
896 */
897 rcu_barrier();
898
899 /*
900 * Destroy all connections.
901 */
902 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700903 sfe_ipv6_destroy_all_rules_for_dev(NULL);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600904
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600905#ifdef CONFIG_NF_CONNTRACK_EVENTS
906 nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier);
907
908#endif
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700909 nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600910
Xiaoping Fan978b3772015-05-27 14:15:18 -0700911 unregister_inet6addr_notifier(&sc->inet6_notifier);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600912 unregister_inetaddr_notifier(&sc->inet_notifier);
913 unregister_netdevice_notifier(&sc->dev_notifier);
914
915 kobject_put(sc->sys_sfe_cm);
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600916}
917
918module_init(sfe_cm_init)
919module_exit(sfe_cm_exit)
920
921MODULE_AUTHOR("Qualcomm Atheros Inc.");
922MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
Matthew McClintocka3221942014-01-16 11:44:26 -0600923MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600924