/*
 * sfe-cm.c
 *	Shortcut forwarding engine connection manager.
 *
 * Copyright (c) 2013 Qualcomm Atheros, Inc.
 *
 * All Rights Reserved.
 * Qualcomm Atheros Confidential and Proprietary.
 */

#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/inetdevice.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>

#include "sfe.h"
#include "sfe_ipv4.h"

/*
 * Per-module structure.
 */
struct sfe_cm {
	spinlock_t lock;		/* Lock for SMP correctness */

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_cm;	/* sysfs linkage */

	/*
	 * Callback notifiers.
	 */
	struct notifier_block dev_notifier;
					/* Device notifier */
	struct notifier_block inet_notifier;
					/* IP notifier */
};

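/*
 * The single instance of our per-module state.
 */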
struct sfe_cm __sc;

/*
 * Expose the hook for the receive processing.
 */
extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);

/*
 * Expose what should be a static flag in the TCP connection tracker.
 */
extern int nf_ct_tcp_no_window_check;

/*
 * sfe_cm_recv()
 *	Handle packet receives.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_cm_recv(struct sk_buff *skb)
{
	struct net_device *dev;
#if (SFE_HOOK_ABOVE_BRIDGE)
	struct in_device *in_dev;
#endif

	/*
	 * We know that for the vast majority of packets we need the transport
	 * layer header so we may as well start to fetch it now!
	 */
	prefetch(skb->data + 32);
	barrier();

	dev = skb->dev;

#if (SFE_HOOK_ABOVE_BRIDGE)
	/*
	 * Does our input device support IP processing?
	 */
	in_dev = (struct in_device *)dev->ip_ptr;
	if (unlikely(!in_dev)) {
		DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
		return 0;
	}

	/*
	 * Does it have an IP address? If it doesn't then we can't do anything
	 * interesting here!
	 */
	if (unlikely(!in_dev->ifa_list)) {
		DEBUG_TRACE("no IP address for device: %s\n", dev->name);
		return 0;
	}
#endif

	/*
	 * We're only interested in IP packets.
	 */
	if (likely(htons(ETH_P_IP) == skb->protocol)) {
		return sfe_ipv4_recv(dev, skb);
	}

	DEBUG_TRACE("not IP packet\n");
	return 0;
}

/*
 * sfe_cm_find_dev_and_mac_addr()
 *	Find the device and MAC address for a given IPv4 address.
 *
 * Returns true if we find the device and MAC address, otherwise false.
 *
 * We look up the rtable entry for the address and, from its neighbour
 * structure, obtain the hardware address. This means this function also
 * works if the neighbours are routers too.
 */
static bool sfe_cm_find_dev_and_mac_addr(uint32_t addr, struct net_device **dev, uint8_t *mac_addr)
{
	struct neighbour *neigh;
	struct rtable *rt;
	struct dst_entry *dst;
	struct net_device *mac_dev;

	/*
	 * Look up the rtable entry for the IP address then get the hardware
	 * address from its neighbour structure. This means this works when the
	 * neighbours are routers too.
	 */
	rt = ip_route_output(&init_net, addr, 0, 0, 0);
	if (unlikely(IS_ERR(rt))) {
		return false;
	}

	dst = (struct dst_entry *)rt;

	rcu_read_lock();
	neigh = dst_get_neighbour_noref(dst);
	if (unlikely(!neigh)) {
		rcu_read_unlock();
		dst_release(dst);
		return false;
	}

	if (unlikely(!(neigh->nud_state & NUD_VALID))) {
		rcu_read_unlock();
		dst_release(dst);
		return false;
	}

	mac_dev = neigh->dev;
	if (!mac_dev) {
		rcu_read_unlock();
		dst_release(dst);
		return false;
	}

	memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len);

	dev_hold(mac_dev);
	*dev = mac_dev;
	rcu_read_unlock();

	dst_release(dst);

	/*
	 * We're only interested in unicast MAC addresses - if it's not a unicast
	 * address then our IP address mustn't be unicast either.
	 */
	if (is_multicast_ether_addr(mac_addr)) {
		DEBUG_TRACE("MAC is non-unicast - ignoring\n");
		dev_put(mac_dev);
		return false;
	}

	return true;
}

/*
 * sfe_cm_ipv4_post_routing_hook()
 *	Called for packets about to leave the box - either locally generated or forwarded from another interface
 */
static unsigned int sfe_cm_ipv4_post_routing_hook(unsigned int hooknum,
						  struct sk_buff *skb,
						  const struct net_device *in_unused,
						  const struct net_device *out,
						  int (*okfn)(struct sk_buff *))
{
	struct sfe_ipv4_create sic;
	struct net_device *in;
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct net_device *dev;
	struct net_device *src_dev;
	struct net_device *dest_dev;
	struct net_device *src_br_dev = NULL;
	struct net_device *dest_br_dev = NULL;
	struct nf_conntrack_tuple orig_tuple;
	struct nf_conntrack_tuple reply_tuple;

	/*
	 * Don't process broadcast or multicast packets.
	 */
	if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
		DEBUG_TRACE("broadcast, ignoring\n");
		return NF_ACCEPT;
	}
	if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
		DEBUG_TRACE("multicast, ignoring\n");
		return NF_ACCEPT;
	}

	/*
	 * Don't process packets that are not being forwarded.
	 */
	in = dev_get_by_index(&init_net, skb->skb_iif);
	if (!in) {
		DEBUG_TRACE("packet not forwarding\n");
		return NF_ACCEPT;
	}

	dev_put(in);

	/*
	 * Don't process packets that aren't being tracked by conntrack.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (unlikely(!ct)) {
		DEBUG_TRACE("no conntrack connection, ignoring\n");
		return NF_ACCEPT;
	}

	/*
	 * Don't process untracked connections.
	 */
	if (unlikely(ct == &nf_conntrack_untracked)) {
		DEBUG_TRACE("untracked connection\n");
		return NF_ACCEPT;
	}

	/*
	 * Don't process connections that require support from a 'helper' (typically a NAT ALG).
	 */
	if (unlikely(nfct_help(ct))) {
		DEBUG_TRACE("connection has helper\n");
		return NF_ACCEPT;
	}

	/*
	 * Look up the details of our connection in conntrack.
	 *
	 * Note that the data we get from conntrack is for the "ORIGINAL" direction
	 * but our packet may actually be in the "REPLY" direction.
	 */
	orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
	sic.protocol = (int32_t)orig_tuple.dst.protonum;

	/*
	 * Get addressing information, non-NAT first
	 */
	sic.src_ip = (__be32)orig_tuple.src.u3.ip;
	sic.dest_ip = (__be32)orig_tuple.dst.u3.ip;

	/*
	 * NAT'ed addresses - note these are as seen from the 'reply' direction.
	 * When NAT does not apply to this connection these will be identical to the above.
	 */
	sic.src_ip_xlate = (__be32)reply_tuple.dst.u3.ip;
	sic.dest_ip_xlate = (__be32)reply_tuple.src.u3.ip;

	sic.flags = 0;

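	/*
	 * Fill in the protocol-specific parts of the rule: the port numbers and,
	 * for TCP, the window tracking state that conntrack has recorded for
	 * each direction of the connection.
	 */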
	switch (sic.protocol) {
	case IPPROTO_TCP:
		sic.src_port = orig_tuple.src.u.tcp.port;
		sic.dest_port = orig_tuple.dst.u.tcp.port;
		sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
		sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
		sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
		sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
		sic.src_td_end = ct->proto.tcp.seen[0].td_end;
		sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
		sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
		sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
		sic.dest_td_end = ct->proto.tcp.seen[1].td_end;
		sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
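		/*
		 * If conntrack is being liberal about TCP window tracking, or window
		 * checking is disabled globally, ask SFE to skip its own sequence
		 * space checks as well.
		 */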
		if (nf_ct_tcp_no_window_check
		    || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
		    || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
			sic.flags |= SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK;
		}

		/*
		 * Don't try to manage a non-established connection.
		 */
		if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
			DEBUG_TRACE("non-established connection\n");
			return NF_ACCEPT;
		}

		/*
		 * If the connection is shutting down, do not manage it.
		 * The state cannot be SYN_SENT or SYN_RECV because the connection is assured.
		 * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
		 */
		spin_lock_bh(&ct->lock);
		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
			spin_unlock_bh(&ct->lock);
			DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
				    ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port),
				    &sic.dest_ip, ntohs(sic.dest_port));
			return NF_ACCEPT;
		}
		spin_unlock_bh(&ct->lock);
		break;

	case IPPROTO_UDP:
		sic.src_port = orig_tuple.src.u.udp.port;
		sic.dest_port = orig_tuple.dst.u.udp.port;
		sic.src_port_xlate = reply_tuple.dst.u.udp.port;
		sic.dest_port_xlate = reply_tuple.src.u.udp.port;
		break;

	default:
		DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
		return NF_ACCEPT;
	}

	/*
	 * Get the net device and MAC addresses that correspond to the various source and
	 * destination host addresses.
	 */
	if (!sfe_cm_find_dev_and_mac_addr(sic.src_ip, &src_dev, sic.src_mac)) {
		DEBUG_TRACE("failed to find MAC address for src IP: %pI4\n", &sic.src_ip);
		return NF_ACCEPT;
	}

	if (!sfe_cm_find_dev_and_mac_addr(sic.src_ip_xlate, &dev, sic.src_mac_xlate)) {
		DEBUG_TRACE("failed to find MAC address for xlate src IP: %pI4\n", &sic.src_ip_xlate);
		goto done1;
	}

	dev_put(dev);

	if (!sfe_cm_find_dev_and_mac_addr(sic.dest_ip, &dev, sic.dest_mac)) {
		DEBUG_TRACE("failed to find MAC address for dest IP: %pI4\n", &sic.dest_ip);
		goto done1;
	}

	dev_put(dev);

	if (!sfe_cm_find_dev_and_mac_addr(sic.dest_ip_xlate, &dest_dev, sic.dest_mac_xlate)) {
		DEBUG_TRACE("failed to find MAC address for xlate dest IP: %pI4\n", &sic.dest_ip_xlate);
		goto done1;
	}

#if (!SFE_HOOK_ABOVE_BRIDGE)
	/*
	 * Now our devices may actually be a bridge interface. If that's
	 * the case then we need to hunt down the underlying interface.
	 */
	if (src_dev->priv_flags & IFF_EBRIDGE) {
		src_br_dev = br_port_dev_get(src_dev, sic.src_mac);
		if (!src_br_dev) {
			DEBUG_TRACE("no port found on bridge\n");
			goto done2;
		}

		src_dev = src_br_dev;
	}

	if (dest_dev->priv_flags & IFF_EBRIDGE) {
		dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate);
		if (!dest_br_dev) {
			DEBUG_TRACE("no port found on bridge\n");
			goto done3;
		}

		dest_dev = dest_br_dev;
	}
#else
	/*
	 * Our devices may actually be part of a bridge interface. If that's
	 * the case then find the bridge interface instead.
	 */
	if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
		src_br_dev = src_dev->master;
		if (!src_br_dev) {
			DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
			goto done2;
		}

		dev_hold(src_br_dev);
		src_dev = src_br_dev;
	}

	if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
		dest_br_dev = dest_dev->master;
		if (!dest_br_dev) {
			DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
			goto done3;
		}

		dev_hold(dest_br_dev);
		dest_dev = dest_br_dev;
	}
#endif

	sic.src_dev = src_dev;
	sic.dest_dev = dest_dev;

	sic.src_mtu = src_dev->mtu;
	sic.dest_mtu = dest_dev->mtu;

	sfe_ipv4_create_rule(&sic);

	/*
	 * If we had bridge ports then release them too.
	 */
	if (dest_br_dev) {
		dev_put(dest_br_dev);
	}

done3:
	if (src_br_dev) {
		dev_put(src_br_dev);
	}

done2:
	dev_put(dest_dev);

done1:
	dev_put(src_dev);

	return NF_ACCEPT;
}

#ifdef CONFIG_NF_CONNTRACK_EVENTS
/*
 * sfe_cm_conntrack_event()
 *	Callback event invoked when a conntrack connection's state changes.
 */
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
static int sfe_cm_conntrack_event(struct notifier_block *this,
				  unsigned long events, void *ptr)
#else
static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item)
#endif
{
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
	struct nf_ct_event *item = ptr;
#endif
	struct sfe_ipv4_destroy sid;
	struct nf_conn *ct = item->ct;
	struct nf_conntrack_tuple orig_tuple;

	/*
	 * If we don't have a conntrack entry then we're done.
	 */
	if (unlikely(!ct)) {
		DEBUG_WARN("no ct in conntrack event callback\n");
		return NOTIFY_DONE;
	}

	/*
	 * If this is an untracked connection then we can't have any state either.
	 */
	if (unlikely(ct == &nf_conntrack_untracked)) {
		DEBUG_TRACE("ignoring untracked conn\n");
		return NOTIFY_DONE;
	}

	/*
	 * Ignore anything other than IPv4 connections.
	 */
	if (unlikely(nf_ct_l3num(ct) != AF_INET)) {
		DEBUG_TRACE("ignoring non-IPv4 conn\n");
		return NOTIFY_DONE;
	}

	/*
	 * We're only interested in destroy events.
	 */
	if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
		DEBUG_TRACE("ignoring non-destroy event\n");
		return NOTIFY_DONE;
	}

	orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	sid.protocol = (int32_t)orig_tuple.dst.protonum;

	/*
	 * Extract information from the conntrack connection. We're only interested
	 * in nominal connection information (i.e. we're ignoring any NAT information).
	 */
	sid.src_ip = (__be32)orig_tuple.src.u3.ip;
	sid.dest_ip = (__be32)orig_tuple.dst.u3.ip;

	switch (sid.protocol) {
	case IPPROTO_TCP:
		sid.src_port = orig_tuple.src.u.tcp.port;
		sid.dest_port = orig_tuple.dst.u.tcp.port;
		break;

	case IPPROTO_UDP:
		sid.src_port = orig_tuple.src.u.udp.port;
		sid.dest_port = orig_tuple.dst.u.udp.port;
		break;

	default:
		DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
		return NOTIFY_DONE;
	}

	sfe_ipv4_destroy_rule(&sid);
	return NOTIFY_DONE;
}

/*
 * Netfilter conntrack event system to monitor connection tracking changes
 */
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
static struct notifier_block sfe_cm_conntrack_notifier = {
	.notifier_call = sfe_cm_conntrack_event,
};
#else
static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = {
	.fcn = sfe_cm_conntrack_event,
};
#endif
#endif

/*
 * Structure to establish a hook into the post routing netfilter point - this
 * will pick up locally generated packets and packets forwarded from one interface to another.
 *
 * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
 * We want to examine packets after NAT translation and any ALG processing.
 */
static struct nf_hook_ops sfe_cm_ipv4_ops_post_routing[] __read_mostly = {
	{
		.hook = sfe_cm_ipv4_post_routing_hook,
		.owner = THIS_MODULE,
		.pf = PF_INET,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_NAT_SRC + 1,
	},
};

/*
 * sfe_cm_sync_rule()
 *	Synchronize a connection's state.
 */
static void sfe_cm_sync_rule(struct sfe_ipv4_sync *sis)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conntrack_tuple tuple;
	struct nf_conn *ct;
	struct nf_conn_counter *acct;

	/*
	 * Create a tuple so as to be able to look up a connection
	 */
	memset(&tuple, 0, sizeof(tuple));
	tuple.src.u3.ip = sis->src_ip;
	tuple.src.u.all = (__be16)sis->src_port;
	tuple.src.l3num = AF_INET;

	tuple.dst.u3.ip = sis->dest_ip;
	tuple.dst.dir = IP_CT_DIR_ORIGINAL;
	tuple.dst.protonum = (uint8_t)sis->protocol;
	tuple.dst.u.all = (__be16)sis->dest_port;

	DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
		    (int)tuple.dst.protonum,
		    &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
		    &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));

	/*
	 * Look up conntrack connection
	 */
	h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
	if (unlikely(!h)) {
		DEBUG_TRACE("no connection found\n");
		return;
	}

	ct = nf_ct_tuplehash_to_ctrack(h);
	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);

	/*
	 * Only update if this is not a fixed timeout
	 */
	if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
		ct->timeout.expires += sis->delta_jiffies;
	}

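	/*
	 * Copy the packet and byte counters reported by SFE into conntrack's
	 * accounting data.
	 */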
	acct = nf_conn_acct_find(ct);
	if (acct) {
		spin_lock_bh(&ct->lock);
		atomic64_set(&acct[IP_CT_DIR_ORIGINAL].packets, sis->src_packet_count);
		atomic64_set(&acct[IP_CT_DIR_ORIGINAL].bytes, sis->src_byte_count);
		atomic64_set(&acct[IP_CT_DIR_REPLY].packets, sis->dest_packet_count);
		atomic64_set(&acct[IP_CT_DIR_REPLY].bytes, sis->dest_byte_count);
		spin_unlock_bh(&ct->lock);
	}

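	/*
	 * For TCP, fold the window tracking state seen by SFE back into
	 * conntrack, keeping whichever values are the furthest advanced.
	 */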
	switch (sis->protocol) {
	case IPPROTO_TCP:
		spin_lock_bh(&ct->lock);
		if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
			ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
		}
		if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
			ct->proto.tcp.seen[0].td_end = sis->src_td_end;
		}
		if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
			ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
		}
		if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
			ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
		}
		if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
			ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
		}
		if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
			ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
		}
		spin_unlock_bh(&ct->lock);
		break;
	}

	/*
	 * Release connection
	 */
	nf_ct_put(ct);
}

/*
 * sfe_cm_device_event()
 *	Net device notifier callback - destroy any offloaded rules for a device that goes down.
 */
static int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = (struct net_device *)ptr;

	switch (event) {
	case NETDEV_DOWN:
		if (dev) {
			sfe_ipv4_destroy_all_rules_for_dev(dev);
		}
		break;
	}

	return NOTIFY_DONE;
}

/*
 * sfe_cm_inet_event()
 *	Inet address notifier callback - handled in the same way as a device event.
 */
static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
	return sfe_cm_device_event(this, event, dev);
}

/*
 * sfe_cm_init()
 */
static int __init sfe_cm_init(void)
{
	struct sfe_cm *sc = &__sc;
	int result = -1;

	DEBUG_INFO("SFE CM init\n");

	/*
	 * Create sys/sfe_cm
	 */
	sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL);
	if (!sc->sys_sfe_cm) {
		DEBUG_ERROR("failed to register sfe_cm\n");
		goto exit1;
	}

	sc->dev_notifier.notifier_call = sfe_cm_device_event;
	sc->dev_notifier.priority = 1;
	register_netdevice_notifier(&sc->dev_notifier);

	sc->inet_notifier.notifier_call = sfe_cm_inet_event;
	sc->inet_notifier.priority = 1;
	register_inetaddr_notifier(&sc->inet_notifier);

	/*
	 * Register our netfilter hooks.
	 */
	result = nf_register_hooks(sfe_cm_ipv4_ops_post_routing, ARRAY_SIZE(sfe_cm_ipv4_ops_post_routing));
	if (result < 0) {
		DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
		goto exit6;
	}

#ifdef CONFIG_NF_CONNTRACK_EVENTS
	/*
	 * Register a notifier hook to get fast notifications of expired connections.
	 */
	result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
	if (result < 0) {
		DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
		goto exit7;
	}
#endif

	spin_lock_init(&sc->lock);

	/*
	 * Hook the receive path in the network stack.
	 */
	BUG_ON(athrs_fast_nat_recv != NULL);
	RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv);

	/*
	 * Hook the shortcut sync callback.
	 */
	sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule);
	return 0;

#ifdef CONFIG_NF_CONNTRACK_EVENTS
exit7:
#endif
	nf_unregister_hooks(sfe_cm_ipv4_ops_post_routing, ARRAY_SIZE(sfe_cm_ipv4_ops_post_routing));

exit6:
	unregister_inetaddr_notifier(&sc->inet_notifier);
	unregister_netdevice_notifier(&sc->dev_notifier);
	kobject_put(sc->sys_sfe_cm);

exit1:
	return result;
}

/*
 * sfe_cm_exit()
 */
static void __exit sfe_cm_exit(void)
{
	struct sfe_cm *sc = &__sc;

	DEBUG_INFO("SFE CM exit\n");

	/*
	 * Unregister our sync callback.
	 */
	sfe_ipv4_register_sync_rule_callback(NULL);

	/*
	 * Unregister our receive callback.
	 */
	RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);

	/*
	 * Wait for all callbacks to complete.
	 */
	rcu_barrier();

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

// XXX - this is where we need to unregister with any lower level offload services.

#ifdef CONFIG_NF_CONNTRACK_EVENTS
	nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier);

#endif
	nf_unregister_hooks(sfe_cm_ipv4_ops_post_routing, ARRAY_SIZE(sfe_cm_ipv4_ops_post_routing));

	unregister_inetaddr_notifier(&sc->inet_notifier);
	unregister_netdevice_notifier(&sc->dev_notifier);

	kobject_put(sc->sys_sfe_cm);
}

module_init(sfe_cm_init)
module_exit(sfe_cm_exit)

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
MODULE_LICENSE("Dual BSD/GPL");