/*
 * fast-classifier.c
 *	Shortcut forwarding engine connection manager.
 *	fast-classifier style
 *
 * XXX - fill in the appropriate GPL notice.
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/inetdevice.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/genetlink.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include "../shortcut-fe/sfe.h"
#include "../shortcut-fe/sfe_ipv4.h"
#include "fast-classifier-priv.h"

26/*
27 * Per-module structure.
28 */
29struct fast_classifier {
30 spinlock_t lock; /* Lock for SMP correctness */
31
32 /*
33 * Control state.
34 */
35 struct kobject *sys_fast_classifier; /* sysfs linkage */
36
37 /*
38 * Callback notifiers.
39 */
40 struct notifier_block dev_notifier;
41 /* Device notifier */
42 struct notifier_block inet_notifier;
43 /* IP notifier */
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060044};
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060045
46struct fast_classifier __sc;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060047
48static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = {
49 [FAST_CLASSIFIER_A_MSG] = { .type = NLA_NUL_STRING },
50};
51
52static struct genl_family fast_classifier_gnl_family = {
53 .id = GENL_ID_GENERATE,
54 .hdrsize = 0,
55 .name = "FAST_CLASSIFIER",
56 .version = 1,
57 .maxattr = FAST_CLASSIFIER_A_MAX,
58};
59
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060060
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060061#define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1)
62
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060063static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info);
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060064
65static struct genl_ops fast_classifier_gnl_ops_recv = {
66 .cmd = FAST_CLASSIFIER_C_RECV,
67 .flags = 0,
68 .policy = fast_classifier_genl_policy,
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060069 .doit = fast_classifier_recv_genl_msg,
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060070 .dumpit = NULL,
71};
72
/*
 * Receive-processing hook, defined outside this file; this module points it
 * at fast_classifier_recv() while loaded.
 */
extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);

/*
 * Flag belonging to the TCP connection tracker (it should really be static
 * there) that we need to read.
 */
extern int nf_ct_tcp_no_window_check;

83/*
84 * fast_classifier_recv()
85 * Handle packet receives.
86 *
87 * Returns 1 if the packet is forwarded or 0 if it isn't.
88 */
89int fast_classifier_recv(struct sk_buff *skb)
90{
91 struct net_device *dev;
92#if (SFE_HOOK_ABOVE_BRIDGE)
93 struct in_device *in_dev;
94#endif
95
96 /*
97 * We know that for the vast majority of packets we need the transport
98 * layer header so we may as well start to fetch it now!
99 */
100 prefetch(skb->data + 32);
101 barrier();
102
103 dev = skb->dev;
104
105#if (SFE_HOOK_ABOVE_BRIDGE)
106 /*
107 * Does our input device support IP processing?
108 */
109 in_dev = (struct in_device *)dev->ip_ptr;
110 if (unlikely(!in_dev)) {
111 DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
112 return 0;
113 }
114
115 /*
116 * Does it have an IP address? If it doesn't then we can't do anything
117 * interesting here!
118 */
119 if (unlikely(!in_dev->ifa_list)) {
120 DEBUG_TRACE("no IP address for device: %s\n", dev->name);
121 return 0;
122 }
123#endif
124
125 /*
126 * We're only interested in IP packets.
127 */
128 if (likely(htons(ETH_P_IP) == skb->protocol)) {
129 return sfe_ipv4_recv(dev, skb);
130 }
131
132 DEBUG_TRACE("not IP packet\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600133 return 0;
134}
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600135
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600136/*
137 * fast_classifier_find_mac_addr()
138 * Find the MAC address for a given IPv4 address.
139 *
140 * Returns true if we find the MAC address, otherwise false.
141 *
142 * We look up the rtable entry for the address and, from its neighbour
143 * structure, obtain the hardware address. This means this function also
144 * works if the neighbours are routers too.
145 */
146static bool fast_classifier_find_mac_addr(uint32_t addr, uint8_t *mac_addr)
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600147{
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600148 struct neighbour *neigh;
149 struct rtable *rt;
150 struct dst_entry *dst;
151 struct net_device *dev;
152
153 /*
154 * Look up the rtable entry for the IP address then get the hardware
155 * address from its neighbour structure. This means this work when the
156 * neighbours are routers too.
157 */
158 rt = ip_route_output(&init_net, addr, 0, 0, 0);
159 if (unlikely(IS_ERR(rt))) {
160 return false;
161 }
162
163 dst = (struct dst_entry *)rt;
164
165 rcu_read_lock();
166 neigh = dst_get_neighbour_noref(dst);
167 if (unlikely(!neigh)) {
168 rcu_read_unlock();
169 dst_release(dst);
170 return false;
171 }
172
173 if (unlikely(!(neigh->nud_state & NUD_VALID))) {
174 rcu_read_unlock();
175 dst_release(dst);
176 return false;
177 }
178
179 dev = neigh->dev;
180 if (!dev) {
181 rcu_read_unlock();
182 dst_release(dst);
183 return false;
184 }
185
186 memcpy(mac_addr, neigh->ha, (size_t)dev->addr_len);
187 rcu_read_unlock();
188
189 dst_release(dst);
190
191 /*
192 * We're only interested in unicast MAC addresses - if it's not a unicast
193 * address then our IP address mustn't be unicast either.
194 */
195 if (is_multicast_ether_addr(mac_addr)) {
196 DEBUG_TRACE("MAC is non-unicast - ignoring\n");
197 return false;
198 }
199
200 return true;
201}
202
Matthew McClintockea00adf2013-11-25 19:24:30 -0600203static DEFINE_SPINLOCK(sfe_connections_lock);
204
205struct sfe_connection {
206 struct list_head list;
207 struct sfe_ipv4_create *sic;
208 struct nf_conn *ct;
209};
210
211static LIST_HEAD(sfe_connections);
212
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600213/*
214 * fast_classifier_recv_genl_msg()
215 * Called from user space to offload a connection
216 */
217static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info)
218{
219 struct nlattr *na;
220 struct fast_classifier_msg *fc_msg;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600221 struct sfe_ipv4_create *p_sic;
222 struct sfe_connection *conn;
223 unsigned long flags;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600224
225 na = info->attrs[FAST_CLASSIFIER_C_RECV];
226 fc_msg = nla_data(na);
Matthew McClintockea00adf2013-11-25 19:24:30 -0600227
228 DEBUG_TRACE("INFO: want to offload: %d, %d, %d, %d, %d\n", fc_msg->proto,
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600229 fc_msg->src_saddr,
230 fc_msg->dst_saddr,
231 fc_msg->sport, fc_msg->dport);
Matthew McClintockea00adf2013-11-25 19:24:30 -0600232 spin_lock_irqsave(&sfe_connections_lock, flags);
233 list_for_each_entry(conn, &sfe_connections, list) {
234 struct nf_conn *ct = conn->ct;
235 p_sic = conn->sic;
236
237 DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
238 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
239 p_sic->src_port, p_sic->dest_port);
240
241 if (p_sic->protocol == fc_msg->proto &&
242 p_sic->src_port == fc_msg->sport &&
243 p_sic->dest_port == fc_msg->dport &&
244 p_sic->src_ip == fc_msg->src_saddr &&
245 p_sic->dest_ip == fc_msg->dst_saddr ) {
246 DEBUG_TRACE("FOUND, WILL OFFLOAD\n");
247 switch (p_sic->protocol) {
248 case IPPROTO_TCP:
249 p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
250 p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
251 p_sic->src_td_end = ct->proto.tcp.seen[0].td_end;
252 p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
253 p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
254 p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
255 p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end;
256 p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
257 if (nf_ct_tcp_no_window_check
258 || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
259 || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
260 p_sic->flags |= SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK;
261 }
262
263 /*
264 * If the connection is shutting down do not manage it.
265 * state can not be SYN_SENT, SYN_RECV because connection is assured
266 * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
267 */
268 spin_lock(&ct->lock);
269 if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
270 spin_unlock_bh(&ct->lock);
271 DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
272 ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port),
273 &p_sic->dest_ip, ntohs(p_sic->dest_port));
274 spin_unlock_irqrestore(&sfe_connections_lock, flags);
275 return 0;
276 }
277 spin_unlock(&ct->lock);
278 break;
279
280 case IPPROTO_UDP:
281 break;
282
283 default:
284 DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol);
285 spin_unlock_irqrestore(&sfe_connections_lock, flags);
286 return 0;
287 }
288
289 DEBUG_TRACE("INFO: calling sfe rule creation!\n");
290 spin_unlock_irqrestore(&sfe_connections_lock, flags);
291 sfe_ipv4_create_rule(p_sic);
292 return 0;
293 }
294 DEBUG_TRACE("SEARCH CONTINUES\n");
295 }
296
297 spin_unlock_irqrestore(&sfe_connections_lock, flags);
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600298 return 0;
299}
300
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600301/*
302 * fast_classifier_ipv4_post_routing_hook()
303 * Called for packets about to leave the box - either locally generated or forwarded from another interface
304 */
305static unsigned int fast_classifier_ipv4_post_routing_hook(unsigned int hooknum,
306 struct sk_buff *skb,
307 const struct net_device *in_unused,
308 const struct net_device *out,
309 int (*okfn)(struct sk_buff *))
310{
311 struct sfe_ipv4_create sic;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600312 struct sfe_ipv4_create *p_sic;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600313 struct net_device *in;
314 struct nf_conn *ct;
315 enum ip_conntrack_info ctinfo;
316 struct net_device *src_dev;
317 struct net_device *dest_dev;
318 struct net_device *src_br_dev = NULL;
319 struct net_device *dest_br_dev = NULL;
320 struct nf_conntrack_tuple orig_tuple;
321 struct nf_conntrack_tuple reply_tuple;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600322 struct sfe_connection *conn;
323 int sfe_connections_size = 0;
324 unsigned long flags;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600325
326 /*
327 * Don't process broadcast or multicast packets.
328 */
329 if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
330 DEBUG_TRACE("broadcast, ignoring\n");
331 return NF_ACCEPT;
332 }
333 if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
334 DEBUG_TRACE("multicast, ignoring\n");
335 return NF_ACCEPT;
336 }
337
338 /*
339 * Don't process packets that are not being forwarded.
340 */
341 in = dev_get_by_index(&init_net, skb->skb_iif);
342 if (!in) {
343 DEBUG_TRACE("packet not forwarding\n");
344 return NF_ACCEPT;
345 }
346
347 /*
348 * Don't process packets with non-standard 802.3 MAC address sizes.
349 */
350 if (unlikely(in->addr_len != ETH_ALEN)) {
351 DEBUG_TRACE("in device: %s not 802.3 hw addr len: %u, ignoring\n",
352 in->name, (unsigned)in->addr_len);
353 goto done1;
354 }
355 if (unlikely(out->addr_len != ETH_ALEN)) {
356 DEBUG_TRACE("out device: %s not 802.3 hw addr len: %u, ignoring\n",
357 out->name, (unsigned)out->addr_len);
358 goto done1;
359 }
360
361 /*
362 * Don't process packets that aren't being tracked by conntrack.
363 */
364 ct = nf_ct_get(skb, &ctinfo);
365 if (unlikely(!ct)) {
366 DEBUG_TRACE("no conntrack connection, ignoring\n");
367 goto done1;
368 }
369
370 /*
371 * Don't process untracked connections.
372 */
373 if (unlikely(ct == &nf_conntrack_untracked)) {
374 DEBUG_TRACE("untracked connection\n");
375 goto done1;
376 }
377
378 /*
379 * Don't process connections that require support from a 'helper' (typically a NAT ALG).
380 */
381 if (unlikely(nfct_help(ct))) {
382 DEBUG_TRACE("connection has helper\n");
383 goto done1;
384 }
385
386 /*
387 * Look up the details of our connection in conntrack.
388 *
389 * Note that the data we get from conntrack is for the "ORIGINAL" direction
390 * but our packet may actually be in the "REPLY" direction.
391 */
392 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
393 reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
394 sic.protocol = (int32_t)orig_tuple.dst.protonum;
395
396 /*
397 * Get addressing information, non-NAT first
398 */
399 sic.src_ip = (__be32)orig_tuple.src.u3.ip;
400 sic.dest_ip = (__be32)orig_tuple.dst.u3.ip;
401
402 /*
403 * NAT'ed addresses - note these are as seen from the 'reply' direction
404 * When NAT does not apply to this connection these will be identical to the above.
405 */
406 sic.src_ip_xlate = (__be32)reply_tuple.dst.u3.ip;
407 sic.dest_ip_xlate = (__be32)reply_tuple.src.u3.ip;
408
409 sic.flags = 0;
410
411 switch (sic.protocol) {
412 case IPPROTO_TCP:
413 sic.src_port = orig_tuple.src.u.tcp.port;
414 sic.dest_port = orig_tuple.dst.u.tcp.port;
415 sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
416 sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600417
418 /*
419 * Don't try to manage a non-established connection.
420 */
421 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
422 DEBUG_TRACE("non-established connection\n");
423 goto done1;
424 }
425
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600426 break;
427
428 case IPPROTO_UDP:
429 sic.src_port = orig_tuple.src.u.udp.port;
430 sic.dest_port = orig_tuple.dst.u.udp.port;
431 sic.src_port_xlate = reply_tuple.dst.u.udp.port;
432 sic.dest_port_xlate = reply_tuple.src.u.udp.port;
433 break;
434
435 default:
436 DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
437 goto done1;
438 }
439
440 /*
Matthew McClintockea00adf2013-11-25 19:24:30 -0600441 * If we already have this connection in our list, skip it
442 * XXX: this may need to be optimized
443 */
444 DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
445 sic.protocol, sic.src_ip, sic.dest_ip,
446 sic.src_port, sic.dest_port);
447 spin_lock_irqsave(&sfe_connections_lock, flags);
448 list_for_each_entry(conn, &sfe_connections, list) {
449 p_sic = conn->sic;
450 DEBUG_TRACE("\t\t-> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
451 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
452 p_sic->src_port, p_sic->dest_port);
453
454 if (p_sic->protocol == sic.protocol &&
455 p_sic->src_port == sic.src_port &&
456 p_sic->dest_port == sic.dest_port &&
457 p_sic->src_ip == sic.src_ip &&
458 p_sic->dest_ip == sic.dest_ip ) {
459 DEBUG_TRACE("FOUND, SKIPPING\n");
460 spin_unlock_irqrestore(&sfe_connections_lock, flags);
461 goto done1;
462 } else {
463 DEBUG_TRACE("SEARCH CONTINUES");
464 }
465
466 sfe_connections_size++;
467 }
468 spin_unlock_irqrestore(&sfe_connections_lock, flags);
469
470 /*
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600471 * Get the MAC addresses that correspond to source and destination host addresses.
472 */
473 if (!fast_classifier_find_mac_addr(sic.src_ip, sic.src_mac)) {
474 DEBUG_TRACE("failed to find MAC address for src IP: %pI4\n", &sic.src_ip);
475 goto done1;
476 }
477
478 if (!fast_classifier_find_mac_addr(sic.src_ip_xlate, sic.src_mac_xlate)) {
479 DEBUG_TRACE("failed to find MAC address for xlate src IP: %pI4\n", &sic.src_ip_xlate);
480 goto done1;
481 }
482
483 /*
484 * Do dest now
485 */
486 if (!fast_classifier_find_mac_addr(sic.dest_ip, sic.dest_mac)) {
487 DEBUG_TRACE("failed to find MAC address for dest IP: %pI4\n", &sic.dest_ip);
488 goto done1;
489 }
490
491 if (!fast_classifier_find_mac_addr(sic.dest_ip_xlate, sic.dest_mac_xlate)) {
492 DEBUG_TRACE("failed to find MAC address for xlate dest IP: %pI4\n", &sic.dest_ip_xlate);
493 goto done1;
494 }
495
496 /*
497 * Get our device info. If we're dealing with the "reply" direction here then
498 * we'll need things swapped around.
499 */
500 if (ctinfo < IP_CT_IS_REPLY) {
501 src_dev = in;
502 dest_dev = (struct net_device *)out;
503 } else {
504 src_dev = (struct net_device *)out;
505 dest_dev = in;
506 }
507
508#if (!SFE_HOOK_ABOVE_BRIDGE)
509 /*
510 * Now our devices may actually be a bridge interface. If that's
511 * the case then we need to hunt down the underlying interface.
512 */
513 if (src_dev->priv_flags & IFF_EBRIDGE) {
514 src_br_dev = br_port_dev_get(src_dev, sic.src_mac);
515 if (!src_br_dev) {
516 DEBUG_TRACE("no port found on bridge\n");
517 goto done1;
518 }
519
520 src_dev = src_br_dev;
521 }
522
523 if (dest_dev->priv_flags & IFF_EBRIDGE) {
524 dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate);
525 if (!dest_br_dev) {
526 DEBUG_TRACE("no port found on bridge\n");
527 goto done2;
528 }
529
530 dest_dev = dest_br_dev;
531 }
532#else
533 /*
534 * Our devices may actually be part of a bridge interface. If that's
535 * the case then find the bridge interface instead.
536 */
537 if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
538 src_br_dev = src_dev->master;
539 if (!src_br_dev) {
540 DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
541 goto done1;
542 }
543
544 dev_hold(src_br_dev);
545 src_dev = src_br_dev;
546 }
547
548 if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
549 dest_br_dev = dest_dev->master;
550 if (!dest_br_dev) {
551 DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
552 goto done2;
553 }
554
555 dev_hold(dest_br_dev);
556 dest_dev = dest_br_dev;
557 }
558#endif
559
560 sic.src_dev = src_dev;
561 sic.dest_dev = dest_dev;
562
563// XXX - these MTUs need handling correctly!
564 sic.src_mtu = 1500;
565 sic.dest_mtu = 1500;
566
Matthew McClintockea00adf2013-11-25 19:24:30 -0600567 conn = kmalloc(sizeof(struct sfe_connection), GFP_KERNEL);
568 if (conn == NULL) {
569 printk(KERN_CRIT "ERROR: no memory for sfe\n");
570 goto done3;
571 }
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600572
Matthew McClintockea00adf2013-11-25 19:24:30 -0600573 p_sic = kmalloc(sizeof(struct sfe_ipv4_create), GFP_KERNEL);
574 if (p_sic == NULL) {
575 printk(KERN_CRIT "ERROR: no memory for sfe\n");
576 kfree(conn);
577 goto done3;
578 }
579
580 memcpy(p_sic, &sic, sizeof(sic));
581 conn->sic = p_sic;
582 conn->ct = ct;
583 DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", ++sfe_connections_size);
584 DEBUG_TRACE("POST_ROUTE: new offloadable connection: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
585 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
586 p_sic->src_port, p_sic->dest_port);
587 spin_lock_irqsave(&sfe_connections_lock, flags);
588 list_add_tail(&(conn->list), &sfe_connections);
589 spin_unlock_irqrestore(&sfe_connections_lock, flags);
590done3:
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600591 /*
592 * If we had bridge ports then release them too.
593 */
594 if (dest_br_dev) {
595 dev_put(dest_br_dev);
596 }
597
598done2:
599 if (src_br_dev) {
600 dev_put(src_br_dev);
601 }
602
603done1:
604 /*
605 * Release the interface on which this skb arrived
606 */
607 dev_put(in);
608
609 return NF_ACCEPT;
610}
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600611
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/*
 * fast_classifier_conntrack_event()
 *	Conntrack notifier callback: clean up when a connection is destroyed.
 *
 * Only IPCT_DESTROY events for IPv4 TCP or UDP connections are acted upon.
 * The matching entry, if there is one, is unlinked from sfe_connections and
 * freed, and the SFE rule for the connection is destroyed.  Always returns
 * NOTIFY_DONE.
 */
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
static int fast_classifier_conntrack_event(struct notifier_block *this,
					   unsigned int events, struct nf_ct_event *item)
#else
static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item)
#endif
{
	struct nf_conn *ct = item->ct;
	struct nf_conntrack_tuple orig_tuple;
	struct sfe_ipv4_destroy sid;
	struct sfe_connection *conn;
	struct sfe_connection *match = NULL;
	int entries_checked = 0;
	unsigned long flags;

	/*
	 * Nothing to do without a conntrack entry.
	 */
	if (unlikely(!ct)) {
		DEBUG_WARN("no ct in conntrack event callback\n");
		return NOTIFY_DONE;
	}

	/*
	 * An untracked connection can't have any state of ours attached.
	 */
	if (unlikely(ct == &nf_conntrack_untracked)) {
		DEBUG_TRACE("ignoring untracked conn\n");
		return NOTIFY_DONE;
	}

	/*
	 * IPv4 only.
	 */
	if (unlikely(nf_ct_l3num(ct) != AF_INET)) {
		DEBUG_TRACE("ignoring non-IPv4 conn\n");
		return NOTIFY_DONE;
	}

	/*
	 * Destroy events only.
	 */
	if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
		DEBUG_TRACE("ignoring non-destroy event\n");
		return NOTIFY_DONE;
	}

	/*
	 * Describe the connection by its nominal (original direction)
	 * addressing; NAT information is deliberately ignored.
	 */
	orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	sid.protocol = (int32_t)orig_tuple.dst.protonum;
	sid.src_ip = (__be32)orig_tuple.src.u3.ip;
	sid.dest_ip = (__be32)orig_tuple.dst.u3.ip;

	switch (sid.protocol) {
	case IPPROTO_TCP:
		sid.src_port = orig_tuple.src.u.tcp.port;
		sid.dest_port = orig_tuple.dst.u.tcp.port;
		break;

	case IPPROTO_UDP:
		sid.src_port = orig_tuple.src.u.udp.port;
		sid.dest_port = orig_tuple.dst.u.udp.port;
		break;

	default:
		DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
		return NOTIFY_DONE;
	}

	/*
	 * Find the entry we recorded for this connection, if any, and drop it.
	 * XXX: this may need to be optimized
	 */
	DEBUG_TRACE("INFO: want to clean up: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
		    sid.protocol, sid.src_ip, sid.dest_ip,
		    sid.src_port, sid.dest_port);

	spin_lock_irqsave(&sfe_connections_lock, flags);
	list_for_each_entry(conn, &sfe_connections, list) {
		struct sfe_ipv4_create *p_sic = conn->sic;

		DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
			    p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
			    p_sic->src_port, p_sic->dest_port);

		if (p_sic->protocol == sid.protocol &&
		    p_sic->src_port == sid.src_port &&
		    p_sic->dest_port == sid.dest_port &&
		    p_sic->src_ip == sid.src_ip &&
		    p_sic->dest_ip == sid.dest_ip) {
			match = conn;
			DEBUG_TRACE("FOUND, DELETING\n");
			break;
		}

		DEBUG_TRACE("SEARCH CONTINUES\n");
		entries_checked++;
	}

	if (match) {
		DEBUG_TRACE("INFO: connection over proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
			    match->sic->protocol, match->sic->src_ip, match->sic->dest_ip,
			    match->sic->src_port, match->sic->dest_port);
		kfree(match->sic);
		list_del(&match->list);
		kfree(match);
	} else {
		DEBUG_TRACE("NO MATCH FOUND IN %d ENTRIES!!\n", entries_checked);
	}
	spin_unlock_irqrestore(&sfe_connections_lock, flags);

	sfe_ipv4_destroy_rule(&sid);
	return NOTIFY_DONE;
}

/*
 * Hook into the netfilter conntrack event system so that we hear about
 * connection tracking changes.
 */
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
static struct notifier_block fast_classifier_conntrack_notifier = {
	.notifier_call = fast_classifier_conntrack_event,
};
#else
static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = {
	.fcn = fast_classifier_conntrack_event,
};
#endif
#endif

Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600749/*
750 * Structure to establish a hook into the post routing netfilter point - this
751 * will pick up local outbound and packets going from one interface to another.
752 *
753 * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
754 * We want to examine packets after NAT translation and any ALG processing.
755 */
756static struct nf_hook_ops fast_classifier_ipv4_ops_post_routing[] __read_mostly = {
757 {
758 .hook = fast_classifier_ipv4_post_routing_hook,
759 .owner = THIS_MODULE,
760 .pf = PF_INET,
761 .hooknum = NF_INET_POST_ROUTING,
762 .priority = NF_IP_PRI_NAT_SRC + 1,
763 },
764};
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600765
766/*
767 * fast_classifier_sync_rule()
768 * Synchronize a connection's state.
769 */
770static void fast_classifier_sync_rule(struct sfe_ipv4_sync *sis)
771{
772 struct nf_conntrack_tuple_hash *h;
773 struct nf_conntrack_tuple tuple;
774 struct nf_conn *ct;
775 struct nf_conn_counter *acct;
776
777 /*
778 * Create a tuple so as to be able to look up a connection
779 */
780 memset(&tuple, 0, sizeof(tuple));
781 tuple.src.u3.ip = sis->src_ip;
782 tuple.src.u.all = (__be16)sis->src_port;
783 tuple.src.l3num = AF_INET;
784
785 tuple.dst.u3.ip = sis->dest_ip;
786 tuple.dst.dir = IP_CT_DIR_ORIGINAL;
787 tuple.dst.protonum = (uint8_t)sis->protocol;
788 tuple.dst.u.all = (__be16)sis->dest_port;
789
790 DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
791 (int)tuple.dst.protonum,
792 &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
793 &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));
794
795 /*
796 * Look up conntrack connection
797 */
798 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
799 if (unlikely(!h)) {
800 DEBUG_TRACE("no connection found\n");
801 return;
802 }
803
804 ct = nf_ct_tuplehash_to_ctrack(h);
805 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
806
807 /*
808 * Only update if this is not a fixed timeout
809 */
810 if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
811 ct->timeout.expires += sis->delta_jiffies;
812 }
813
814 acct = nf_conn_acct_find(ct);
815 if (acct) {
816 spin_lock_bh(&ct->lock);
817 atomic64_add(sis->src_packet_count, &acct[IP_CT_DIR_ORIGINAL].packets);
818 atomic64_add(sis->src_byte_count, &acct[IP_CT_DIR_ORIGINAL].bytes);
819 atomic64_add(sis->dest_packet_count, &acct[IP_CT_DIR_REPLY].packets);
820 atomic64_add(sis->dest_byte_count, &acct[IP_CT_DIR_REPLY].bytes);
821 spin_unlock_bh(&ct->lock);
822 }
823
824 switch (sis->protocol) {
825 case IPPROTO_TCP:
826 spin_lock_bh(&ct->lock);
827 if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
828 ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
829 }
830 if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
831 ct->proto.tcp.seen[0].td_end = sis->src_td_end;
832 }
833 if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
834 ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
835 }
836 if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
837 ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
838 }
839 if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
840 ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
841 }
842 if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
843 ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
844 }
845 spin_unlock_bh(&ct->lock);
846 break;
847 }
848
849 /*
850 * Release connection
851 */
852 nf_ct_put(ct);
853}
854
855/*
856 * fast_classifier_device_event()
857 */
858static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr)
859{
860 struct net_device *dev = (struct net_device *)ptr;
861
862 switch (event) {
863 case NETDEV_DOWN:
864 if (dev) {
865 sfe_ipv4_destroy_all_rules_for_dev(dev);
866 }
867 break;
868 }
869
870 return NOTIFY_DONE;
871}
872
873/*
874 * fast_classifier_inet_event()
875 */
876static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
877{
878 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
879 return fast_classifier_device_event(this, event, dev);
880}
881
882/*
883 * fast_classifier_init()
884 */
885static int __init fast_classifier_init(void)
886{
887 struct fast_classifier *sc = &__sc;
888 int result = -1;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600889
890 printk(KERN_ALERT "fast-classifier: starting up\n");
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600891 DEBUG_INFO("SFE CM init\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600892
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600893 /*
894 * Create sys/fast_classifier
895 */
896 sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL);
897 if (!sc->sys_fast_classifier) {
898 DEBUG_ERROR("failed to register fast_classifier\n");
899 goto exit1;
900 }
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600901
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600902 sc->dev_notifier.notifier_call = fast_classifier_device_event;
903 sc->dev_notifier.priority = 1;
904 register_netdevice_notifier(&sc->dev_notifier);
905
906 sc->inet_notifier.notifier_call = fast_classifier_inet_event;
907 sc->inet_notifier.priority = 1;
908 register_inetaddr_notifier(&sc->inet_notifier);
909
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600910 /*
911 * Register our netfilter hooks.
912 */
913 result = nf_register_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
914 if (result < 0) {
915 DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
916 goto exit6;
917 }
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600918
919#ifdef CONFIG_NF_CONNTRACK_EVENTS
920 /*
921 * Register a notifier hook to get fast notifications of expired connections.
922 */
923 result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
924 if (result < 0) {
925 DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
926 goto exit7;
927 }
928#endif
929
930 spin_lock_init(&sc->lock);
931
932 /*
933 * Hook the receive path in the network stack.
934 */
935 BUG_ON(athrs_fast_nat_recv != NULL);
936 RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv);
937
938 /*
939 * Hook the shortcut sync callback.
940 */
941 sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule);
942
943 result = genl_register_family(&fast_classifier_gnl_family);
944 if (result!= 0)
945 goto exit8;
946
947 result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv);
948 if (result != 0)
949 goto exit9;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600950
951 printk(KERN_ALERT "fast-classifier: registered\n");
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600952
953 return 0;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600954
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600955exit9:
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600956 genl_unregister_family(&fast_classifier_gnl_family);
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600957exit8:
958
959#ifdef CONFIG_NF_CONNTRACK_EVENTS
960exit7:
961#endif
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600962 nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600963
964exit6:
965 unregister_inetaddr_notifier(&sc->inet_notifier);
966 unregister_netdevice_notifier(&sc->dev_notifier);
967 kobject_put(sc->sys_fast_classifier);
968
969exit1:
970 return result;
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600971}
972
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600973/*
974 * fast_classifier_exit()
975 */
976static void __exit fast_classifier_exit(void)
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600977{
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600978 struct fast_classifier *sc = &__sc;
979 int result = -1;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600980
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600981 DEBUG_INFO("SFE CM exit\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600982 printk(KERN_ALERT "fast-classifier: shutting down\n");
983
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600984 /*
985 * Unregister our sync callback.
986 */
987 sfe_ipv4_register_sync_rule_callback(NULL);
988
989 /*
990 * Unregister our receive callback.
991 */
992 RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
993
994 /*
995 * Wait for all callbacks to complete.
996 */
997 rcu_barrier();
998
999 /*
1000 * Destroy all connections.
1001 */
1002 sfe_ipv4_destroy_all_rules_for_dev(NULL);
1003
1004// XXX - this is where we need to unregister with any lower level offload services.
1005
1006#ifdef CONFIG_NF_CONNTRACK_EVENTS
1007 nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
1008
1009#endif
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001010 nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001011
1012 unregister_inetaddr_notifier(&sc->inet_notifier);
1013 unregister_netdevice_notifier(&sc->dev_notifier);
1014
1015 kobject_put(sc->sys_fast_classifier);
1016
1017 result = genl_register_family(&fast_classifier_gnl_family);
1018 if (result != 0)
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001019 printk(KERN_CRIT "Unable to unreigster genl_family\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001020
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001021 result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv);
1022 if (result != 0)
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001023 printk(KERN_CRIT "Unable to unreigster genl_ops\n");
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001024}
1025
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001026module_init(fast_classifier_init)
1027module_exit(fast_classifier_exit)
1028
1029MODULE_AUTHOR("Qualcomm Atheros Inc.");
1030MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001031MODULE_LICENSE("GPL");
1032