blob: 30103355a17b7b39f2d03fa13aa21684ada5dfdf [file] [log] [blame]
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001/*
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06002 * fast-classifier.c
3 * Shortcut forwarding engine connection manager.
4 * fast-classifier style
5 *
6 * XXX - fill in the appropriate GPL notice.
Matthew McClintock6f29aa12013-11-06 15:49:01 -06007 */
Matthew McClintock6f29aa12013-11-06 15:49:01 -06008#include <linux/module.h>
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06009#include <linux/sysfs.h>
10#include <linux/skbuff.h>
11#include <net/route.h>
12#include <linux/inetdevice.h>
13#include <linux/netfilter_bridge.h>
14#include <net/netfilter/nf_conntrack_acct.h>
15#include <net/netfilter/nf_conntrack_helper.h>
16#include <net/netfilter/nf_conntrack_zones.h>
17#include <net/netfilter/nf_conntrack_core.h>
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060018#include <net/genetlink.h>
Matthew McClintockea00adf2013-11-25 19:24:30 -060019#include <linux/list.h>
20#include <linux/spinlock.h>
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060021
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060022#include "../shortcut-fe/sfe.h"
23#include "../shortcut-fe/sfe_ipv4.h"
24#include "fast-classifier-priv.h"
25
/*
 * Per-module structure.
 *	Holds the module-wide control state: sysfs linkage and the two
 *	notifier blocks registered at init time.  A single instance (__sc)
 *	exists for the lifetime of the module.
 */
struct fast_classifier {
	spinlock_t lock;		/* Lock for SMP correctness */

	/*
	 * Control state.
	 */
	struct kobject *sys_fast_classifier;	/* sysfs linkage */

	/*
	 * Callback notifiers.
	 */
	struct notifier_block dev_notifier;
					/* Device notifier */
	struct notifier_block inet_notifier;
					/* IP notifier */
};
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060045
/*
 * The single module instance.
 * NOTE(review): the leading double underscore makes this a reserved
 * identifier, and it appears to be used only within this file so it could
 * be static - confirm no other translation unit declares it extern before
 * changing either.
 */
struct fast_classifier __sc;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -060047
/*
 * Generic netlink attribute policy: the single message attribute is a
 * NUL-terminated string blob (interpreted as a struct fast_classifier_msg
 * by fast_classifier_recv_genl_msg()).
 */
static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = {
	[FAST_CLASSIFIER_A_MSG] = { .type = NLA_NUL_STRING },
};
51
/*
 * Generic netlink family definition.  The family id is allocated
 * dynamically (GENL_ID_GENERATE); userspace resolves it by looking up
 * the "FAST_CLASSIFIER" name.
 */
static struct genl_family fast_classifier_gnl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = "FAST_CLASSIFIER",
	.version = 1,
	.maxattr = FAST_CLASSIFIER_A_MAX,
};
59
Matthew McClintocke1bcfe42013-11-22 15:33:09 -060060
/* Highest valid command number for this genl family. */
#define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1)

/* Forward declaration - the handler is defined after the connection list. */
static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info);

/*
 * Generic netlink operation: userspace sends FAST_CLASSIFIER_C_RECV
 * messages to request that a tracked connection be offloaded.
 */
static struct genl_ops fast_classifier_gnl_ops_recv = {
	.cmd = FAST_CLASSIFIER_C_RECV,
	.flags = 0,
	.policy = fast_classifier_genl_policy,
	.doit = fast_classifier_recv_genl_msg,
	.dumpit = NULL,
};
72
/*
 * Expose the hook for the receive processing.
 * The network stack calls through this pointer (when non-NULL) for each
 * received packet; we install fast_classifier_recv() here at init time.
 */
extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);

/*
 * Expose what should be a static flag in the TCP connection tracker.
 * Read in fast_classifier_recv_genl_msg() to decide whether TCP sequence
 * checking can be skipped for offloaded flows.
 */
extern int nf_ct_tcp_no_window_check;
82
83/*
84 * fast_classifier_recv()
85 * Handle packet receives.
86 *
87 * Returns 1 if the packet is forwarded or 0 if it isn't.
88 */
89int fast_classifier_recv(struct sk_buff *skb)
90{
91 struct net_device *dev;
92#if (SFE_HOOK_ABOVE_BRIDGE)
93 struct in_device *in_dev;
94#endif
95
96 /*
97 * We know that for the vast majority of packets we need the transport
98 * layer header so we may as well start to fetch it now!
99 */
100 prefetch(skb->data + 32);
101 barrier();
102
103 dev = skb->dev;
104
105#if (SFE_HOOK_ABOVE_BRIDGE)
106 /*
107 * Does our input device support IP processing?
108 */
109 in_dev = (struct in_device *)dev->ip_ptr;
110 if (unlikely(!in_dev)) {
111 DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
112 return 0;
113 }
114
115 /*
116 * Does it have an IP address? If it doesn't then we can't do anything
117 * interesting here!
118 */
119 if (unlikely(!in_dev->ifa_list)) {
120 DEBUG_TRACE("no IP address for device: %s\n", dev->name);
121 return 0;
122 }
123#endif
124
125 /*
126 * We're only interested in IP packets.
127 */
128 if (likely(htons(ETH_P_IP) == skb->protocol)) {
129 return sfe_ipv4_recv(dev, skb);
130 }
131
132 DEBUG_TRACE("not IP packet\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600133 return 0;
134}
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600135
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600136/*
137 * fast_classifier_find_mac_addr()
138 * Find the MAC address for a given IPv4 address.
139 *
140 * Returns true if we find the MAC address, otherwise false.
141 *
142 * We look up the rtable entry for the address and, from its neighbour
143 * structure, obtain the hardware address. This means this function also
144 * works if the neighbours are routers too.
145 */
146static bool fast_classifier_find_mac_addr(uint32_t addr, uint8_t *mac_addr)
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600147{
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600148 struct neighbour *neigh;
149 struct rtable *rt;
150 struct dst_entry *dst;
151 struct net_device *dev;
152
153 /*
154 * Look up the rtable entry for the IP address then get the hardware
155 * address from its neighbour structure. This means this work when the
156 * neighbours are routers too.
157 */
158 rt = ip_route_output(&init_net, addr, 0, 0, 0);
159 if (unlikely(IS_ERR(rt))) {
160 return false;
161 }
162
163 dst = (struct dst_entry *)rt;
164
165 rcu_read_lock();
166 neigh = dst_get_neighbour_noref(dst);
167 if (unlikely(!neigh)) {
168 rcu_read_unlock();
169 dst_release(dst);
170 return false;
171 }
172
173 if (unlikely(!(neigh->nud_state & NUD_VALID))) {
174 rcu_read_unlock();
175 dst_release(dst);
176 return false;
177 }
178
179 dev = neigh->dev;
180 if (!dev) {
181 rcu_read_unlock();
182 dst_release(dst);
183 return false;
184 }
185
186 memcpy(mac_addr, neigh->ha, (size_t)dev->addr_len);
187 rcu_read_unlock();
188
189 dst_release(dst);
190
191 /*
192 * We're only interested in unicast MAC addresses - if it's not a unicast
193 * address then our IP address mustn't be unicast either.
194 */
195 if (is_multicast_ether_addr(mac_addr)) {
196 DEBUG_TRACE("MAC is non-unicast - ignoring\n");
197 return false;
198 }
199
200 return true;
201}
202
/* Protects the sfe_connections list below. */
static DEFINE_SPINLOCK(sfe_connections_lock);

/*
 * One candidate connection seen by the post-routing hook: the prepared
 * create-rule parameters plus the conntrack entry they were derived from.
 * NOTE(review): ct is stored without taking a conntrack reference -
 * confirm the entry cannot outlive the conntrack it points at.
 */
struct sfe_connection {
	struct list_head list;
	struct sfe_ipv4_create *sic;
	struct nf_conn *ct;
};

/* All connections harvested but not yet destroyed. */
static LIST_HEAD(sfe_connections);
212
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600213/*
214 * fast_classifier_recv_genl_msg()
215 * Called from user space to offload a connection
216 */
217static int fast_classifier_recv_genl_msg(struct sk_buff *skb, struct genl_info *info)
218{
219 struct nlattr *na;
220 struct fast_classifier_msg *fc_msg;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600221 struct sfe_ipv4_create *p_sic;
222 struct sfe_connection *conn;
223 unsigned long flags;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600224
225 na = info->attrs[FAST_CLASSIFIER_C_RECV];
226 fc_msg = nla_data(na);
Matthew McClintockea00adf2013-11-25 19:24:30 -0600227
228 DEBUG_TRACE("INFO: want to offload: %d, %d, %d, %d, %d\n", fc_msg->proto,
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600229 fc_msg->src_saddr,
230 fc_msg->dst_saddr,
231 fc_msg->sport, fc_msg->dport);
Matthew McClintockea00adf2013-11-25 19:24:30 -0600232 spin_lock_irqsave(&sfe_connections_lock, flags);
233 list_for_each_entry(conn, &sfe_connections, list) {
234 struct nf_conn *ct = conn->ct;
235 p_sic = conn->sic;
236
237 DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
238 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
239 p_sic->src_port, p_sic->dest_port);
240
241 if (p_sic->protocol == fc_msg->proto &&
242 p_sic->src_port == fc_msg->sport &&
243 p_sic->dest_port == fc_msg->dport &&
244 p_sic->src_ip == fc_msg->src_saddr &&
245 p_sic->dest_ip == fc_msg->dst_saddr ) {
246 DEBUG_TRACE("FOUND, WILL OFFLOAD\n");
247 switch (p_sic->protocol) {
248 case IPPROTO_TCP:
249 p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
250 p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
251 p_sic->src_td_end = ct->proto.tcp.seen[0].td_end;
252 p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
253 p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
254 p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
255 p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end;
256 p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
257 if (nf_ct_tcp_no_window_check
258 || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
259 || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
260 p_sic->flags |= SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK;
261 }
262
263 /*
264 * If the connection is shutting down do not manage it.
265 * state can not be SYN_SENT, SYN_RECV because connection is assured
266 * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
267 */
268 spin_lock(&ct->lock);
269 if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
270 spin_unlock_bh(&ct->lock);
271 DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
272 ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port),
273 &p_sic->dest_ip, ntohs(p_sic->dest_port));
274 spin_unlock_irqrestore(&sfe_connections_lock, flags);
275 return 0;
276 }
277 spin_unlock(&ct->lock);
278 break;
279
280 case IPPROTO_UDP:
281 break;
282
283 default:
284 DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol);
285 spin_unlock_irqrestore(&sfe_connections_lock, flags);
286 return 0;
287 }
288
289 DEBUG_TRACE("INFO: calling sfe rule creation!\n");
290 spin_unlock_irqrestore(&sfe_connections_lock, flags);
291 sfe_ipv4_create_rule(p_sic);
292 return 0;
293 }
294 DEBUG_TRACE("SEARCH CONTINUES\n");
295 }
296
297 spin_unlock_irqrestore(&sfe_connections_lock, flags);
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600298 return 0;
299}
300
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600301/*
302 * fast_classifier_ipv4_post_routing_hook()
303 * Called for packets about to leave the box - either locally generated or forwarded from another interface
304 */
305static unsigned int fast_classifier_ipv4_post_routing_hook(unsigned int hooknum,
306 struct sk_buff *skb,
307 const struct net_device *in_unused,
308 const struct net_device *out,
309 int (*okfn)(struct sk_buff *))
310{
311 struct sfe_ipv4_create sic;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600312 struct sfe_ipv4_create *p_sic;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600313 struct net_device *in;
314 struct nf_conn *ct;
315 enum ip_conntrack_info ctinfo;
316 struct net_device *src_dev;
317 struct net_device *dest_dev;
318 struct net_device *src_br_dev = NULL;
319 struct net_device *dest_br_dev = NULL;
320 struct nf_conntrack_tuple orig_tuple;
321 struct nf_conntrack_tuple reply_tuple;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600322 struct sfe_connection *conn;
323 int sfe_connections_size = 0;
324 unsigned long flags;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600325
326 /*
327 * Don't process broadcast or multicast packets.
328 */
329 if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
330 DEBUG_TRACE("broadcast, ignoring\n");
331 return NF_ACCEPT;
332 }
333 if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
334 DEBUG_TRACE("multicast, ignoring\n");
335 return NF_ACCEPT;
336 }
337
338 /*
339 * Don't process packets that are not being forwarded.
340 */
341 in = dev_get_by_index(&init_net, skb->skb_iif);
342 if (!in) {
343 DEBUG_TRACE("packet not forwarding\n");
344 return NF_ACCEPT;
345 }
346
347 /*
348 * Don't process packets with non-standard 802.3 MAC address sizes.
349 */
350 if (unlikely(in->addr_len != ETH_ALEN)) {
351 DEBUG_TRACE("in device: %s not 802.3 hw addr len: %u, ignoring\n",
352 in->name, (unsigned)in->addr_len);
353 goto done1;
354 }
355 if (unlikely(out->addr_len != ETH_ALEN)) {
356 DEBUG_TRACE("out device: %s not 802.3 hw addr len: %u, ignoring\n",
357 out->name, (unsigned)out->addr_len);
358 goto done1;
359 }
360
361 /*
362 * Don't process packets that aren't being tracked by conntrack.
363 */
364 ct = nf_ct_get(skb, &ctinfo);
365 if (unlikely(!ct)) {
366 DEBUG_TRACE("no conntrack connection, ignoring\n");
367 goto done1;
368 }
369
370 /*
371 * Don't process untracked connections.
372 */
373 if (unlikely(ct == &nf_conntrack_untracked)) {
374 DEBUG_TRACE("untracked connection\n");
375 goto done1;
376 }
377
378 /*
379 * Don't process connections that require support from a 'helper' (typically a NAT ALG).
380 */
381 if (unlikely(nfct_help(ct))) {
382 DEBUG_TRACE("connection has helper\n");
383 goto done1;
384 }
385
386 /*
387 * Look up the details of our connection in conntrack.
388 *
389 * Note that the data we get from conntrack is for the "ORIGINAL" direction
390 * but our packet may actually be in the "REPLY" direction.
391 */
392 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
393 reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
394 sic.protocol = (int32_t)orig_tuple.dst.protonum;
395
396 /*
397 * Get addressing information, non-NAT first
398 */
399 sic.src_ip = (__be32)orig_tuple.src.u3.ip;
400 sic.dest_ip = (__be32)orig_tuple.dst.u3.ip;
401
402 /*
403 * NAT'ed addresses - note these are as seen from the 'reply' direction
404 * When NAT does not apply to this connection these will be identical to the above.
405 */
406 sic.src_ip_xlate = (__be32)reply_tuple.dst.u3.ip;
407 sic.dest_ip_xlate = (__be32)reply_tuple.src.u3.ip;
408
409 sic.flags = 0;
410
411 switch (sic.protocol) {
412 case IPPROTO_TCP:
413 sic.src_port = orig_tuple.src.u.tcp.port;
414 sic.dest_port = orig_tuple.dst.u.tcp.port;
415 sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
416 sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600417
418 /*
419 * Don't try to manage a non-established connection.
420 */
421 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
422 DEBUG_TRACE("non-established connection\n");
423 goto done1;
424 }
425
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600426 break;
427
428 case IPPROTO_UDP:
429 sic.src_port = orig_tuple.src.u.udp.port;
430 sic.dest_port = orig_tuple.dst.u.udp.port;
431 sic.src_port_xlate = reply_tuple.dst.u.udp.port;
432 sic.dest_port_xlate = reply_tuple.src.u.udp.port;
433 break;
434
435 default:
436 DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
437 goto done1;
438 }
439
440 /*
Matthew McClintockea00adf2013-11-25 19:24:30 -0600441 * If we already have this connection in our list, skip it
442 * XXX: this may need to be optimized
443 */
444 DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
445 sic.protocol, sic.src_ip, sic.dest_ip,
446 sic.src_port, sic.dest_port);
447 spin_lock_irqsave(&sfe_connections_lock, flags);
448 list_for_each_entry(conn, &sfe_connections, list) {
449 p_sic = conn->sic;
450 DEBUG_TRACE("\t\t-> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
451 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
452 p_sic->src_port, p_sic->dest_port);
453
454 if (p_sic->protocol == sic.protocol &&
455 p_sic->src_port == sic.src_port &&
456 p_sic->dest_port == sic.dest_port &&
457 p_sic->src_ip == sic.src_ip &&
458 p_sic->dest_ip == sic.dest_ip ) {
459 DEBUG_TRACE("FOUND, SKIPPING\n");
Matthew McClintocke1cf6f22013-11-27 13:27:09 -0600460 if (skb->mark) {
461 DEBUG_TRACE("UPDATING MARK %x\n", skb->mark);
462 }
463 p_sic->mark = skb->mark;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600464 spin_unlock_irqrestore(&sfe_connections_lock, flags);
465 goto done1;
466 } else {
467 DEBUG_TRACE("SEARCH CONTINUES");
468 }
469
470 sfe_connections_size++;
471 }
472 spin_unlock_irqrestore(&sfe_connections_lock, flags);
473
474 /*
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600475 * Get the MAC addresses that correspond to source and destination host addresses.
476 */
477 if (!fast_classifier_find_mac_addr(sic.src_ip, sic.src_mac)) {
478 DEBUG_TRACE("failed to find MAC address for src IP: %pI4\n", &sic.src_ip);
479 goto done1;
480 }
481
482 if (!fast_classifier_find_mac_addr(sic.src_ip_xlate, sic.src_mac_xlate)) {
483 DEBUG_TRACE("failed to find MAC address for xlate src IP: %pI4\n", &sic.src_ip_xlate);
484 goto done1;
485 }
486
487 /*
488 * Do dest now
489 */
490 if (!fast_classifier_find_mac_addr(sic.dest_ip, sic.dest_mac)) {
491 DEBUG_TRACE("failed to find MAC address for dest IP: %pI4\n", &sic.dest_ip);
492 goto done1;
493 }
494
495 if (!fast_classifier_find_mac_addr(sic.dest_ip_xlate, sic.dest_mac_xlate)) {
496 DEBUG_TRACE("failed to find MAC address for xlate dest IP: %pI4\n", &sic.dest_ip_xlate);
497 goto done1;
498 }
499
500 /*
501 * Get our device info. If we're dealing with the "reply" direction here then
502 * we'll need things swapped around.
503 */
504 if (ctinfo < IP_CT_IS_REPLY) {
505 src_dev = in;
506 dest_dev = (struct net_device *)out;
507 } else {
508 src_dev = (struct net_device *)out;
509 dest_dev = in;
510 }
511
512#if (!SFE_HOOK_ABOVE_BRIDGE)
513 /*
514 * Now our devices may actually be a bridge interface. If that's
515 * the case then we need to hunt down the underlying interface.
516 */
517 if (src_dev->priv_flags & IFF_EBRIDGE) {
518 src_br_dev = br_port_dev_get(src_dev, sic.src_mac);
519 if (!src_br_dev) {
520 DEBUG_TRACE("no port found on bridge\n");
521 goto done1;
522 }
523
524 src_dev = src_br_dev;
525 }
526
527 if (dest_dev->priv_flags & IFF_EBRIDGE) {
528 dest_br_dev = br_port_dev_get(dest_dev, sic.dest_mac_xlate);
529 if (!dest_br_dev) {
530 DEBUG_TRACE("no port found on bridge\n");
531 goto done2;
532 }
533
534 dest_dev = dest_br_dev;
535 }
536#else
537 /*
538 * Our devices may actually be part of a bridge interface. If that's
539 * the case then find the bridge interface instead.
540 */
541 if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
542 src_br_dev = src_dev->master;
543 if (!src_br_dev) {
544 DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
545 goto done1;
546 }
547
548 dev_hold(src_br_dev);
549 src_dev = src_br_dev;
550 }
551
552 if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
553 dest_br_dev = dest_dev->master;
554 if (!dest_br_dev) {
555 DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
556 goto done2;
557 }
558
559 dev_hold(dest_br_dev);
560 dest_dev = dest_br_dev;
561 }
562#endif
563
564 sic.src_dev = src_dev;
565 sic.dest_dev = dest_dev;
566
567// XXX - these MTUs need handling correctly!
568 sic.src_mtu = 1500;
569 sic.dest_mtu = 1500;
570
Matthew McClintocke1cf6f22013-11-27 13:27:09 -0600571 if (skb->mark) {
572 DEBUG_TRACE("SKB MARK NON ZERO %x\n", skb->mark);
573 }
574 sic.mark = skb->mark;
575
Matthew McClintockea00adf2013-11-25 19:24:30 -0600576 conn = kmalloc(sizeof(struct sfe_connection), GFP_KERNEL);
577 if (conn == NULL) {
578 printk(KERN_CRIT "ERROR: no memory for sfe\n");
579 goto done3;
580 }
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600581
Matthew McClintockea00adf2013-11-25 19:24:30 -0600582 p_sic = kmalloc(sizeof(struct sfe_ipv4_create), GFP_KERNEL);
583 if (p_sic == NULL) {
584 printk(KERN_CRIT "ERROR: no memory for sfe\n");
585 kfree(conn);
586 goto done3;
587 }
588
589 memcpy(p_sic, &sic, sizeof(sic));
590 conn->sic = p_sic;
591 conn->ct = ct;
592 DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", ++sfe_connections_size);
593 DEBUG_TRACE("POST_ROUTE: new offloadable connection: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
594 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
595 p_sic->src_port, p_sic->dest_port);
596 spin_lock_irqsave(&sfe_connections_lock, flags);
597 list_add_tail(&(conn->list), &sfe_connections);
598 spin_unlock_irqrestore(&sfe_connections_lock, flags);
599done3:
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600600 /*
601 * If we had bridge ports then release them too.
602 */
603 if (dest_br_dev) {
604 dev_put(dest_br_dev);
605 }
606
607done2:
608 if (src_br_dev) {
609 dev_put(src_br_dev);
610 }
611
612done1:
613 /*
614 * Release the interface on which this skb arrived
615 */
616 dev_put(in);
617
618 return NF_ACCEPT;
619}
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600620
621#ifdef CONFIG_NF_CONNTRACK_EVENTS
622/*
623 * fast_classifier_conntrack_event()
624 * Callback event invoked when a conntrack connection's state changes.
625 */
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600626#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
627static int fast_classifier_conntrack_event(struct notifier_block *this,
628 unsigned int events, struct nf_ct_event *item)
629#else
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600630static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item)
Matthew McClintock0680e9f2013-11-26 15:43:10 -0600631#endif
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600632{
633 struct sfe_ipv4_destroy sid;
634 struct nf_conn *ct = item->ct;
635 struct nf_conntrack_tuple orig_tuple;
Matthew McClintockea00adf2013-11-25 19:24:30 -0600636 struct sfe_connection *conn;
637 struct sfe_ipv4_create *p_sic;
638 int sfe_found_match = 0;
639 int sfe_connections_size = 0;
640 unsigned long flags;
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600641
Matthew McClintocke1cf6f22013-11-27 13:27:09 -0600642 if (events & IPCT_MARK) {
643 struct sfe_ipv4_mark mark;
644 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
645
646 mark.protocol = (int32_t)orig_tuple.dst.protonum;
647 mark.src_ip = (__be32)orig_tuple.src.u3.ip;
648 mark.dest_ip = (__be32)orig_tuple.dst.u3.ip;
649 switch (mark.protocol) {
650 case IPPROTO_TCP:
651 mark.src_port = orig_tuple.src.u.tcp.port;
652 mark.dest_port = orig_tuple.dst.u.tcp.port;
653 break;
654 case IPPROTO_UDP:
655 mark.src_port = orig_tuple.src.u.udp.port;
656 mark.dest_port = orig_tuple.dst.u.udp.port;
657 break;
658 default:
659 break;
660 }
661
662 sfe_ipv4_mark_rule(&mark);
663 }
664
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600665 /*
666 * If we don't have a conntrack entry then we're done.
667 */
668 if (unlikely(!ct)) {
669 DEBUG_WARN("no ct in conntrack event callback\n");
670 return NOTIFY_DONE;
671 }
672
673 /*
674 * If this is an untracked connection then we can't have any state either.
675 */
676 if (unlikely(ct == &nf_conntrack_untracked)) {
677 DEBUG_TRACE("ignoring untracked conn\n");
678 return NOTIFY_DONE;
679 }
680
681 /*
682 * Ignore anything other than IPv4 connections.
683 */
684 if (unlikely(nf_ct_l3num(ct) != AF_INET)) {
685 DEBUG_TRACE("ignoring non-IPv4 conn\n");
686 return NOTIFY_DONE;
687 }
688
689 /*
Matthew McClintocke1cf6f22013-11-27 13:27:09 -0600690 * We're only interested in destroy events at this point
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600691 */
692 if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
693 DEBUG_TRACE("ignoring non-destroy event\n");
694 return NOTIFY_DONE;
695 }
696
697 orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
698 sid.protocol = (int32_t)orig_tuple.dst.protonum;
699
700 /*
701 * Extract information from the conntrack connection. We're only interested
702 * in nominal connection information (i.e. we're ignoring any NAT information).
703 */
704 sid.src_ip = (__be32)orig_tuple.src.u3.ip;
705 sid.dest_ip = (__be32)orig_tuple.dst.u3.ip;
706
707 switch (sid.protocol) {
708 case IPPROTO_TCP:
709 sid.src_port = orig_tuple.src.u.tcp.port;
710 sid.dest_port = orig_tuple.dst.u.tcp.port;
711 break;
712
713 case IPPROTO_UDP:
714 sid.src_port = orig_tuple.src.u.udp.port;
715 sid.dest_port = orig_tuple.dst.u.udp.port;
716 break;
717
718 default:
719 DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
720 return NOTIFY_DONE;
721 }
722
Matthew McClintockea00adf2013-11-25 19:24:30 -0600723 /*
724 * If we already have this connection in our list, skip it
725 * XXX: this may need to be optimized
726 */
727 DEBUG_TRACE("INFO: want to clean up: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
728 sid.protocol, sid.src_ip, sid.dest_ip,
729 sid.src_port, sid.dest_port);
730 spin_lock_irqsave(&sfe_connections_lock, flags);
731 list_for_each_entry(conn, &sfe_connections, list) {
732 p_sic = conn->sic;
733 DEBUG_TRACE(" -> COMPARING: proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d...",
734 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
735 p_sic->src_port, p_sic->dest_port);
736
737 if (p_sic->protocol == sid.protocol &&
738 p_sic->src_port == sid.src_port &&
739 p_sic->dest_port == sid.dest_port &&
740 p_sic->src_ip == sid.src_ip &&
741 p_sic->dest_ip == sid.dest_ip ) {
742 sfe_found_match = 1;
743 DEBUG_TRACE("FOUND, DELETING\n");
744 break;
745 } else {
746 DEBUG_TRACE("SEARCH CONTINUES\n");
747 }
748 sfe_connections_size++;
749 }
750
751 if (sfe_found_match) {
752 DEBUG_TRACE("INFO: connection over proto: %d src_ip: %d dst_ip: %d, src_port: %d, dst_port: %d\n",
753 p_sic->protocol, p_sic->src_ip, p_sic->dest_ip,
754 p_sic->src_port, p_sic->dest_port);
755 kfree(conn->sic);
756 list_del(&(conn->list));
757 kfree(conn);
758 } else {
759 DEBUG_TRACE("NO MATCH FOUND IN %d ENTRIES!!\n", sfe_connections_size);
760 }
761 spin_unlock_irqrestore(&sfe_connections_lock, flags);
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600762
763 sfe_ipv4_destroy_rule(&sid);
764 return NOTIFY_DONE;
765}
766
/*
 * Netfilter conntrack event system to monitor connection tracking changes.
 * With chained events the kernel expects a plain notifier_block; otherwise
 * it expects an nf_ct_event_notifier with the .fcn callback.
 */
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
static struct notifier_block fast_classifier_conntrack_notifier = {
	.notifier_call = fast_classifier_conntrack_event,
};
#else
static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = {
	.fcn = fast_classifier_conntrack_event,
};
#endif /* CONFIG_NF_CONNTRACK_CHAIN_EVENTS */
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600780
/*
 * Structure to establish a hook into the post routing netfilter point - this
 * will pick up local outbound and packets going from one interface to another.
 *
 * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
 * We want to examine packets after NAT translation and any ALG processing
 * (hence NF_IP_PRI_NAT_SRC + 1).
 */
static struct nf_hook_ops fast_classifier_ipv4_ops_post_routing[] __read_mostly = {
	{
		.hook = fast_classifier_ipv4_post_routing_hook,
		.owner = THIS_MODULE,
		.pf = PF_INET,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_NAT_SRC + 1,
	},
};
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600797
/*
 * fast_classifier_sync_rule()
 *	Synchronize a connection's state.
 *
 * @sis: statistics/state snapshot pushed up by the SFE engine for one flow.
 *
 * Finds the matching conntrack entry and feeds the offloaded traffic back
 * into it: extends the timeout, adds byte/packet counters, and advances the
 * tracked TCP window state so conntrack stays consistent with the traffic
 * the engine forwarded on its behalf.
 */
static void fast_classifier_sync_rule(struct sfe_ipv4_sync *sis)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conntrack_tuple tuple;
	struct nf_conn *ct;
	struct nf_conn_counter *acct;

	/*
	 * Create a tuple so as to be able to look up a connection
	 */
	memset(&tuple, 0, sizeof(tuple));
	tuple.src.u3.ip = sis->src_ip;
	tuple.src.u.all = (__be16)sis->src_port;
	tuple.src.l3num = AF_INET;

	tuple.dst.u3.ip = sis->dest_ip;
	tuple.dst.dir = IP_CT_DIR_ORIGINAL;
	tuple.dst.protonum = (uint8_t)sis->protocol;
	tuple.dst.u.all = (__be16)sis->dest_port;

	DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
		    (int)tuple.dst.protonum,
		    &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
		    &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));

	/*
	 * Look up conntrack connection
	 */
	h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
	if (unlikely(!h)) {
		DEBUG_TRACE("no connection found\n");
		return;
	}

	/* nf_conntrack_find_get() took a reference - released via nf_ct_put() below. */
	ct = nf_ct_tuplehash_to_ctrack(h);
	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);

	/*
	 * Only update if this is not a fixed timeout
	 */
	if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
		ct->timeout.expires += sis->delta_jiffies;
	}

	/*
	 * Credit the traffic the engine forwarded to conntrack's accounting.
	 */
	acct = nf_conn_acct_find(ct);
	if (acct) {
		spin_lock_bh(&ct->lock);
		atomic64_add(sis->src_packet_count, &acct[IP_CT_DIR_ORIGINAL].packets);
		atomic64_add(sis->src_byte_count, &acct[IP_CT_DIR_ORIGINAL].bytes);
		atomic64_add(sis->dest_packet_count, &acct[IP_CT_DIR_REPLY].packets);
		atomic64_add(sis->dest_byte_count, &acct[IP_CT_DIR_REPLY].bytes);
		spin_unlock_bh(&ct->lock);
	}

	switch (sis->protocol) {
	case IPPROTO_TCP:
		spin_lock_bh(&ct->lock);
		/*
		 * Advance each direction's window state monotonically.  The
		 * (int32_t)(a - b) < 0 form is a sequence-number comparison
		 * that stays correct across 32-bit wraparound.
		 */
		if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
			ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
		}
		if ((int32_t)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
			ct->proto.tcp.seen[0].td_end = sis->src_td_end;
		}
		if ((int32_t)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
			ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
		}
		if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
			ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
		}
		if ((int32_t)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
			ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
		}
		if ((int32_t)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
			ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
		}
		spin_unlock_bh(&ct->lock);
		break;
	}

	/*
	 * Release connection
	 */
	nf_ct_put(ct);
}
886
887/*
888 * fast_classifier_device_event()
889 */
890static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr)
891{
892 struct net_device *dev = (struct net_device *)ptr;
893
894 switch (event) {
895 case NETDEV_DOWN:
896 if (dev) {
897 sfe_ipv4_destroy_all_rules_for_dev(dev);
898 }
899 break;
900 }
901
902 return NOTIFY_DONE;
903}
904
905/*
906 * fast_classifier_inet_event()
907 */
908static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
909{
910 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
911 return fast_classifier_device_event(this, event, dev);
912}
913
914/*
915 * fast_classifier_init()
916 */
917static int __init fast_classifier_init(void)
918{
919 struct fast_classifier *sc = &__sc;
920 int result = -1;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600921
922 printk(KERN_ALERT "fast-classifier: starting up\n");
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600923 DEBUG_INFO("SFE CM init\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600924
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600925 /*
926 * Create sys/fast_classifier
927 */
928 sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL);
929 if (!sc->sys_fast_classifier) {
930 DEBUG_ERROR("failed to register fast_classifier\n");
931 goto exit1;
932 }
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600933
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600934 sc->dev_notifier.notifier_call = fast_classifier_device_event;
935 sc->dev_notifier.priority = 1;
936 register_netdevice_notifier(&sc->dev_notifier);
937
938 sc->inet_notifier.notifier_call = fast_classifier_inet_event;
939 sc->inet_notifier.priority = 1;
940 register_inetaddr_notifier(&sc->inet_notifier);
941
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600942 /*
943 * Register our netfilter hooks.
944 */
945 result = nf_register_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
946 if (result < 0) {
947 DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
948 goto exit6;
949 }
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600950
951#ifdef CONFIG_NF_CONNTRACK_EVENTS
952 /*
953 * Register a notifier hook to get fast notifications of expired connections.
954 */
955 result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
956 if (result < 0) {
957 DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
958 goto exit7;
959 }
960#endif
961
962 spin_lock_init(&sc->lock);
963
964 /*
965 * Hook the receive path in the network stack.
966 */
967 BUG_ON(athrs_fast_nat_recv != NULL);
968 RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv);
969
970 /*
971 * Hook the shortcut sync callback.
972 */
973 sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule);
974
975 result = genl_register_family(&fast_classifier_gnl_family);
976 if (result!= 0)
977 goto exit8;
978
979 result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv);
980 if (result != 0)
981 goto exit9;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600982
983 printk(KERN_ALERT "fast-classifier: registered\n");
Matthew McClintock6f29aa12013-11-06 15:49:01 -0600984
985 return 0;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600986
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600987exit9:
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -0600988 genl_unregister_family(&fast_classifier_gnl_family);
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600989exit8:
990
991#ifdef CONFIG_NF_CONNTRACK_EVENTS
992exit7:
993#endif
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600994 nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
Matthew McClintocke1bcfe42013-11-22 15:33:09 -0600995
996exit6:
997 unregister_inetaddr_notifier(&sc->inet_notifier);
998 unregister_netdevice_notifier(&sc->dev_notifier);
999 kobject_put(sc->sys_fast_classifier);
1000
1001exit1:
1002 return result;
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001003}
1004
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001005/*
1006 * fast_classifier_exit()
1007 */
1008static void __exit fast_classifier_exit(void)
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001009{
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001010 struct fast_classifier *sc = &__sc;
1011 int result = -1;
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001012
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001013 DEBUG_INFO("SFE CM exit\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001014 printk(KERN_ALERT "fast-classifier: shutting down\n");
1015
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001016 /*
1017 * Unregister our sync callback.
1018 */
1019 sfe_ipv4_register_sync_rule_callback(NULL);
1020
1021 /*
1022 * Unregister our receive callback.
1023 */
1024 RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
1025
1026 /*
1027 * Wait for all callbacks to complete.
1028 */
1029 rcu_barrier();
1030
1031 /*
1032 * Destroy all connections.
1033 */
1034 sfe_ipv4_destroy_all_rules_for_dev(NULL);
1035
1036// XXX - this is where we need to unregister with any lower level offload services.
1037
1038#ifdef CONFIG_NF_CONNTRACK_EVENTS
1039 nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
1040
1041#endif
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001042 nf_unregister_hooks(fast_classifier_ipv4_ops_post_routing, ARRAY_SIZE(fast_classifier_ipv4_ops_post_routing));
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001043
1044 unregister_inetaddr_notifier(&sc->inet_notifier);
1045 unregister_netdevice_notifier(&sc->dev_notifier);
1046
1047 kobject_put(sc->sys_fast_classifier);
1048
1049 result = genl_register_family(&fast_classifier_gnl_family);
1050 if (result != 0)
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001051 printk(KERN_CRIT "Unable to unreigster genl_family\n");
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001052
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001053 result = genl_register_ops(&fast_classifier_gnl_family, &fast_classifier_gnl_ops_recv);
1054 if (result != 0)
Matthew McClintock6ab3b3f2013-11-14 15:39:15 -06001055 printk(KERN_CRIT "Unable to unreigster genl_ops\n");
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001056}
1057
Matthew McClintocke1bcfe42013-11-22 15:33:09 -06001058module_init(fast_classifier_init)
1059module_exit(fast_classifier_exit)
1060
1061MODULE_AUTHOR("Qualcomm Atheros Inc.");
1062MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
Matthew McClintock6f29aa12013-11-06 15:49:01 -06001063MODULE_LICENSE("GPL");
1064