blob: 39e0e036c1359f870f826ec38ac17c9644ab2e0f [file] [log] [blame]
/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * XXX - fill in the appropriate GPL notice.
 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01007#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -06008#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01009#include <linux/skbuff.h>
10#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010011#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060012#include <linux/etherdevice.h>
Ben Menchaca0971b7a2014-01-10 14:43:02 -060013#include <linux/if_pppox.h>
14#include <linux/ppp_defs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010015
Dave Hudsondcd08fb2013-11-22 09:25:16 -060016#include "sfe.h"
17#include "sfe_ipv4.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010018
/*
 * The stock Linux IP and transport layer header structures are not packed
 * because such headers are assumed to be 32-bit aligned. Some wireless
 * adaptors cannot honour that requirement and some CPUs cannot handle
 * misaligned accesses, so for those platforms SFE_IPV4_UNALIGNED_IP_HEADER is
 * defined and the header structures are marked as packed. On most platforms
 * the compiler then generates slightly worse code than for the aligned case,
 * but that is still much quicker than fixing things up in an unaligned trap
 * handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1

#if !SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT
#else
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#endif
35
36/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010037 * The default Linux ethhdr structure is "packed". It also has byte aligned
38 * MAC addresses and this leads to poor performance. This version is not
39 * packed and has better alignment for the MAC addresses.
40 */
41struct sfe_ipv4_ethhdr {
42 __be16 h_dest[ETH_ALEN / 2];
43 __be16 h_source[ETH_ALEN / 2];
44 __be16 h_proto;
45};
46
47/*
Dave Hudsona8197e72013-12-17 23:46:22 +000048 * Based on the Linux IPv4 header, but with an optional "packed" attribute to
49 * help with performance on some platforms (see the definition of
50 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010051 */
52struct sfe_ipv4_iphdr {
53#if defined(__LITTLE_ENDIAN_BITFIELD)
54 __u8 ihl:4,
55 version:4;
56#elif defined (__BIG_ENDIAN_BITFIELD)
57 __u8 version:4,
58 ihl:4;
59#else
60#error "Please fix <asm/byteorder.h>"
61#endif
62 __u8 tos;
63 __be16 tot_len;
64 __be16 id;
65 __be16 frag_off;
66 __u8 ttl;
67 __u8 protocol;
68 __sum16 check;
69 __be32 saddr;
70 __be32 daddr;
Dave Hudsondcd08fb2013-11-22 09:25:16 -060071
72 /*
73 * The options start here.
74 */
Dave Hudsona8197e72013-12-17 23:46:22 +000075} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010076
77/*
Dave Hudsona8197e72013-12-17 23:46:22 +000078 * Based on the Linux UDP header, but with an optional "packed" attribute to
79 * help with performance on some platforms (see the definition of
80 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010081 */
82struct sfe_ipv4_udphdr {
83 __be16 source;
84 __be16 dest;
85 __be16 len;
86 __sum16 check;
Dave Hudsona8197e72013-12-17 23:46:22 +000087} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010088
89/*
Dave Hudsona8197e72013-12-17 23:46:22 +000090 * Based on the Linux TCP header, but with an optional "packed" attribute to
91 * help with performance on some platforms (see the definition of
92 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010093 */
94struct sfe_ipv4_tcphdr {
95 __be16 source;
96 __be16 dest;
97 __be32 seq;
98 __be32 ack_seq;
99#if defined(__LITTLE_ENDIAN_BITFIELD)
100 __u16 res1:4,
101 doff:4,
102 fin:1,
103 syn:1,
104 rst:1,
105 psh:1,
106 ack:1,
107 urg:1,
108 ece:1,
109 cwr:1;
110#elif defined(__BIG_ENDIAN_BITFIELD)
111 __u16 doff:4,
112 res1:4,
113 cwr:1,
114 ece:1,
115 urg:1,
116 ack:1,
117 psh:1,
118 rst:1,
119 syn:1,
120 fin:1;
121#else
122#error "Adjust your <asm/byteorder.h> defines"
Nicolas Costaac2979c2014-01-14 10:35:24 -0600123#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100124 __be16 window;
125 __sum16 check;
126 __be16 urg_ptr;
Dave Hudsona8197e72013-12-17 23:46:22 +0000127} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100128
/*
 * SFE_IPV4_TCP_MAX_ACK_WINDOW
 *	Specifies the lower bound on ACK numbers carried in the TCP header.
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
133
/*
 * struct sfe_ipv4_tcp_connection_match
 *	Additional per-direction state kept for an IPv4 TCP connection match.
 */
struct sfe_ipv4_tcp_connection_match {
	/* Window scale */
	uint8_t win_scale;

	/* Maximum window size seen */
	uint32_t max_win;

	/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t end;

	/* Sequence number of the last byte to ack */
	uint32_t max_end;
};
143
/*
 * Bit flags for an IPv4 connection matching entry.
 */

/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1 << 0)

/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1 << 1)

/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1 << 2)

/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR (1 << 3)
155
156/*
157 * IPv4 connection matching structure.
158 */
159struct sfe_ipv4_connection_match {
160 /*
161 * References to other objects.
162 */
163 struct sfe_ipv4_connection_match *next;
164 /* Next connection match entry in a list */
165 struct sfe_ipv4_connection_match *prev;
166 /* Previous connection match entry in a list */
167 struct sfe_ipv4_connection *connection;
168 /* Pointer to our connection */
169 struct sfe_ipv4_connection_match *counter_match;
170 /* Pointer to the connection match in the "counter" direction to this one */
171 struct sfe_ipv4_connection_match *active_next;
172 /* Pointer to the next connection in the active list */
173 struct sfe_ipv4_connection_match *active_prev;
174 /* Pointer to the previous connection in the active list */
175 bool active; /* Flag to indicate if we're on the active list */
176
177 /*
178 * Characteristics that identify flows that match this rule.
179 */
180 struct net_device *match_dev; /* Network device */
181 uint8_t match_protocol; /* Protocol */
Dave Hudson87973cd2013-10-22 16:00:04 +0100182 __be32 match_src_ip; /* Source IP address */
183 __be32 match_dest_ip; /* Destination IP address */
184 __be16 match_src_port; /* Source port/connection ident */
185 __be16 match_dest_port; /* Destination port/connection ident */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100186
187 /*
188 * Control the operations of the match.
189 */
190 uint32_t flags; /* Bit flags */
191
192 /*
193 * Connection state that we track once we match.
194 */
195 union { /* Protocol-specific state */
196 struct sfe_ipv4_tcp_connection_match tcp;
197 } protocol_state;
198 uint32_t rx_packet_count; /* Number of packets RX'd */
199 uint32_t rx_byte_count; /* Number of bytes RX'd */
200
201 /*
202 * Packet translation information.
203 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100204 __be32 xlate_src_ip; /* Address after source translation */
205 __be16 xlate_src_port; /* Port/connection ident after source translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100206 uint16_t xlate_src_csum_adjustment;
207 /* Transport layer checksum adjustment after source translation */
Dave Hudson87973cd2013-10-22 16:00:04 +0100208 __be32 xlate_dest_ip; /* Address after destination translation */
209 __be16 xlate_dest_port; /* Port/connection ident after destination translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100210 uint16_t xlate_dest_csum_adjustment;
211 /* Transport layer checksum adjustment after destination translation */
212
213 /*
214 * Packet transmit information.
215 */
216 struct net_device *xmit_dev; /* Network device on which to transmit */
217 unsigned short int xmit_dev_mtu;
218 /* Interface MTU */
219 uint16_t xmit_dest_mac[ETH_ALEN / 2];
220 /* Destination MAC address to use when forwarding */
221 uint16_t xmit_src_mac[ETH_ALEN / 2];
222 /* Source MAC address to use when forwarding */
Ben Menchaca0971b7a2014-01-10 14:43:02 -0600223 struct sock *pppoe_sk; /* pppoe socket for transmitting to this xmit_dev */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100224
225 /*
226 * Summary stats.
227 */
228 uint64_t rx_packet_count64; /* Number of packets RX'd */
229 uint64_t rx_byte_count64; /* Number of bytes RX'd */
230};
231
232/*
233 * Per-connection data structure.
234 */
235struct sfe_ipv4_connection {
236 struct sfe_ipv4_connection *next;
237 /* Pointer to the next entry in a hash chain */
238 struct sfe_ipv4_connection *prev;
239 /* Pointer to the previous entry in a hash chain */
240 int protocol; /* IP protocol number */
Dave Hudson87973cd2013-10-22 16:00:04 +0100241 __be32 src_ip; /* Source IP address */
242 __be32 src_ip_xlate; /* NAT-translated source IP address */
243 __be32 dest_ip; /* Destination IP address */
244 __be32 dest_ip_xlate; /* NAT-translated destination IP address */
245 __be16 src_port; /* Source port */
246 __be16 src_port_xlate; /* NAT-translated source port */
247 __be16 dest_port; /* Destination port */
248 __be16 dest_port_xlate; /* NAT-translated destination port */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100249 struct sfe_ipv4_connection_match *original_match;
250 /* Original direction matching structure */
251 struct net_device *original_dev;
252 /* Original direction source device */
253 struct sfe_ipv4_connection_match *reply_match;
254 /* Reply direction matching structure */
255 struct net_device *reply_dev; /* Reply direction source device */
256 uint64_t last_sync_jiffies; /* Jiffies count for the last sync */
257 struct sfe_ipv4_connection *all_connections_next;
258 /* Pointer to the next entry in the list of all connections */
259 struct sfe_ipv4_connection *all_connections_prev;
260 /* Pointer to the previous entry in the list of all connections */
261 int iterators; /* Number of iterators currently using this connection */
262 bool pending_free; /* Flag that indicates that this connection should be freed after iteration */
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600263 uint32_t mark; /* mark for outgoing packet */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100264};
265
/*
 * IPv4 connections and hash table size information.
 *
 * The table size is a power of two derived from the shift, and the mask
 * selects a valid index into a table of that size.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
272
/*
 * enum sfe_ipv4_exception_events
 *	Identifiers for the exception events that are counted.
 *
 * The values index the exception counter arrays and the table of exception
 * names, so the order here must stay in step with that table.
 * SFE_IPV4_EXCEPTION_EVENT_LAST is the number of events.
 */
enum sfe_ipv4_exception_events {
	/* UDP */
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE = 0,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,

	/* TCP */
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,

	/* ICMP */
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,

	/* Protocol independent */
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,

	/* Number of exception events; must remain last */
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
312
313static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
314 "UDP_HEADER_INCOMPLETE",
315 "UDP_NO_CONNECTION",
316 "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
317 "UDP_SMALL_TTL",
318 "UDP_NEEDS_FRAGMENTATION",
319 "TCP_HEADER_INCOMPLETE",
320 "TCP_NO_CONNECTION_SLOW_FLAGS",
321 "TCP_NO_CONNECTION_FAST_FLAGS",
322 "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
323 "TCP_SMALL_TTL",
324 "TCP_NEEDS_FRAGMENTATION",
325 "TCP_FLAGS",
326 "TCP_SEQ_EXCEEDS_RIGHT_EDGE",
327 "TCP_SMALL_DATA_OFFS",
328 "TCP_BAD_SACK",
329 "TCP_BIG_DATA_OFFS",
330 "TCP_SEQ_BEFORE_LEFT_EDGE",
331 "TCP_ACK_EXCEEDS_RIGHT_EDGE",
332 "TCP_ACK_BEFORE_LEFT_EDGE",
333 "ICMP_HEADER_INCOMPLETE",
334 "ICMP_UNHANDLED_TYPE",
335 "ICMP_IPV4_HEADER_INCOMPLETE",
336 "ICMP_IPV4_NON_V4",
337 "ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
338 "ICMP_IPV4_UDP_HEADER_INCOMPLETE",
339 "ICMP_IPV4_TCP_HEADER_INCOMPLETE",
340 "ICMP_IPV4_UNHANDLED_PROTOCOL",
341 "ICMP_NO_CONNECTION",
342 "ICMP_FLUSHED_CONNECTION",
343 "HEADER_INCOMPLETE",
344 "BAD_TOTAL_LENGTH",
345 "NON_V4",
346 "NON_INITIAL_FRAGMENT",
347 "DATAGRAM_INCOMPLETE",
348 "IP_OPTIONS_INCOMPLETE",
349 "UNHANDLED_PROTOCOL"
350};
351
352/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600353 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100354 */
355struct sfe_ipv4 {
356 spinlock_t lock; /* Lock for SMP correctness */
357 struct sfe_ipv4_connection_match *active_head;
358 /* Head of the list of recently active connections */
359 struct sfe_ipv4_connection_match *active_tail;
360 /* Tail of the list of recently active connections */
361 struct sfe_ipv4_connection *all_connections_head;
362 /* Head of the list of all connections */
363 struct sfe_ipv4_connection *all_connections_tail;
364 /* Tail of the list of all connections */
365 unsigned int num_connections; /* Number of connections */
366 struct timer_list timer; /* Timer used for periodic sync ops */
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600367 sfe_ipv4_sync_rule_callback_t __rcu sync_rule_callback;
368 /* Callback function registered by a connection manager for stats syncing */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100369 struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
370 /* Connection hash table */
371 struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
372 /* Connection match hash table */
373
374 /*
375 * Statistics.
376 */
377 uint32_t connection_create_requests;
378 /* Number of IPv4 connection create requests */
379 uint32_t connection_create_collisions;
380 /* Number of IPv4 connection create requests that collided with existing hash table entries */
381 uint32_t connection_destroy_requests;
382 /* Number of IPv4 connection destroy requests */
383 uint32_t connection_destroy_misses;
384 /* Number of IPv4 connection destroy requests that missed our hash table */
385 uint32_t connection_match_hash_hits;
386 /* Number of IPv4 connection match hash hits */
387 uint32_t connection_match_hash_reorders;
388 /* Number of IPv4 connection match hash reorders */
389 uint32_t connection_flushes; /* Number of IPv4 connection flushes */
390 uint32_t packets_forwarded; /* Number of IPv4 packets forwarded */
391 uint32_t packets_not_forwarded; /* Number of IPv4 packets not forwarded */
392 uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];
393
394 /*
395 * Summary tatistics.
396 */
397 uint64_t connection_create_requests64;
398 /* Number of IPv4 connection create requests */
399 uint64_t connection_create_collisions64;
400 /* Number of IPv4 connection create requests that collided with existing hash table entries */
401 uint64_t connection_destroy_requests64;
402 /* Number of IPv4 connection destroy requests */
403 uint64_t connection_destroy_misses64;
404 /* Number of IPv4 connection destroy requests that missed our hash table */
405 uint64_t connection_match_hash_hits64;
406 /* Number of IPv4 connection match hash hits */
407 uint64_t connection_match_hash_reorders64;
408 /* Number of IPv4 connection match hash reorders */
409 uint64_t connection_flushes64; /* Number of IPv4 connection flushes */
410 uint64_t packets_forwarded64; /* Number of IPv4 packets forwarded */
411 uint64_t packets_not_forwarded64;
412 /* Number of IPv4 packets not forwarded */
413 uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];
414
415 /*
416 * Control state.
417 */
418 struct kobject *sys_sfe_ipv4; /* sysfs linkage */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100419 int debug_dev; /* Major number of the debug char device */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100420};
421
/*
 * enum sfe_ipv4_debug_xml_states
 *	Enumeration of the states of the XML output.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START = 0,

	/* Connections section */
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,

	/* Exceptions section */
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,

	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
437
438/*
439 * XML write state.
440 */
441struct sfe_ipv4_debug_xml_write_state {
442 enum sfe_ipv4_debug_xml_states state;
443 /* XML output file state machine state */
444 struct sfe_ipv4_connection *iter_conn;
445 /* Next connection iterator */
446 int iter_exception; /* Next exception iterator */
447};
448
/*
 * sfe_ipv4_debug_xml_write_method_t
 *	Signature of a function that writes one part of the XML debug output.
 *
 * NOTE(review): the meaning of the bool result and of the buffer/msg/length/
 * total_read arguments is defined by the implementations, which are not
 * visible here - confirm against them.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);
451
/*
 * The per-module state instance.
 */
struct sfe_ipv4 __si;
453
454/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100455 * sfe_ipv4_gen_ip_csum()
456 * Generate the IP checksum for an IPv4 header.
457 *
458 * Note that this function assumes that we have only 20 bytes of IP header.
459 */
460static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_iphdr *iph)
461{
462 uint32_t sum;
463 uint16_t *i = (uint16_t *)iph;
464
465 iph->check = 0;
466
467 /*
468 * Generate the sum.
469 */
470 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
471
472 /*
473 * Fold it to ones-complement form.
474 */
475 sum = (sum & 0xffff) + (sum >> 16);
476 sum = (sum & 0xffff) + (sum >> 16);
477
478 return (uint16_t)sum ^ 0xffff;
479}
480
481/*
482 * sfe_ipv4_get_connection_match_hash()
483 * Generate the hash used in connection match lookups.
484 */
485static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100486 __be32 src_ip, __be16 src_port,
487 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100488{
489 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100490 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100491 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
492}
493
494/*
495 * sfe_ipv4_find_sfe_ipv4_connection_match()
496 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
497 *
498 * On entry we must be holding the lock that protects the hash table.
499 */
500static struct sfe_ipv4_connection_match *
501sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100502 __be32 src_ip, __be16 src_port,
503 __be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100504static struct sfe_ipv4_connection_match *
505sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100506 __be32 src_ip, __be16 src_port,
507 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100508{
509 struct sfe_ipv4_connection_match *cm;
510 struct sfe_ipv4_connection_match *head;
511 unsigned int conn_match_idx;
512
513 conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
514 cm = si->conn_match_hash[conn_match_idx];
515
516 /*
517 * If we don't have anything in this chain then bale.
518 */
519 if (unlikely(!cm)) {
520 return cm;
521 }
522
523 /*
524 * Hopefully the first entry is the one we want.
525 */
526 if (likely(cm->match_src_port == src_port)
527 && likely(cm->match_dest_port == dest_port)
528 && likely(cm->match_src_ip == src_ip)
529 && likely(cm->match_dest_ip == dest_ip)
530 && likely(cm->match_protocol == protocol)
531 && likely(cm->match_dev == dev)) {
532 si->connection_match_hash_hits++;
533 return cm;
534 }
535
536 /*
537 * We may or may not have a matching entry but if we do then we want to
538 * move that entry to the top of the hash chain when we get to it. We
539 * presume that this will be reused again very quickly.
540 */
541 head = cm;
542 do {
543 cm = cm->next;
544 } while (cm && (cm->match_src_port != src_port
545 || cm->match_dest_port != dest_port
546 || cm->match_src_ip != src_ip
547 || cm->match_dest_ip != dest_ip
548 || cm->match_protocol != protocol
549 || cm->match_dev != dev));
550
551 /*
552 * Not found then we're done.
553 */
554 if (unlikely(!cm)) {
555 return cm;
556 }
557
558 /*
559 * We found a match so move it.
560 */
561 if (cm->next) {
562 cm->next->prev = cm->prev;
563 }
564 cm->prev->next = cm->next;
565 cm->prev = NULL;
566 cm->next = head;
567 head->prev = cm;
568 si->conn_match_hash[conn_match_idx] = cm;
569 si->connection_match_hash_reorders++;
570
571 return cm;
572}
573
574/*
575 * sfe_ipv4_connection_match_update_summary_stats()
576 * Update the summary stats for a connection match entry.
577 */
578static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
579{
580 cm->rx_packet_count64 += cm->rx_packet_count;
581 cm->rx_packet_count = 0;
582 cm->rx_byte_count64 += cm->rx_byte_count;
583 cm->rx_byte_count = 0;
584}
585
586/*
587 * sfe_ipv4_connection_match_compute_translations()
588 * Compute port and address translations for a connection match entry.
589 */
590static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
591{
592 /*
593 * Before we insert the entry look to see if this is tagged as doing address
594 * translations. If it is then work out the adjustment that we need to apply
595 * to the transport checksum.
596 */
597 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
598 /*
599 * Precompute an incremental checksum adjustment so we can
600 * edit packets in this stream very quickly. The algorithm is from RFC1624.
601 */
602 uint16_t src_ip_hi = cm->match_src_ip >> 16;
603 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
604 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
605 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
606 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100607 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100608 uint32_t adj;
609
610 /*
611 * When we compute this fold it down to a 16-bit offset
612 * as that way we can avoid having to do a double
613 * folding of the twos-complement result because the
614 * addition of 2 16-bit values cannot cause a double
615 * wrap-around!
616 */
617 adj = src_ip_hi + src_ip_lo + cm->match_src_port
618 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
619 adj = (adj & 0xffff) + (adj >> 16);
620 adj = (adj & 0xffff) + (adj >> 16);
621 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600622
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100623 }
624
625 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
626 /*
627 * Precompute an incremental checksum adjustment so we can
628 * edit packets in this stream very quickly. The algorithm is from RFC1624.
629 */
630 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
631 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
632 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
633 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
634 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100635 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100636 uint32_t adj;
637
638 /*
639 * When we compute this fold it down to a 16-bit offset
640 * as that way we can avoid having to do a double
641 * folding of the twos-complement result because the
642 * addition of 2 16-bit values cannot cause a double
643 * wrap-around!
644 */
645 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
646 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
647 adj = (adj & 0xffff) + (adj >> 16);
648 adj = (adj & 0xffff) + (adj >> 16);
649 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
650 }
651}
652
653/*
654 * sfe_ipv4_update_summary_stats()
655 * Update the summary stats.
656 */
657static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
658{
659 int i;
660
661 si->connection_create_requests64 += si->connection_create_requests;
662 si->connection_create_requests = 0;
663 si->connection_create_collisions64 += si->connection_create_collisions;
664 si->connection_create_collisions = 0;
665 si->connection_destroy_requests64 += si->connection_destroy_requests;
666 si->connection_destroy_requests = 0;
667 si->connection_destroy_misses64 += si->connection_destroy_misses;
668 si->connection_destroy_misses = 0;
669 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
670 si->connection_match_hash_hits = 0;
671 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
672 si->connection_match_hash_reorders = 0;
673 si->connection_flushes64 += si->connection_flushes;
674 si->connection_flushes = 0;
675 si->packets_forwarded64 += si->packets_forwarded;
676 si->packets_forwarded = 0;
677 si->packets_not_forwarded64 += si->packets_not_forwarded;
678 si->packets_not_forwarded = 0;
679
680 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
681 si->exception_events64[i] += si->exception_events[i];
682 si->exception_events[i] = 0;
683 }
684}
685
686/*
687 * sfe_ipv4_insert_sfe_ipv4_connection_match()
688 * Insert a connection match into the hash.
689 *
690 * On entry we must be holding the lock that protects the hash table.
691 */
692static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
693{
694 struct sfe_ipv4_connection_match **hash_head;
695 struct sfe_ipv4_connection_match *prev_head;
696 unsigned int conn_match_idx
697 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
698 cm->match_src_ip, cm->match_src_port,
699 cm->match_dest_ip, cm->match_dest_port);
700 hash_head = &si->conn_match_hash[conn_match_idx];
701 prev_head = *hash_head;
702 cm->prev = NULL;
703 if (prev_head) {
704 prev_head->prev = cm;
705 }
706
707 cm->next = prev_head;
708 *hash_head = cm;
709}
710
711/*
712 * sfe_ipv4_remove_sfe_ipv4_connection_match()
713 * Remove a connection match object from the hash.
714 *
715 * On entry we must be holding the lock that protects the hash table.
716 */
717static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
718{
719 /*
720 * Unlink the connection match entry from the hash.
721 */
722 if (cm->prev) {
723 cm->prev->next = cm->next;
724 } else {
725 unsigned int conn_match_idx
726 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
727 cm->match_src_ip, cm->match_src_port,
728 cm->match_dest_ip, cm->match_dest_port);
729 si->conn_match_hash[conn_match_idx] = cm->next;
730 }
731
732 if (cm->next) {
733 cm->next->prev = cm->prev;
734 }
735
736 /*
737 * Unlink the connection match entry from the active list.
738 */
739 if (likely(cm->active_prev)) {
740 cm->active_prev->active_next = cm->active_next;
741 } else {
742 si->active_head = cm->active_next;
743 }
744
745 if (likely(cm->active_next)) {
746 cm->active_next->active_prev = cm->active_prev;
747 } else {
748 si->active_tail = cm->active_prev;
749 }
750
751}
752
753/*
754 * sfe_ipv4_get_connection_hash()
755 * Generate the hash used in connection lookups.
756 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100757static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
758 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100759{
Dave Hudson87973cd2013-10-22 16:00:04 +0100760 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100761 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
762}
763
764/*
765 * sfe_ipv4_find_sfe_ipv4_connection()
766 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
767 *
768 * On entry we must be holding the lock that protects the hash table.
769 */
770static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100771 __be32 src_ip, __be16 src_port,
772 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100773{
774 struct sfe_ipv4_connection *c;
775 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
776 c = si->conn_hash[conn_idx];
777
778 /*
779 * If we don't have anything in this chain then bale.
780 */
781 if (unlikely(!c)) {
782 return c;
783 }
784
785 /*
786 * Hopefully the first entry is the one we want.
787 */
788 if (likely(c->src_port == src_port)
789 && likely(c->dest_port == dest_port)
790 && likely(c->src_ip == src_ip)
791 && likely(c->dest_ip == dest_ip)
792 && likely(c->protocol == protocol)) {
793 return c;
794 }
795
796 /*
797 * We may or may not have a matching entry but if we do then we want to
798 * move that entry to the top of the hash chain when we get to it. We
799 * presume that this will be reused again very quickly.
800 */
801 do {
802 c = c->next;
803 } while (c && (c->src_port != src_port
804 || c->dest_port != dest_port
805 || c->src_ip != src_ip
806 || c->dest_ip != dest_ip
807 || c->protocol != protocol));
808
809 /*
810 * Will need connection entry for next create/destroy metadata,
811 * So no need to re-order entry for these requests
812 */
813 return c;
814}
815
816/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600817 * sfe_ipv4_mark_rule()
818 * Updates the mark for a current offloaded connection
819 *
820 * Will take hash lock upon entry
821 */
822static void sfe_ipv4_mark_rule(struct sfe_ipv4_mark *mark)
823{
824 struct sfe_ipv4 *si = &__si;
825 struct sfe_ipv4_connection *c;
826 spin_lock(&si->lock);
827 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
828 mark->src_ip, mark->src_port,
829 mark->dest_ip, mark->dest_port);
830 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600831 DEBUG_TRACE("Matching connection found for mark, "
832 "setting from %08x to %08x\n",
833 c->mark, mark->mark);
834 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600835 c->mark = mark->mark;
836 }
837 spin_unlock(&si->lock);
838}
839
840/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100841 * sfe_ipv4_insert_sfe_ipv4_connection()
842 * Insert a connection into the hash.
843 *
844 * On entry we must be holding the lock that protects the hash table.
845 */
846static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
847{
848 struct sfe_ipv4_connection **hash_head;
849 struct sfe_ipv4_connection *prev_head;
850 unsigned int conn_idx;
851
852 /*
853 * Insert entry into the connection hash.
854 */
855 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
856 c->dest_ip, c->dest_port);
857 hash_head = &si->conn_hash[conn_idx];
858 prev_head = *hash_head;
859 c->prev = NULL;
860 if (prev_head) {
861 prev_head->prev = c;
862 }
863
864 c->next = prev_head;
865 *hash_head = c;
866
867 /*
868 * Insert entry into the "all connections" list.
869 */
870 if (si->all_connections_tail) {
871 c->all_connections_prev = si->all_connections_tail;
872 si->all_connections_tail->all_connections_next = c;
873 } else {
874 c->all_connections_prev = NULL;
875 si->all_connections_head = c;
876 }
877
878 si->all_connections_tail = c;
879 c->all_connections_next = NULL;
880 si->num_connections++;
881
882 /*
883 * Insert the connection match objects too.
884 */
885 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
886 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
887}
888
889/*
890 * sfe_ipv4_remove_sfe_ipv4_connection()
891 * Remove a sfe_ipv4_connection object from the hash.
892 *
893 * On entry we must be holding the lock that protects the hash table.
894 */
895static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
896{
897 /*
898 * Remove the connection match objects.
899 */
900 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
901 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
902
903 /*
904 * Unlink the connection.
905 */
906 if (c->prev) {
907 c->prev->next = c->next;
908 } else {
909 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
910 c->dest_ip, c->dest_port);
911 si->conn_hash[conn_idx] = c->next;
912 }
913
914 if (c->next) {
915 c->next->prev = c->prev;
916 }
917}
918
919/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100920 * sfe_ipv4_sync_sfe_ipv4_connection()
921 * Sync a connection.
922 *
923 * On entry to this function we expect that the lock for the connection is either
924 * already held or isn't required.
925 */
926static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
927 struct sfe_ipv4_sync *sis, uint64_t now_jiffies)
928{
929 struct sfe_ipv4_connection_match *original_cm;
930 struct sfe_ipv4_connection_match *reply_cm;
931
932 /*
933 * Fill in the update message.
934 */
935 sis->protocol = c->protocol;
936 sis->src_ip = c->src_ip;
937 sis->dest_ip = c->dest_ip;
938 sis->src_port = c->src_port;
939 sis->dest_port = c->dest_port;
940
941 original_cm = c->original_match;
942 reply_cm = c->reply_match;
943 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
944 sis->src_td_end = original_cm->protocol_state.tcp.end;
945 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
946 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
947 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
948 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
949
950 sfe_ipv4_connection_match_update_summary_stats(original_cm);
951 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
952
953 sis->src_packet_count = original_cm->rx_packet_count64;
954 sis->src_byte_count = original_cm->rx_byte_count64;
955 sis->dest_packet_count = reply_cm->rx_packet_count64;
956 sis->dest_byte_count = reply_cm->rx_byte_count64;
957
958 /*
959 * Get the time increment since our last sync.
960 */
961 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
962 c->last_sync_jiffies = now_jiffies;
963}
964
965/*
966 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
967 * Remove an iterator from a connection - free all resources if necessary.
968 *
969 * Returns true if the connection should now be free, false if not.
970 *
971 * We must be locked on entry to this function.
972 */
973static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
974{
975 /*
976 * Are we the last iterator for this connection?
977 */
978 c->iterators--;
979 if (c->iterators) {
980 return false;
981 }
982
983 /*
984 * Is this connection marked for deletion?
985 */
986 if (!c->pending_free) {
987 return false;
988 }
989
990 /*
991 * We're ready to delete this connection so unlink it from the "all
992 * connections" list.
993 */
994 si->num_connections--;
995 if (c->all_connections_prev) {
996 c->all_connections_prev->all_connections_next = c->all_connections_next;
997 } else {
998 si->all_connections_head = c->all_connections_next;
999 }
1000
1001 if (c->all_connections_next) {
1002 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1003 } else {
1004 si->all_connections_tail = c->all_connections_prev;
1005 }
1006
1007 return true;
1008}
1009
1010/*
1011 * sfe_ipv4_flush_sfe_ipv4_connection()
1012 * Flush a connection and free all associated resources.
1013 *
1014 * We need to be called with bottom halves disabled locally as we need to acquire
1015 * the connection hash lock and release it again. In general we're actually called
1016 * from within a BH and so we're fine, but we're also called when connections are
1017 * torn down.
1018 */
1019static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1020{
1021 struct sfe_ipv4_sync sis;
1022 uint64_t now_jiffies;
1023 bool pending_free = false;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001024 sfe_ipv4_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001025
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001026 rcu_read_lock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001027 spin_lock(&si->lock);
1028 si->connection_flushes++;
1029
1030 /*
1031 * Check that we're not currently being iterated. If we are then
1032 * we can't free this entry yet but must mark it pending a free. If it's
1033 * not being iterated then we can unlink it from the list of all
1034 * connections.
1035 */
1036 if (c->iterators) {
1037 pending_free = true;
1038 c->pending_free = true;
1039 } else {
1040 si->num_connections--;
1041 if (c->all_connections_prev) {
1042 c->all_connections_prev->all_connections_next = c->all_connections_next;
1043 } else {
1044 si->all_connections_head = c->all_connections_next;
1045 }
1046
1047 if (c->all_connections_next) {
1048 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1049 } else {
1050 si->all_connections_tail = c->all_connections_prev;
1051 }
1052 }
1053
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001054 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
1055
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001056 spin_unlock(&si->lock);
1057
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001058 if (sync_rule_callback) {
1059 /*
1060 * Generate a sync message and then sync.
1061 */
1062 now_jiffies = get_jiffies_64();
1063 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
1064 sync_rule_callback(&sis);
1065 }
1066
1067 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001068
1069 /*
1070 * If we can't yet free the underlying memory then we're done.
1071 */
1072 if (pending_free) {
1073 return;
1074 }
1075
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001076 if (c->original_match->pppoe_sk) {
1077 sock_put(c->original_match->pppoe_sk);
1078 }
1079 if (c->reply_match->pppoe_sk) {
1080 sock_put(c->reply_match->pppoe_sk);
1081 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001082 /*
1083 * Release our hold of the source and dest devices and free the memory
1084 * for our connection objects.
1085 */
1086 dev_put(c->original_dev);
1087 dev_put(c->reply_dev);
1088 kfree(c->original_match);
1089 kfree(c->reply_match);
1090 kfree(c);
1091}
1092
1093/*
1094 * sfe_ipv4_recv_udp()
1095 * Handle UDP packet receives and forwarding.
1096 */
1097static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
1098 unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl, bool flush_on_find)
1099{
1100 struct sfe_ipv4_udphdr *udph;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001101 __be16 proto;
Dave Hudson87973cd2013-10-22 16:00:04 +01001102 __be32 src_ip;
1103 __be32 dest_ip;
1104 __be16 src_port;
1105 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001106 struct sfe_ipv4_connection_match *cm;
1107 uint8_t ttl;
1108 struct net_device *xmit_dev;
1109
1110 /*
1111 * Is our packet too short to contain a valid UDP header?
1112 */
1113 if (unlikely(len < (sizeof(struct sfe_ipv4_udphdr) + ihl))) {
1114 spin_lock(&si->lock);
1115 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1116 si->packets_not_forwarded++;
1117 spin_unlock(&si->lock);
1118
1119 DEBUG_TRACE("packet too short for UDP header\n");
1120 return 0;
1121 }
1122
1123 /*
1124 * Read the IP address and port information. Read the IP header data first
1125 * because we've almost certainly got that in the cache. We may not yet have
1126 * the UDP header cached though so allow more time for any prefetching.
1127 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001128 src_ip = iph->saddr;
1129 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001130
1131 udph = (struct sfe_ipv4_udphdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001132 src_port = udph->source;
1133 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001134
1135 spin_lock(&si->lock);
1136
1137 /*
1138 * Look for a connection match.
1139 */
1140 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1141 if (unlikely(!cm)) {
1142 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1143 si->packets_not_forwarded++;
1144 spin_unlock(&si->lock);
1145
1146 DEBUG_TRACE("no connection found\n");
1147 return 0;
1148 }
1149
1150 /*
1151 * If our packet has beern marked as "flush on find" we can't actually
1152 * forward it in the fast path, but now that we've found an associated
1153 * connection we can flush that out before we process the packet.
1154 */
1155 if (unlikely(flush_on_find)) {
1156 struct sfe_ipv4_connection *c = cm->connection;
1157 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1158 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1159 si->packets_not_forwarded++;
1160 spin_unlock(&si->lock);
1161
1162 DEBUG_TRACE("flush on find\n");
1163 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1164 return 0;
1165 }
1166
1167 /*
1168 * Does our TTL allow forwarding?
1169 */
1170 ttl = iph->ttl;
1171 if (unlikely(ttl < 2)) {
1172 struct sfe_ipv4_connection *c = cm->connection;
1173 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1174 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1175 si->packets_not_forwarded++;
1176 spin_unlock(&si->lock);
1177
1178 DEBUG_TRACE("ttl too low\n");
1179 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1180 return 0;
1181 }
1182
1183 /*
1184 * If our packet is larger than the MTU of the transmit interface then
1185 * we can't forward it easily.
1186 */
1187 if (unlikely(len > cm->xmit_dev_mtu)) {
1188 struct sfe_ipv4_connection *c = cm->connection;
1189 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1190 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1191 si->packets_not_forwarded++;
1192 spin_unlock(&si->lock);
1193
1194 DEBUG_TRACE("larger than mtu\n");
1195 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1196 return 0;
1197 }
1198
1199 /*
1200 * From this point on we're good to modify the packet.
1201 */
1202
1203 /*
1204 * Decrement our TTL.
1205 */
1206 iph->ttl = ttl - 1;
1207
1208 /*
1209 * Do we have to perform translations of the source address/port?
1210 */
1211 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1212 uint16_t udp_csum;
1213
Dave Hudson87973cd2013-10-22 16:00:04 +01001214 iph->saddr = cm->xlate_src_ip;
1215 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001216
1217 /*
1218 * Do we have a non-zero UDP checksum? If we do then we need
1219 * to update it.
1220 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001221 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001222 if (likely(udp_csum)) {
1223 uint32_t sum = udp_csum + cm->xlate_src_csum_adjustment;
1224 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001225 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001226 }
1227 }
1228
1229 /*
1230 * Do we have to perform translations of the destination address/port?
1231 */
1232 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1233 uint16_t udp_csum;
1234
Dave Hudson87973cd2013-10-22 16:00:04 +01001235 iph->daddr = cm->xlate_dest_ip;
1236 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001237
1238 /*
1239 * Do we have a non-zero UDP checksum? If we do then we need
1240 * to update it.
1241 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001242 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001243 if (likely(udp_csum)) {
1244 uint32_t sum = udp_csum + cm->xlate_dest_csum_adjustment;
1245 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001246 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001247 }
1248 }
1249
1250 /*
1251 * Replace the IP checksum.
1252 */
1253 iph->check = sfe_ipv4_gen_ip_csum(iph);
1254
1255// if ((nat_entry_data->tos & FASTNAT_DSCP_MASK) != (iph->tos & FASTNAT_DSCP_MASK)) {
1256// ipv4_change_dsfield(iph, (u_int8_t)(~FASTNAT_DSCP_MASK), nat_entry_data->tos);
1257// }
1258
1259// skb->priority = nat_entry_data->priority;
1260// skb->mark = nat_entry_data->mark;
1261
1262 /*
1263 * Update traffic stats.
1264 */
1265 cm->rx_packet_count++;
1266 cm->rx_byte_count += len;
1267
1268 /*
1269 * If we're not already on the active list then insert ourselves at the tail
1270 * of the current list.
1271 */
1272 if (unlikely(!cm->active)) {
1273 cm->active = true;
1274 cm->active_prev = si->active_tail;
1275 if (likely(si->active_tail)) {
1276 si->active_tail->active_next = cm;
1277 } else {
1278 si->active_head = cm;
1279 }
1280 si->active_tail = cm;
1281 }
1282
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001283 /*
1284 * On creation, we assume that cm->pppoe_sk is set, and that the socket
1285 * is held. If the connection is no longer established, we neeed to
1286 * release the socket, and we need to un-offload the connection. We
1287 * must also release the socket if the connection is closed, of course.
1288 * We assume that the xmit dev, as well as the dest MAC are set based on
1289 * the pppoe header (dest is po->pppoe_pa.remote).
1290 */
1291 if (cm->pppoe_sk) {
1292 struct pppoe_hdr *ph;
1293 int data_len = skb->len;
1294 struct sock *sk = cm->pppoe_sk;
1295 struct pppox_sock *po = pppox_sk(sk);
1296 struct net_device *dev = po->pppoe_dev;
1297
1298 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
1299 goto abort;
1300
1301 ph = (struct pppoe_hdr *)__skb_push(skb, PPPOE_SES_HLEN);
1302 ph->ver = 1;
1303 ph->type = 1;
1304 ph->code = 0;
1305 ph->sid = po->num;
1306 ph->length = htons(data_len + 2);
1307 ph->tag[0].tag_type = htons(PPP_IP);
1308 memcpy(cm->xmit_dest_mac, po->pppoe_pa.remote, ETH_ALEN);
1309
1310 proto = ETH_P_PPP_SES;
1311 } else {
1312 proto = ETH_P_IP;
1313 }
1314
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001315 xmit_dev = cm->xmit_dev;
1316 skb->dev = xmit_dev;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001317 skb->protocol = cpu_to_be16(proto);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001318
1319 /*
1320 * Do we have a simple Ethernet header to write?
1321 */
1322 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR))) {
1323 /*
1324 * If this is anything other than a point-to-point interface then we need to
1325 * create a header based on MAC addresses.
1326 */
1327 if (likely(!(xmit_dev->flags & IFF_POINTOPOINT))) {
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001328 xmit_dev->header_ops->create(skb, xmit_dev, proto,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001329 cm->xmit_dest_mac, cm->xmit_src_mac, len);
1330 }
1331 } else {
1332 struct sfe_ipv4_ethhdr *eth = (struct sfe_ipv4_ethhdr *)__skb_push(skb, ETH_HLEN);
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001333 eth->h_proto = skb->protocol;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001334 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1335 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1336 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1337 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1338 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1339 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
1340 }
1341
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001342 /* Mark outgoing packet */
1343 skb->mark = cm->connection->mark;
1344 if (skb->mark) {
1345 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1346 }
1347
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001348 si->packets_forwarded++;
1349 spin_unlock(&si->lock);
1350
1351 /*
1352 * We're going to check for GSO flags when we transmit the packet so
1353 * start fetching the necessary cache line now.
1354 */
1355 prefetch(skb_shinfo(skb));
1356
1357 /*
1358 * Send the packet on its way.
1359 */
1360 dev_queue_xmit(skb);
1361
1362 return 1;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001363
1364abort:
1365 kfree_skb(skb);
1366 return 1;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001367}
1368
1369/*
1370 * sfe_ipv4_process_tcp_option_sack()
1371 * Parse TCP SACK option and update ack according
1372 */
1373static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcphdr *th, const uint32_t data_offs,
1374 uint32_t *ack) __attribute__((always_inline));
1375static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcphdr *th, const uint32_t data_offs,
1376 uint32_t *ack)
1377{
1378 uint32_t length = sizeof(struct sfe_ipv4_tcphdr);
1379 uint8_t *ptr = (uint8_t *)th + length;
1380
1381 /*
1382 * If option is TIMESTAMP discard it.
1383 */
1384 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1385 && likely(ptr[0] == TCPOPT_NOP)
1386 && likely(ptr[1] == TCPOPT_NOP)
1387 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1388 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1389 return true;
1390 }
1391
1392 /*
1393 * TCP options. Parse SACK option.
1394 */
1395 while (length < data_offs) {
1396 uint8_t size;
1397 uint8_t kind;
1398
1399 ptr = (uint8_t *)th + length;
1400 kind = *ptr;
1401
1402 /*
1403 * NOP, for padding
1404 * Not in the switch because to fast escape and to not calculate size
1405 */
1406 if (kind == TCPOPT_NOP) {
1407 length++;
1408 continue;
1409 }
1410
1411 if (kind == TCPOPT_SACK) {
1412 uint32_t sack = 0;
1413 uint8_t re = 1 + 1;
1414
1415 size = *(ptr + 1);
1416 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1417 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1418 || (size > (data_offs - length))) {
1419 return false;
1420 }
1421
1422 re += 4;
1423 while (re < size) {
1424 uint32_t sack_re;
1425 uint8_t *sptr = ptr + re;
1426 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1427 if (sack_re > sack) {
1428 sack = sack_re;
1429 }
1430 re += TCPOLEN_SACK_PERBLOCK;
1431 }
1432 if (sack > *ack) {
1433 *ack = sack;
1434 }
1435 length += size;
1436 continue;
1437 }
1438 if (kind == TCPOPT_EOL) {
1439 return true;
1440 }
1441 size = *(ptr + 1);
1442 if (size < 2) {
1443 return false;
1444 }
1445 length += size;
1446 }
1447
1448 return true;
1449}
1450
1451/*
1452 * sfe_ipv4_recv_tcp()
1453 * Handle TCP packet receives and forwarding.
1454 */
1455static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
1456 unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl, bool flush_on_find)
1457{
1458 struct sfe_ipv4_tcphdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001459 __be32 src_ip;
1460 __be32 dest_ip;
1461 __be16 src_port;
1462 __be16 dest_port;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001463 __be16 proto;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001464 struct sfe_ipv4_connection_match *cm;
1465 struct sfe_ipv4_connection_match *counter_cm;
1466 uint8_t ttl;
1467 uint32_t flags;
1468 struct net_device *xmit_dev;
1469
1470 /*
1471 * Is our packet too short to contain a valid UDP header?
1472 */
1473 if (unlikely(len < (sizeof(struct sfe_ipv4_tcphdr) + ihl))) {
1474 spin_lock(&si->lock);
1475 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1476 si->packets_not_forwarded++;
1477 spin_unlock(&si->lock);
1478
1479 DEBUG_TRACE("packet too short for TCP header\n");
1480 return 0;
1481 }
1482
1483 /*
1484 * Read the IP address and port information. Read the IP header data first
1485 * because we've almost certainly got that in the cache. We may not yet have
1486 * the TCP header cached though so allow more time for any prefetching.
1487 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001488 src_ip = iph->saddr;
1489 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001490
1491 tcph = (struct sfe_ipv4_tcphdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001492 src_port = tcph->source;
1493 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001494 flags = tcp_flag_word(tcph);
1495
1496 spin_lock(&si->lock);
1497
1498 /*
1499 * Look for a connection match.
1500 */
1501 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1502 if (unlikely(!cm)) {
1503 /*
1504 * We didn't get a connection but as TCP is connection-oriented that
1505 * may be because this is a non-fast connection (not running established).
1506 * For diagnostic purposes we differentiate this here.
1507 */
1508 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1509 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1510 si->packets_not_forwarded++;
1511 spin_unlock(&si->lock);
1512
1513 DEBUG_TRACE("no connection found - fast flags\n");
1514 return 0;
1515 }
1516 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1517 si->packets_not_forwarded++;
1518 spin_unlock(&si->lock);
1519
1520 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1521 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1522 return 0;
1523 }
1524
1525 /*
1526 * If our packet has beern marked as "flush on find" we can't actually
1527 * forward it in the fast path, but now that we've found an associated
1528 * connection we can flush that out before we process the packet.
1529 */
1530 if (unlikely(flush_on_find)) {
1531 struct sfe_ipv4_connection *c = cm->connection;
1532 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1533 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1534 si->packets_not_forwarded++;
1535 spin_unlock(&si->lock);
1536
1537 DEBUG_TRACE("flush on find\n");
1538 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1539 return 0;
1540 }
1541
1542 /*
1543 * Does our TTL allow forwarding?
1544 */
1545 ttl = iph->ttl;
1546 if (unlikely(ttl < 2)) {
1547 struct sfe_ipv4_connection *c = cm->connection;
1548 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1549 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1550 si->packets_not_forwarded++;
1551 spin_unlock(&si->lock);
1552
1553 DEBUG_TRACE("ttl too low\n");
1554 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1555 return 0;
1556 }
1557
1558 /*
1559 * If our packet is larger than the MTU of the transmit interface then
1560 * we can't forward it easily.
1561 */
1562 if (unlikely(len > cm->xmit_dev_mtu)) {
1563 struct sfe_ipv4_connection *c = cm->connection;
1564 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1565 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1566 si->packets_not_forwarded++;
1567 spin_unlock(&si->lock);
1568
1569 DEBUG_TRACE("larger than mtu\n");
1570 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1571 return 0;
1572 }
1573
1574 /*
1575 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1576 * set is not a fast path packet.
1577 */
1578 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1579 struct sfe_ipv4_connection *c = cm->connection;
1580 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1581 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1582 si->packets_not_forwarded++;
1583 spin_unlock(&si->lock);
1584
1585 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1586 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1587 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1588 return 0;
1589 }
1590
1591 counter_cm = cm->counter_match;
1592
1593 /*
1594 * Are we doing sequence number checking?
1595 */
1596 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1597 uint32_t seq;
1598 uint32_t ack;
1599 uint32_t sack;
1600 uint32_t data_offs;
1601 uint32_t end;
1602 uint32_t left_edge;
1603 uint32_t scaled_win;
1604 uint32_t max_end;
1605
1606 /*
1607 * Is our sequence fully past the right hand edge of the window?
1608 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001609 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001610 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1611 struct sfe_ipv4_connection *c = cm->connection;
1612 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1613 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1614 si->packets_not_forwarded++;
1615 spin_unlock(&si->lock);
1616
1617 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1618 seq, cm->protocol_state.tcp.max_end + 1);
1619 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1620 return 0;
1621 }
1622
1623 /*
1624 * Check that our TCP data offset isn't too short.
1625 */
1626 data_offs = tcph->doff << 2;
1627 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcphdr))) {
1628 struct sfe_ipv4_connection *c = cm->connection;
1629 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1630 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1631 si->packets_not_forwarded++;
1632 spin_unlock(&si->lock);
1633
1634 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1635 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1636 return 0;
1637 }
1638
1639 /*
1640 * Update ACK according to any SACK option.
1641 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001642 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001643 sack = ack;
1644 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1645 struct sfe_ipv4_connection *c = cm->connection;
1646 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1647 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1648 si->packets_not_forwarded++;
1649 spin_unlock(&si->lock);
1650
1651 DEBUG_TRACE("TCP option SACK size is wrong\n");
1652 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1653 return 0;
1654 }
1655
1656 /*
1657 * Check that our TCP data offset isn't past the end of the packet.
1658 */
1659 data_offs += sizeof(struct sfe_ipv4_iphdr);
1660 if (unlikely(len < data_offs)) {
1661 struct sfe_ipv4_connection *c = cm->connection;
1662 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1663 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1664 si->packets_not_forwarded++;
1665 spin_unlock(&si->lock);
1666
1667 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1668 data_offs, len);
1669 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1670 return 0;
1671 }
1672
1673 end = seq + len - data_offs;
1674
1675 /*
1676 * Is our sequence fully before the left hand edge of the window?
1677 */
1678 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1679 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1680 struct sfe_ipv4_connection *c = cm->connection;
1681 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1682 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1683 si->packets_not_forwarded++;
1684 spin_unlock(&si->lock);
1685
1686 DEBUG_TRACE("seq: %u before left edge: %u\n",
1687 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1688 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1689 return 0;
1690 }
1691
1692 /*
1693 * Are we acking data that is to the right of what has been sent?
1694 */
1695 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1696 struct sfe_ipv4_connection *c = cm->connection;
1697 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1698 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1699 si->packets_not_forwarded++;
1700 spin_unlock(&si->lock);
1701
1702 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1703 sack, counter_cm->protocol_state.tcp.end + 1);
1704 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1705 return 0;
1706 }
1707
1708 /*
1709 * Is our ack too far before the left hand edge of the window?
1710 */
1711 left_edge = counter_cm->protocol_state.tcp.end
1712 - cm->protocol_state.tcp.max_win
1713 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1714 - 1;
1715 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1716 struct sfe_ipv4_connection *c = cm->connection;
1717 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1718 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1719 si->packets_not_forwarded++;
1720 spin_unlock(&si->lock);
1721
1722 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1723 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1724 return 0;
1725 }
1726
1727 /*
1728 * Have we just seen the largest window size yet for this connection? If yes
1729 * then we need to record the new value.
1730 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001731 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001732 scaled_win += (sack - ack);
1733 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1734 cm->protocol_state.tcp.max_win = scaled_win;
1735 }
1736
1737 /*
1738 * If our sequence and/or ack numbers have advanced then record the new state.
1739 */
1740 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1741 cm->protocol_state.tcp.end = end;
1742 }
1743
1744 max_end = sack + scaled_win;
1745 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1746 counter_cm->protocol_state.tcp.max_end = max_end;
1747 }
1748 }
1749
1750 /*
1751 * From this point on we're good to modify the packet.
1752 */
1753
1754 /*
1755 * Decrement our TTL.
1756 */
1757 iph->ttl = ttl - 1;
1758
1759 /*
1760 * Do we have to perform translations of the source address/port?
1761 */
1762 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1763 uint16_t tcp_csum;
1764 uint32_t sum;
1765
Dave Hudson87973cd2013-10-22 16:00:04 +01001766 iph->saddr = cm->xlate_src_ip;
1767 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001768
1769 /*
1770 * Do we have a non-zero UDP checksum? If we do then we need
1771 * to update it.
1772 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001773 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001774 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1775 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001776 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001777 }
1778
1779 /*
1780 * Do we have to perform translations of the destination address/port?
1781 */
1782 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1783 uint16_t tcp_csum;
1784 uint32_t sum;
1785
Dave Hudson87973cd2013-10-22 16:00:04 +01001786 iph->daddr = cm->xlate_dest_ip;
1787 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001788
1789 /*
1790 * Do we have a non-zero UDP checksum? If we do then we need
1791 * to update it.
1792 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001793 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001794 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1795 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001796 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001797 }
1798
1799 /*
1800 * Replace the IP checksum.
1801 */
1802 iph->check = sfe_ipv4_gen_ip_csum(iph);
1803
1804// if ((nat_entry_data->tos & FASTNAT_DSCP_MASK) != (iph->tos & FASTNAT_DSCP_MASK)) {
1805// ipv4_change_dsfield(iph, (u_int8_t)(~FASTNAT_DSCP_MASK), nat_entry_data->tos);
1806// }
1807
1808// skb->priority = nat_entry_data->priority;
1809// skb->mark = nat_entry_data->mark;
1810
1811 /*
1812 * Update traffic stats.
1813 */
1814 cm->rx_packet_count++;
1815 cm->rx_byte_count += len;
1816
1817 /*
1818 * If we're not already on the active list then insert ourselves at the tail
1819 * of the current list.
1820 */
1821 if (unlikely(!cm->active)) {
1822 cm->active = true;
1823 cm->active_prev = si->active_tail;
1824 if (likely(si->active_tail)) {
1825 si->active_tail->active_next = cm;
1826 } else {
1827 si->active_head = cm;
1828 }
1829 si->active_tail = cm;
1830 }
1831
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001832 if (cm->pppoe_sk) {
1833 struct pppoe_hdr *ph;
1834 int data_len = skb->len;
1835 struct sock *sk = cm->pppoe_sk;
1836 struct pppox_sock *po = pppox_sk(sk);
1837 struct net_device *dev = po->pppoe_dev;
1838
1839 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
1840 goto abort;
1841
1842 ph = (struct pppoe_hdr *)__skb_push(skb, PPPOE_SES_HLEN);
1843 ph->ver = 1;
1844 ph->type = 1;
1845 ph->code = 0;
1846 ph->sid = po->num;
1847 ph->length = htons(data_len + 2);
1848 ph->tag[0].tag_type = htons(PPP_IP);
1849 memcpy(cm->xmit_dest_mac, po->pppoe_pa.remote, ETH_ALEN);
1850
1851 proto = ETH_P_PPP_SES;
1852 } else {
1853 proto = ETH_P_IP;
1854 }
1855
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001856 xmit_dev = cm->xmit_dev;
1857 skb->dev = xmit_dev;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001858 skb->protocol = cpu_to_be16(proto);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001859
1860 /*
1861 * Do we have a simple Ethernet header to write?
1862 */
1863 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR))) {
1864 /*
1865 * If this is anything other than a point-to-point interface then we need to
1866 * create a header based on MAC addresses.
1867 */
1868 if (likely(!(xmit_dev->flags & IFF_POINTOPOINT))) {
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001869 xmit_dev->header_ops->create(skb, xmit_dev, proto,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001870 cm->xmit_dest_mac, cm->xmit_src_mac, len);
1871 }
1872 } else {
1873 struct sfe_ipv4_ethhdr *eth = (struct sfe_ipv4_ethhdr *)__skb_push(skb, ETH_HLEN);
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001874 eth->h_proto = skb->protocol;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001875 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1876 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1877 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1878 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1879 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1880 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
1881 }
1882
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001883 /*
1884 * Mark outgoing packet
1885 */
1886 skb->mark = cm->connection->mark;
1887 if (skb->mark) {
1888 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1889 }
1890
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001891 si->packets_forwarded++;
1892 spin_unlock(&si->lock);
1893
1894 /*
1895 * We're going to check for GSO flags when we transmit the packet so
1896 * start fetching the necessary cache line now.
1897 */
1898 prefetch(skb_shinfo(skb));
1899
1900 /*
1901 * Send the packet on its way.
1902 */
1903 dev_queue_xmit(skb);
1904
1905 return 1;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001906
1907abort:
1908 kfree_skb(skb);
1909 return 1;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001910}
1911
1912/*
1913 * sfe_ipv4_recv_icmp()
1914 * Handle ICMP packet receives.
1915 *
1916 * ICMP packets aren't handled as a "fast path" and always have us process them
1917 * through the default Linux stack. What we do need to do is look for any errors
1918 * about connections we are handling in the fast path. If we find any such
1919 * connections then we want to flush their state so that the ICMP error path
1920 * within Linux has all of the correct state should it need it.
1921 */
1922static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
1923 unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl)
1924{
1925 struct icmphdr *icmph;
1926 struct sfe_ipv4_iphdr *icmp_iph;
1927 unsigned int icmp_ihl_words;
1928 unsigned int icmp_ihl;
1929 uint32_t *icmp_trans_h;
1930 struct sfe_ipv4_udphdr *icmp_udph;
1931 struct sfe_ipv4_tcphdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001932 __be32 src_ip;
1933 __be32 dest_ip;
1934 __be16 src_port;
1935 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001936 struct sfe_ipv4_connection_match *cm;
1937 struct sfe_ipv4_connection *c;
1938
1939 /*
1940 * Is our packet too short to contain a valid UDP header?
1941 */
1942 len -= ihl;
1943 if (unlikely(len < sizeof(struct icmphdr))) {
1944 spin_lock(&si->lock);
1945 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
1946 si->packets_not_forwarded++;
1947 spin_unlock(&si->lock);
1948
1949 DEBUG_TRACE("packet too short for ICMP header\n");
1950 return 0;
1951 }
1952
1953 /*
1954 * We only handle "destination unreachable" and "time exceeded" messages.
1955 */
1956 icmph = (struct icmphdr *)(skb->data + ihl);
1957 if ((icmph->type != ICMP_DEST_UNREACH)
1958 && (icmph->type != ICMP_TIME_EXCEEDED)) {
1959 spin_lock(&si->lock);
1960 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
1961 si->packets_not_forwarded++;
1962 spin_unlock(&si->lock);
1963
1964 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
1965 return 0;
1966 }
1967
1968 /*
1969 * Do we have the full embedded IP header?
1970 */
1971 len -= sizeof(struct icmphdr);
1972 if (unlikely(len < sizeof(struct sfe_ipv4_iphdr))) {
1973 spin_lock(&si->lock);
1974 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
1975 si->packets_not_forwarded++;
1976 spin_unlock(&si->lock);
1977
1978 DEBUG_TRACE("Embedded IP header not complete\n");
1979 return 0;
1980 }
1981
1982 /*
1983 * Is our embedded IP version wrong?
1984 */
1985 icmp_iph = (struct sfe_ipv4_iphdr *)(icmph + 1);
1986 if (unlikely(icmp_iph->version != 4)) {
1987 spin_lock(&si->lock);
1988 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
1989 si->packets_not_forwarded++;
1990 spin_unlock(&si->lock);
1991
1992 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
1993 return 0;
1994 }
1995
1996 /*
1997 * Do we have the full embedded IP header, including any options?
1998 */
1999 icmp_ihl_words = icmp_iph->ihl;
2000 icmp_ihl = icmp_ihl_words << 2;
2001 if (unlikely(len < icmp_ihl)) {
2002 spin_lock(&si->lock);
2003 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2004 si->packets_not_forwarded++;
2005 spin_unlock(&si->lock);
2006
2007 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2008 return 0;
2009 }
2010
Nicolas Costaac2979c2014-01-14 10:35:24 -06002011 len -= icmp_ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002012 icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;
2013
2014 /*
2015 * Handle the embedded transport layer header.
2016 */
2017 switch (icmp_iph->protocol) {
2018 case IPPROTO_UDP:
2019 /*
2020 * We should have 8 bytes of UDP header - that's enough to identify
2021 * the connection.
2022 */
2023 if (unlikely(len < 8)) {
2024 spin_lock(&si->lock);
2025 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2026 si->packets_not_forwarded++;
2027 spin_unlock(&si->lock);
2028
2029 DEBUG_TRACE("Incomplete embedded UDP header\n");
2030 return 0;
2031 }
2032
2033 icmp_udph = (struct sfe_ipv4_udphdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002034 src_port = icmp_udph->source;
2035 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002036 break;
2037
2038 case IPPROTO_TCP:
2039 /*
2040 * We should have 8 bytes of TCP header - that's enough to identify
2041 * the connection.
2042 */
2043 if (unlikely(len < 8)) {
2044 spin_lock(&si->lock);
2045 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2046 si->packets_not_forwarded++;
2047 spin_unlock(&si->lock);
2048
2049 DEBUG_TRACE("Incomplete embedded TCP header\n");
2050 return 0;
2051 }
2052
2053 icmp_tcph = (struct sfe_ipv4_tcphdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002054 src_port = icmp_tcph->source;
2055 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002056 break;
2057
2058 default:
2059 spin_lock(&si->lock);
2060 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2061 si->packets_not_forwarded++;
2062 spin_unlock(&si->lock);
2063
2064 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2065 return 0;
2066 }
2067
Dave Hudson87973cd2013-10-22 16:00:04 +01002068 src_ip = icmp_iph->saddr;
2069 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002070
2071 spin_lock(&si->lock);
2072
2073 /*
2074 * Look for a connection match. Note that we reverse the source and destination
2075 * here because our embedded message contains a packet that was sent in the
2076 * opposite direction to the one in which we just received it. It will have
2077 * been sent on the interface from which we received it though so that's still
2078 * ok to use.
2079 */
2080 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2081 if (unlikely(!cm)) {
2082 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2083 si->packets_not_forwarded++;
2084 spin_unlock(&si->lock);
2085
2086 DEBUG_TRACE("no connection found\n");
2087 return 0;
2088 }
2089
2090 /*
2091 * We found a connection so now remove it from the connection list and flush
2092 * its state.
2093 */
2094 c = cm->connection;
2095 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2096 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2097 si->packets_not_forwarded++;
2098 spin_unlock(&si->lock);
2099
2100 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2101 return 0;
2102}
2103
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002104int sfe_pppoe_recv(struct net_device *dev, struct sk_buff *skb)
2105{
2106 int offloaded;
2107 int ppplen, skblen, proto;
2108 struct pppoe_hdr *phdr;
2109
2110 if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) {
2111 DEBUG_TRACE( "sfe pppoe: failed at pskb_may_pull\n");
2112 return 0;
2113 }
2114
2115 phdr = pppoe_hdr(skb);
2116 ppplen = ntohs(phdr->length) - sizeof(struct pppoe_tag);
2117 skblen = skb->len - PPPOE_SES_HLEN;
2118 proto = skb->protocol;
2119
2120 /* check pppoe len < len */
2121 if (skblen < ppplen) {
2122 DEBUG_TRACE( "sfe pppoe: skblen (%d) < ppplen (%d)\n",
2123 skblen, ppplen);
2124 return 0;
2125 }
2126
2127 /* We already calculated the skblen diff; inline skb_pull */
2128 skb->len = skblen;
2129 BUG_ON(skb->len < skb->data_len);
2130 skb->data += PPPOE_SES_HLEN;
2131 switch(ntohs(phdr->tag[0].tag_type)) {
2132 case PPP_IP:
2133 skb->protocol = ETH_P_IP;
2134 offloaded = sfe_ipv4_recv(dev, skb);
2135 break;
2136 default:
2137 DEBUG_TRACE("sfe pppoe: unknown protocol %x",
2138 ntohs(phdr->tag[0].tag_type));
2139 offloaded = 0;
2140 break;
2141 }
2142
2143 if (!offloaded) {
2144 /* Put the packet back the way we found it - not offloaded */
2145 skb_push(skb, PPPOE_SES_HLEN);
2146 skb->protocol = proto;
2147 }
2148
2149 return offloaded;
2150}
2151
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002152/*
2153 * sfe_ipv4_recv()
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002154 * Handle packet receives and forwarding.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002155 *
2156 * Returns 1 if the packet is forwarded or 0 if it isn't.
2157 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002158int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002159{
2160 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002161 unsigned int len;
2162 unsigned int tot_len;
2163 unsigned int frag_off;
2164 unsigned int ihl;
2165 bool flush_on_find;
2166 bool ip_options;
2167 struct sfe_ipv4_iphdr *iph;
2168 uint32_t protocol;
2169
2170 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002171 * Check that we have space for an IP header here.
2172 */
2173 len = skb->len;
2174 if (unlikely(len < sizeof(struct sfe_ipv4_iphdr))) {
2175 spin_lock(&si->lock);
2176 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2177 si->packets_not_forwarded++;
2178 spin_unlock(&si->lock);
2179
2180 DEBUG_TRACE("len: %u is too short\n", len);
2181 return 0;
2182 }
2183
2184 /*
2185 * Check that our "total length" is large enough for an IP header.
2186 */
2187 iph = (struct sfe_ipv4_iphdr *)skb->data;
2188 tot_len = ntohs(iph->tot_len);
2189 if (unlikely(tot_len < sizeof(struct sfe_ipv4_iphdr))) {
2190 spin_lock(&si->lock);
2191 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2192 si->packets_not_forwarded++;
2193 spin_unlock(&si->lock);
2194
2195 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2196 return 0;
2197 }
2198
2199 /*
2200 * Is our IP version wrong?
2201 */
2202 if (unlikely(iph->version != 4)) {
2203 spin_lock(&si->lock);
2204 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2205 si->packets_not_forwarded++;
2206 spin_unlock(&si->lock);
2207
2208 DEBUG_TRACE("IP version: %u\n", iph->version);
2209 return 0;
2210 }
2211
2212 /*
2213 * Does our datagram fit inside the skb?
2214 */
2215 if (unlikely(tot_len > len)) {
2216 spin_lock(&si->lock);
2217 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2218 si->packets_not_forwarded++;
2219 spin_unlock(&si->lock);
2220
2221 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2222 return 0;
2223 }
2224
2225 /*
2226 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002227 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002228 frag_off = ntohs(iph->frag_off);
2229 if (unlikely(frag_off & IP_OFFSET)) {
2230 spin_lock(&si->lock);
2231 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2232 si->packets_not_forwarded++;
2233 spin_unlock(&si->lock);
2234
2235 DEBUG_TRACE("non-initial fragment\n");
2236 return 0;
2237 }
2238
2239 /*
2240 * If we have a (first) fragment then mark it to cause any connection to flush.
2241 */
2242 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2243
2244 /*
2245 * Do we have any IP options? That's definite a slow path! If we do have IP
2246 * options we need to recheck our header size.
2247 */
2248 ihl = iph->ihl << 2;
2249 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_iphdr)) ? true : false;
2250 if (unlikely(ip_options)) {
2251 if (unlikely(len < ihl)) {
2252 spin_lock(&si->lock);
2253 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2254 si->packets_not_forwarded++;
2255 spin_unlock(&si->lock);
2256
2257 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2258 return 0;
2259 }
2260
2261 flush_on_find = true;
2262 }
2263
2264 protocol = iph->protocol;
2265 if (IPPROTO_UDP == protocol) {
2266 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2267 }
2268
2269 if (IPPROTO_TCP == protocol) {
2270 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2271 }
2272
2273 if (IPPROTO_ICMP == protocol) {
2274 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2275 }
2276
2277 spin_lock(&si->lock);
2278 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2279 si->packets_not_forwarded++;
2280 spin_unlock(&si->lock);
2281
2282 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2283 return 0;
2284}
2285
Nicolas Costa436926b2014-01-14 10:36:22 -06002286static void
2287sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
2288 struct sfe_ipv4_create *sic)
2289{
2290 struct sfe_ipv4_connection_match *orig_cm;
2291 struct sfe_ipv4_connection_match *repl_cm;
2292 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2293 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2294
2295 orig_cm = c->original_match;
2296 repl_cm = c->reply_match;
2297 orig_tcp = &orig_cm->protocol_state.tcp;
2298 repl_tcp = &repl_cm->protocol_state.tcp;
2299
2300 /* update orig */
2301 if (orig_tcp->max_win < sic->src_td_max_window) {
2302 orig_tcp->max_win = sic->src_td_max_window;
2303 }
2304 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2305 orig_tcp->end = sic->src_td_end;
2306 }
2307 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2308 orig_tcp->max_end = sic->src_td_max_end;
2309 }
2310
2311 /* update reply */
2312 if (repl_tcp->max_win < sic->dest_td_max_window) {
2313 repl_tcp->max_win = sic->dest_td_max_window;
2314 }
2315 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2316 repl_tcp->end = sic->dest_td_end;
2317 }
2318 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2319 repl_tcp->max_end = sic->dest_td_max_end;
2320 }
2321
2322 /* update match flags */
2323 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2324 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2325 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2326 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2327 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2328 }
2329}
2330
2331static void
2332sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
2333 struct sfe_ipv4_create *sic)
2334{
2335 switch (sic->protocol) {
2336 case IPPROTO_TCP:
2337 sfe_ipv4_update_tcp_state(c, sic);
2338 break;
2339 }
2340}
2341
2342void sfe_ipv4_update_rule(struct sfe_ipv4_create *sic)
2343{
2344 struct sfe_ipv4_connection *c;
2345 struct sfe_ipv4 *si = &__si;
2346
2347 spin_lock_bh(&si->lock);
2348
2349 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2350 sic->protocol,
2351 sic->src_ip,
2352 sic->src_port,
2353 sic->dest_ip,
2354 sic->dest_port);
2355 if (c != NULL) {
2356 sfe_ipv4_update_protocol_state(c, sic);
2357 }
2358
2359 spin_unlock_bh(&si->lock);
2360}
2361
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002362/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002363 * sfe_ipv4_create_rule()
2364 * Create a forwarding rule.
2365 */
Nicolas Costa514fde02014-01-13 15:50:29 -06002366int sfe_ipv4_create_rule(struct sfe_ipv4_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002367{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002368 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002369 struct sfe_ipv4_connection *c;
2370 struct sfe_ipv4_connection_match *original_cm;
2371 struct sfe_ipv4_connection_match *reply_cm;
2372
2373 spin_lock_bh(&si->lock);
2374 si->connection_create_requests++;
2375
2376 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002377 * Check to see if there is already a flow that matches the rule we're
2378 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002379 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002380 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2381 sic->protocol,
2382 sic->src_ip,
2383 sic->src_port,
2384 sic->dest_ip,
2385 sic->dest_port);
2386 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002387 si->connection_create_collisions++;
2388
2389 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002390 * If we already have the flow then it's likely that this
2391 * request to create the connection rule contains more
2392 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002393 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002394 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002395 spin_unlock_bh(&si->lock);
2396
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002397 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002398 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002399 sic->mark, sic->protocol,
2400 sic->src_dev->name, sic->src_mac, &sic->src_ip, ntohs(sic->src_port),
Dave Hudson87973cd2013-10-22 16:00:04 +01002401 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002402 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002403 }
2404
2405 /*
2406 * Allocate the various connection tracking objects.
2407 */
2408 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2409 if (unlikely(!c)) {
2410 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002411 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002412 }
2413
2414 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2415 if (unlikely(!original_cm)) {
2416 spin_unlock_bh(&si->lock);
2417 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002418 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002419 }
2420
2421 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2422 if (unlikely(!reply_cm)) {
2423 spin_unlock_bh(&si->lock);
2424 kfree(original_cm);
2425 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002426 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002427 }
2428
2429 /*
2430 * Fill in the "original" direction connection matching object.
2431 * Note that the transmit MAC address is "dest_mac_xlate" because
2432 * we always know both ends of a connection by their translated
2433 * addresses and not their public addresses.
2434 */
2435 original_cm->match_dev = sic->src_dev;
2436 original_cm->match_protocol = sic->protocol;
2437 original_cm->match_src_ip = sic->src_ip;
2438 original_cm->match_src_port = sic->src_port;
2439 original_cm->match_dest_ip = sic->dest_ip;
2440 original_cm->match_dest_port = sic->dest_port;
2441 original_cm->xlate_src_ip = sic->src_ip_xlate;
2442 original_cm->xlate_src_port = sic->src_port_xlate;
2443 original_cm->xlate_dest_ip = sic->dest_ip_xlate;
2444 original_cm->xlate_dest_port = sic->dest_port_xlate;
2445 original_cm->rx_packet_count = 0;
2446 original_cm->rx_packet_count64 = 0;
2447 original_cm->rx_byte_count = 0;
2448 original_cm->rx_byte_count64 = 0;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002449 original_cm->pppoe_sk = sic->dest_pppoe_sk;
2450 if (original_cm->pppoe_sk) {
2451 sock_hold(original_cm->pppoe_sk);
2452 original_cm->xmit_dev = pppox_sk(original_cm->pppoe_sk)->pppoe_dev;
2453 } else {
2454 original_cm->xmit_dev = sic->dest_dev;
2455 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002456 original_cm->xmit_dev_mtu = sic->dest_mtu;
2457 memcpy(original_cm->xmit_src_mac, sic->dest_dev->dev_addr, ETH_ALEN);
2458 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2459 original_cm->connection = c;
2460 original_cm->counter_match = reply_cm;
2461 original_cm->flags = 0;
2462 original_cm->active_next = NULL;
2463 original_cm->active_prev = NULL;
2464 original_cm->active = false;
2465 if (sic->dest_dev->header_ops->create == eth_header) {
2466 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR;
2467 }
2468
2469 /*
2470 * Fill in the "reply" direction connection matching object.
2471 */
2472 reply_cm->match_dev = sic->dest_dev;
2473 reply_cm->match_protocol = sic->protocol;
2474 reply_cm->match_src_ip = sic->dest_ip_xlate;
2475 reply_cm->match_src_port = sic->dest_port_xlate;
2476 reply_cm->match_dest_ip = sic->src_ip_xlate;
2477 reply_cm->match_dest_port = sic->src_port_xlate;
2478 reply_cm->xlate_src_ip = sic->dest_ip;
2479 reply_cm->xlate_src_port = sic->dest_port;
2480 reply_cm->xlate_dest_ip = sic->src_ip;
2481 reply_cm->xlate_dest_port = sic->src_port;
2482 reply_cm->rx_packet_count = 0;
2483 reply_cm->rx_packet_count64 = 0;
2484 reply_cm->rx_byte_count = 0;
2485 reply_cm->rx_byte_count64 = 0;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002486 reply_cm->pppoe_sk = sic->src_pppoe_sk;
2487 if (reply_cm->pppoe_sk) {
2488 sock_hold(reply_cm->pppoe_sk);
2489 reply_cm->xmit_dev = pppox_sk(reply_cm->pppoe_sk)->pppoe_dev;
2490 } else {
2491 reply_cm->xmit_dev = sic->src_dev;
2492 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002493 reply_cm->xmit_dev_mtu = sic->src_mtu;
2494 memcpy(reply_cm->xmit_src_mac, sic->src_dev->dev_addr, ETH_ALEN);
2495 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2496 reply_cm->connection = c;
2497 reply_cm->counter_match = original_cm;
2498 reply_cm->flags = 0;
2499 reply_cm->active_next = NULL;
2500 reply_cm->active_prev = NULL;
2501 reply_cm->active = false;
2502 if (sic->src_dev->header_ops->create == eth_header) {
2503 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR;
2504 }
2505
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002506
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002507 if (sic->dest_ip != sic->dest_ip_xlate || sic->dest_port != sic->dest_port_xlate) {
2508 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2509 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2510 }
2511
2512 if (sic->src_ip != sic->src_ip_xlate || sic->src_port != sic->src_port_xlate) {
2513 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2514 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2515 }
2516
2517 c->protocol = sic->protocol;
2518 c->src_ip = sic->src_ip;
2519 c->src_ip_xlate = sic->src_ip_xlate;
2520 c->src_port = sic->src_port;
2521 c->src_port_xlate = sic->src_port_xlate;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002522 c->original_dev = reply_cm->xmit_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002523 c->original_match = original_cm;
2524 c->dest_ip = sic->dest_ip;
2525 c->dest_ip_xlate = sic->dest_ip_xlate;
2526 c->dest_port = sic->dest_port;
2527 c->dest_port_xlate = sic->dest_port_xlate;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002528 c->reply_dev = original_cm->xmit_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002529 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002530 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002531
2532 c->last_sync_jiffies = get_jiffies_64();
2533 c->iterators = 0;
2534 c->pending_free = false;
2535
2536 /*
2537 * Take hold of our source and dest devices for the duration of the connection.
2538 */
2539 dev_hold(c->original_dev);
2540 dev_hold(c->reply_dev);
2541
2542 /*
2543 * Initialize the protocol-specific information that we track.
2544 */
2545 switch (sic->protocol) {
2546 case IPPROTO_TCP:
2547 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2548 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2549 original_cm->protocol_state.tcp.end = sic->src_td_end;
2550 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2551 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2552 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2553 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2554 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2555 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2556 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2557 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2558 }
2559 break;
2560 }
2561
2562 sfe_ipv4_connection_match_compute_translations(original_cm);
2563 sfe_ipv4_connection_match_compute_translations(reply_cm);
2564 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2565
2566 spin_unlock_bh(&si->lock);
2567
2568 /*
2569 * We have everything we need!
2570 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002571 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002572 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2573 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002574 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002575 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002576 &sic->src_ip, &sic->src_ip_xlate, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002577 sic->dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002578 &sic->dest_ip, &sic->dest_ip_xlate, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002579
2580 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002581}
2582
2583/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002584 * sfe_ipv4_destroy_rule()
2585 * Destroy a forwarding rule.
2586 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002587void sfe_ipv4_destroy_rule(struct sfe_ipv4_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002588{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002589 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002590 struct sfe_ipv4_connection *c;
2591
2592 spin_lock_bh(&si->lock);
2593 si->connection_destroy_requests++;
2594
2595 /*
2596 * Check to see if we have a flow that matches the rule we're trying
2597 * to destroy. If there isn't then we can't destroy it.
2598 */
2599 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip, sid->src_port,
2600 sid->dest_ip, sid->dest_port);
2601 if (!c) {
2602 si->connection_destroy_misses++;
2603 spin_unlock_bh(&si->lock);
2604
2605 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002606 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2607 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002608 return;
2609 }
2610
2611 /*
2612 * Remove our connection details from the hash tables.
2613 */
2614 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2615 spin_unlock_bh(&si->lock);
2616
2617 /*
2618 * Finally synchronize state and free resources. We need to protect against
2619 * pre-emption by our bottom half while we do this though.
2620 */
2621 local_bh_disable();
2622 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2623 local_bh_enable();
2624
2625 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002626 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2627 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002628}
2629
2630/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002631 * sfe_ipv4_register_sync_rule_callback()
2632 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002633 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002634void sfe_ipv4_register_sync_rule_callback(sfe_ipv4_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002635{
2636 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002637
2638 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002639 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002640 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002641}
2642
2643/*
2644 * sfe_ipv4_get_debug_dev()
2645 */
2646static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2647 struct device_attribute *attr,
2648 char *buf)
2649{
2650 struct sfe_ipv4 *si = &__si;
2651 ssize_t count;
2652 int num;
2653
2654 spin_lock_bh(&si->lock);
2655 num = si->debug_dev;
2656 spin_unlock_bh(&si->lock);
2657
2658 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2659 return count;
2660}
2661
2662/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002663 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002664 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002665static const struct device_attribute sfe_ipv4_debug_dev_attr =
2666 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2667
2668/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002669 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002670 * Destroy all connections that match a particular device.
2671 *
2672 * If we pass dev as NULL then this destroys all connections.
2673 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002674void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002675{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002676 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002677 struct sfe_ipv4_connection *c;
2678 struct sfe_ipv4_connection *c_next;
2679
2680 spin_lock_bh(&si->lock);
2681 c = si->all_connections_head;
2682 if (!c) {
2683 spin_unlock_bh(&si->lock);
2684 return;
2685 }
2686
2687 c->iterators++;
2688
2689 /*
2690 * Iterate over all connections
2691 */
2692 while (c) {
2693 c_next = c->all_connections_next;
2694
2695 /*
2696 * Before we do anything else, take an iterator reference for the
2697 * connection we'll iterate next.
2698 */
2699 if (c_next) {
2700 c_next->iterators++;
2701 }
2702
2703 /*
2704 * Does this connection relate to the device we are destroying? If
2705 * it does then ensure it is marked for being freed as soon as it
2706 * is no longer being iterated.
2707 */
2708 if (!dev
2709 || (dev == c->original_dev)
2710 || (dev == c->reply_dev)) {
2711 c->pending_free = true;
2712 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2713 }
2714
2715 /*
2716 * Remove the iterator reference that we acquired and see if we
2717 * should free any resources.
2718 */
2719 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2720 spin_unlock_bh(&si->lock);
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002721
2722 if (c->original_match->pppoe_sk) {
2723 sock_put(c->original_match->pppoe_sk);
2724 }
2725 if (c->reply_match->pppoe_sk) {
2726 sock_put(c->reply_match->pppoe_sk);
2727 }
Nicolas Costaac2979c2014-01-14 10:35:24 -06002728
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002729 /*
2730 * This entry is dead so release our hold of the source and
2731 * dest devices and free the memory for our connection objects.
2732 */
2733 dev_put(c->original_dev);
2734 dev_put(c->reply_dev);
2735 kfree(c->original_match);
2736 kfree(c->reply_match);
2737 kfree(c);
2738
2739 spin_lock_bh(&si->lock);
2740 }
2741
2742 c = c_next;
2743 }
2744
2745 spin_unlock_bh(&si->lock);
2746}
2747
2748/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002749 * sfe_ipv4_periodic_sync()
2750 */
2751static void sfe_ipv4_periodic_sync(unsigned long arg)
2752{
2753 struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
2754 uint64_t now_jiffies;
2755 int quota;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002756 sfe_ipv4_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002757
2758 now_jiffies = get_jiffies_64();
2759
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002760 rcu_read_lock();
2761 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2762 if (!sync_rule_callback) {
2763 rcu_read_unlock();
2764 goto done;
2765 }
2766
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002767 spin_lock_bh(&si->lock);
2768 sfe_ipv4_update_summary_stats(si);
2769
2770 /*
2771 * Get an estimate of the number of connections to parse in this sync.
2772 */
2773 quota = (si->num_connections + 63) / 64;
2774
2775 /*
2776 * Walk the "active" list and sync the connection state.
2777 */
2778 while (quota--) {
2779 struct sfe_ipv4_connection_match *cm;
2780 struct sfe_ipv4_connection_match *counter_cm;
2781 struct sfe_ipv4_connection *c;
2782 struct sfe_ipv4_sync sis;
2783
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002784 cm = si->active_head;
2785 if (!cm) {
2786 break;
2787 }
2788
2789 cm->active = false;
2790
2791 /*
2792 * Having found an entry we now remove it from the active scan list.
2793 */
2794 si->active_head = cm->active_next;
2795 if (likely(cm->active_next)) {
2796 cm->active_next->active_prev = NULL;
2797 } else {
2798 si->active_tail = NULL;
2799 }
2800 cm->active_next = NULL;
2801
2802 /*
2803 * We scan the connection match lists so there's a possibility that our
2804 * counter match is in the list too. If it is then remove it.
2805 */
2806 counter_cm = cm->counter_match;
2807 if (counter_cm->active) {
2808 counter_cm->active = false;
2809
2810 if (likely(counter_cm->active_prev)) {
2811 counter_cm->active_prev->active_next = counter_cm->active_next;
2812 } else {
2813 si->active_head = counter_cm->active_next;
2814 }
2815
2816 if (likely(counter_cm->active_next)) {
2817 counter_cm->active_next->active_prev = counter_cm->active_prev;
2818 } else {
2819 si->active_tail = counter_cm->active_prev;
2820 }
2821
2822 counter_cm->active_next = NULL;
2823 counter_cm->active_prev = NULL;
2824 }
2825
2826 /*
2827 * Sync the connection state.
2828 */
2829 c = cm->connection;
2830 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
2831
2832 /*
2833 * We don't want to be holding the lock when we sync!
2834 */
2835 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002836 sync_rule_callback(&sis);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002837 spin_lock_bh(&si->lock);
2838 }
2839
2840 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002841 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002842
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002843done:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002844 mod_timer(&si->timer, jiffies + (HZ / 100));
2845}
2846
/*
 * Size of the scratch buffer used to format one chunk of the debug XML
 * output. sfe_ipv4_debug_dev_read() only emits a chunk while more than this
 * many bytes remain in the reader's buffer.
 */
#define CHAR_DEV_MSG_SIZE 768
2848
2849/*
2850 * sfe_ipv4_debug_dev_read_start()
2851 * Generate part of the XML output.
2852 */
2853static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2854 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2855{
2856 int bytes_read;
2857
2858 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2859 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2860 return false;
2861 }
2862
2863 *length -= bytes_read;
2864 *total_read += bytes_read;
2865
2866 ws->state++;
2867 return true;
2868}
2869
2870/*
2871 * sfe_ipv4_debug_dev_read_connections_start()
2872 * Generate part of the XML output.
2873 */
2874static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2875 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2876{
2877 int bytes_read;
2878
2879 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2880 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2881 return false;
2882 }
2883
2884 *length -= bytes_read;
2885 *total_read += bytes_read;
2886
2887 ws->state++;
2888 return true;
2889}
2890
2891/*
2892 * sfe_ipv4_debug_dev_read_connections_connection()
2893 * Generate part of the XML output.
2894 */
2895static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2896 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2897{
2898 struct sfe_ipv4_connection *c;
2899 struct sfe_ipv4_connection *c_next;
2900 struct sfe_ipv4_connection_match *original_cm;
2901 struct sfe_ipv4_connection_match *reply_cm;
2902 int bytes_read;
2903 int protocol;
2904 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002905 __be32 src_ip;
2906 __be32 src_ip_xlate;
2907 __be16 src_port;
2908 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002909 uint64_t src_rx_packets;
2910 uint64_t src_rx_bytes;
2911 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002912 __be32 dest_ip;
2913 __be32 dest_ip_xlate;
2914 __be16 dest_port;
2915 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002916 uint64_t dest_rx_packets;
2917 uint64_t dest_rx_bytes;
2918 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002919 uint32_t mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002920
2921 spin_lock_bh(&si->lock);
2922 c = ws->iter_conn;
2923
2924 /*
2925 * Is this the first connection we need to scan?
2926 */
2927 if (!c) {
2928 c = si->all_connections_head;
2929
2930 /*
2931 * If there were no connections then move to the next state.
2932 */
2933 if (!c) {
2934 spin_unlock_bh(&si->lock);
2935
2936 ws->state++;
2937 return true;
2938 }
2939
2940 c->iterators++;
2941 }
2942
2943 c_next = c->all_connections_next;
2944 ws->iter_conn = c_next;
2945
2946 /*
2947 * Before we do anything else, take an iterator reference for the
2948 * connection we'll iterate next.
2949 */
2950 if (c_next) {
2951 c_next->iterators++;
2952 }
2953
2954 /*
2955 * Remove the iterator reference that we acquired and see if we
2956 * should free any resources.
2957 */
2958 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2959 spin_unlock_bh(&si->lock);
2960
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002961 if (c->original_match->pppoe_sk) {
2962 sock_put(c->original_match->pppoe_sk);
2963 }
2964 if (c->reply_match->pppoe_sk) {
2965 sock_put(c->reply_match->pppoe_sk);
2966 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002967 /*
2968 * This entry is dead so release our hold of the source and
2969 * dest devices and free the memory for our connection objects.
2970 */
2971 dev_put(c->original_dev);
2972 dev_put(c->reply_dev);
2973 kfree(c->original_match);
2974 kfree(c->reply_match);
2975 kfree(c);
2976
2977 /*
2978 * If we have no more connections then move to the next state.
2979 */
2980 if (!c_next) {
2981 ws->state++;
2982 }
2983
2984 return true;
2985 }
2986
2987 original_cm = c->original_match;
2988 reply_cm = c->reply_match;
2989
2990 protocol = c->protocol;
2991 src_dev = c->original_dev;
2992 src_ip = c->src_ip;
2993 src_ip_xlate = c->src_ip_xlate;
2994 src_port = c->src_port;
2995 src_port_xlate = c->src_port_xlate;
2996
2997 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2998 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2999
3000 src_rx_packets = original_cm->rx_packet_count64;
3001 src_rx_bytes = original_cm->rx_byte_count64;
3002 dest_dev = c->reply_dev;
3003 dest_ip = c->dest_ip;
3004 dest_ip_xlate = c->dest_ip_xlate;
3005 dest_port = c->dest_port;
3006 dest_port_xlate = c->dest_port_xlate;
3007 dest_rx_packets = reply_cm->rx_packet_count64;
3008 dest_rx_bytes = reply_cm->rx_byte_count64;
3009 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003010 mark = c->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003011 spin_unlock_bh(&si->lock);
3012
3013 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3014 "protocol=\"%u\" "
3015 "src_dev=\"%s\" "
3016 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3017 "src_port=\"%u\" src_port_xlate=\"%u\" "
3018 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3019 "dest_dev=\"%s\" "
3020 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3021 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
3022 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003023 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003024 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003025 protocol,
3026 src_dev->name,
3027 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003028 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003029 src_rx_packets, src_rx_bytes,
3030 dest_dev->name,
3031 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003032 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003033 dest_rx_packets, dest_rx_bytes,
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003034 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003035
3036 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3037 return false;
3038 }
3039
3040 *length -= bytes_read;
3041 *total_read += bytes_read;
3042
3043 /*
3044 * If we have no more connections then move to the next state.
3045 */
3046 if (!c_next) {
3047 ws->state++;
3048 }
3049
3050 return true;
3051}
3052
3053/*
3054 * sfe_ipv4_debug_dev_read_connections_end()
3055 * Generate part of the XML output.
3056 */
3057static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3058 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3059{
3060 int bytes_read;
3061
3062 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3063 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3064 return false;
3065 }
3066
3067 *length -= bytes_read;
3068 *total_read += bytes_read;
3069
3070 ws->state++;
3071 return true;
3072}
3073
3074/*
3075 * sfe_ipv4_debug_dev_read_exceptions_start()
3076 * Generate part of the XML output.
3077 */
3078static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3079 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3080{
3081 int bytes_read;
3082
3083 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3084 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3085 return false;
3086 }
3087
3088 *length -= bytes_read;
3089 *total_read += bytes_read;
3090
3091 ws->state++;
3092 return true;
3093}
3094
3095/*
3096 * sfe_ipv4_debug_dev_read_exceptions_exception()
3097 * Generate part of the XML output.
3098 */
3099static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3100 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3101{
3102 uint64_t ct;
3103
3104 spin_lock_bh(&si->lock);
3105 ct = si->exception_events64[ws->iter_exception];
3106 spin_unlock_bh(&si->lock);
3107
3108 if (ct) {
3109 int bytes_read;
3110
3111 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3112 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3113 sfe_ipv4_exception_events_string[ws->iter_exception],
3114 ct);
3115 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3116 return false;
3117 }
3118
3119 *length -= bytes_read;
3120 *total_read += bytes_read;
3121 }
3122
3123 ws->iter_exception++;
3124 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3125 ws->iter_exception = 0;
3126 ws->state++;
3127 }
3128
3129 return true;
3130}
3131
3132/*
3133 * sfe_ipv4_debug_dev_read_exceptions_end()
3134 * Generate part of the XML output.
3135 */
3136static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3137 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3138{
3139 int bytes_read;
3140
3141 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3142 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3143 return false;
3144 }
3145
3146 *length -= bytes_read;
3147 *total_read += bytes_read;
3148
3149 ws->state++;
3150 return true;
3151}
3152
3153/*
3154 * sfe_ipv4_debug_dev_read_stats()
3155 * Generate part of the XML output.
3156 */
3157static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3158 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3159{
3160 int bytes_read;
3161 unsigned int num_connections;
3162 uint64_t packets_forwarded;
3163 uint64_t packets_not_forwarded;
3164 uint64_t connection_create_requests;
3165 uint64_t connection_create_collisions;
3166 uint64_t connection_destroy_requests;
3167 uint64_t connection_destroy_misses;
3168 uint64_t connection_flushes;
3169 uint64_t connection_match_hash_hits;
3170 uint64_t connection_match_hash_reorders;
3171
3172 spin_lock_bh(&si->lock);
3173 sfe_ipv4_update_summary_stats(si);
3174
3175 num_connections = si->num_connections;
3176 packets_forwarded = si->packets_forwarded64;
3177 packets_not_forwarded = si->packets_not_forwarded64;
3178 connection_create_requests = si->connection_create_requests64;
3179 connection_create_collisions = si->connection_create_collisions64;
3180 connection_destroy_requests = si->connection_destroy_requests64;
3181 connection_destroy_misses = si->connection_destroy_misses64;
3182 connection_flushes = si->connection_flushes64;
3183 connection_match_hash_hits = si->connection_match_hash_hits64;
3184 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3185 spin_unlock_bh(&si->lock);
3186
3187 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3188 "num_connections=\"%u\" "
3189 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3190 "create_requests=\"%llu\" create_collisions=\"%llu\" "
3191 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3192 "flushes=\"%llu\" "
3193 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3194 num_connections,
3195 packets_forwarded,
3196 packets_not_forwarded,
3197 connection_create_requests,
3198 connection_create_collisions,
3199 connection_destroy_requests,
3200 connection_destroy_misses,
3201 connection_flushes,
3202 connection_match_hash_hits,
3203 connection_match_hash_reorders);
3204 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3205 return false;
3206 }
3207
3208 *length -= bytes_read;
3209 *total_read += bytes_read;
3210
3211 ws->state++;
3212 return true;
3213}
3214
3215/*
3216 * sfe_ipv4_debug_dev_read_end()
3217 * Generate part of the XML output.
3218 */
3219static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3220 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3221{
3222 int bytes_read;
3223
3224 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3225 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3226 return false;
3227 }
3228
3229 *length -= bytes_read;
3230 *total_read += bytes_read;
3231
3232 ws->state++;
3233 return true;
3234}
3235
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * sfe_ipv4_debug_dev_read() indexes this table with ws->state, so the order
 * of the entries is the order in which the elements are produced. Most
 * handlers advance ws->state once; the per-connection and per-exception
 * handlers stay in their state until they have walked their whole list.
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3251
3252/*
3253 * sfe_ipv4_debug_dev_read()
3254 * Send info to userspace upon read request from user
3255 */
3256static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3257{
3258 char msg[CHAR_DEV_MSG_SIZE];
3259 int total_read = 0;
3260 struct sfe_ipv4_debug_xml_write_state *ws;
3261 struct sfe_ipv4 *si = &__si;
3262
3263 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3264 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3265 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3266 continue;
3267 }
3268 }
3269
3270 return total_read;
3271}
3272
3273/*
3274 * sfe_ipv4_debug_dev_write()
Matthew McClintock54167ab2014-01-14 21:06:28 -06003275 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003276 */
3277static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3278{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003279 struct sfe_ipv4 *si = &__si;
3280
3281 spin_lock_bh(&si->lock);
3282 sfe_ipv4_update_summary_stats(si);
3283
3284 si->num_connections = 0;
3285 si->packets_forwarded64 = 0;
3286 si->packets_not_forwarded64 = 0;
3287 si->connection_create_requests64 = 0;
3288 si->connection_create_collisions64 = 0;
3289 si->connection_destroy_requests64 = 0;
3290 si->connection_destroy_misses64 = 0;
3291 si->connection_flushes64 = 0;
3292 si->connection_match_hash_hits64 = 0;
3293 si->connection_match_hash_reorders64 = 0;
3294 spin_unlock_bh(&si->lock);
3295
3296 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003297}
3298
3299/*
3300 * sfe_ipv4_debug_dev_open()
3301 */
3302static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3303{
3304 struct sfe_ipv4_debug_xml_write_state *ws;
3305
3306 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3307 if (!ws) {
3308 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3309 if (!ws) {
3310 return -ENOMEM;
3311 }
3312
3313 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3314 file->private_data = ws;
3315 }
3316
3317 return 0;
3318}
3319
3320/*
3321 * sfe_ipv4_debug_dev_release()
3322 */
3323static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3324{
3325 struct sfe_ipv4_debug_xml_write_state *ws;
3326
3327 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3328 if (ws) {
3329 struct sfe_ipv4_connection *c;
3330
3331 /*
3332 * Are we currently iterating a connection? If we are then
3333 * make sure that we reduce its iterator count and if necessary
3334 * free it.
3335 */
3336 c = ws->iter_conn;
3337 if (c) {
3338 struct sfe_ipv4 *si = &__si;
3339
3340 spin_lock_bh(&si->lock);
3341 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
3342 spin_unlock_bh(&si->lock);
3343
Ben Menchaca0971b7a2014-01-10 14:43:02 -06003344 if (c->original_match->pppoe_sk) {
3345 sock_put(c->original_match->pppoe_sk);
3346 }
3347 if (c->reply_match->pppoe_sk) {
3348 sock_put(c->reply_match->pppoe_sk);
3349 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003350 /*
3351 * This entry is dead so release our hold of the source and
3352 * dest devices and free the memory for our connection objects.
3353 */
3354 dev_put(c->original_dev);
3355 dev_put(c->reply_dev);
3356 kfree(c->original_match);
3357 kfree(c->reply_match);
3358 kfree(c);
3359 }
3360 }
3361
3362 /*
3363 * We've finished with our output so free the write state.
3364 */
3365 kfree(ws);
3366 }
3367
3368 return 0;
3369}
3370
3371/*
3372 * File operations used in the debug char device
3373 */
3374static struct file_operations sfe_ipv4_debug_dev_fops = {
3375 .read = sfe_ipv4_debug_dev_read,
3376 .write = sfe_ipv4_debug_dev_write,
3377 .open = sfe_ipv4_debug_dev_open,
3378 .release = sfe_ipv4_debug_dev_release
3379};
3380
3381/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003382 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003383 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003384static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003385{
3386 struct sfe_ipv4 *si = &__si;
3387 int result = -1;
3388
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003389 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003390
3391 /*
3392 * Create sys/sfe_ipv4
3393 */
3394 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3395 if (!si->sys_sfe_ipv4) {
3396 DEBUG_ERROR("failed to register sfe_ipv4\n");
3397 goto exit1;
3398 }
3399
3400 /*
3401 * Create files, one for each parameter supported by this module.
3402 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003403 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3404 if (result) {
3405 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3406 goto exit4;
3407 }
3408
3409 /*
3410 * Register our debug char device.
3411 */
3412 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3413 if (result < 0) {
3414 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3415 goto exit5;
3416 }
3417
3418 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003419
3420 /*
3421 * Create a timer to handle periodic statistics.
3422 */
3423 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
3424 mod_timer(&si->timer, jiffies + (HZ / 100));
3425
Dave Hudson87973cd2013-10-22 16:00:04 +01003426 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003427
Dave Hudson87973cd2013-10-22 16:00:04 +01003428 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003429
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003430exit5:
3431 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3432
3433exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003434 kobject_put(si->sys_sfe_ipv4);
3435
3436exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003437 return result;
3438}
3439
3440/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003441 * sfe_ipv4_exit()
3442 */
3443static void __exit sfe_ipv4_exit(void)
3444{
Dave Hudson87973cd2013-10-22 16:00:04 +01003445 struct sfe_ipv4 *si = &__si;
3446
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003447 DEBUG_INFO("SFE IPv4 exit\n");
Dave Hudson87973cd2013-10-22 16:00:04 +01003448
3449 /*
3450 * Destroy all connections.
3451 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003452 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Dave Hudson87973cd2013-10-22 16:00:04 +01003453
3454// XXX - this is where we need to unregister with any lower level offload services.
3455
Dave Hudson87973cd2013-10-22 16:00:04 +01003456 del_timer_sync(&si->timer);
3457
Dave Hudson87973cd2013-10-22 16:00:04 +01003458 unregister_chrdev(si->debug_dev, "sfe_ipv4");
3459
3460 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3461
Dave Hudson87973cd2013-10-22 16:00:04 +01003462 kobject_put(si->sys_sfe_ipv4);
3463
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003464}
3465
/*
 * Module entry and exit points.
 */
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Functions made available to other kernel modules.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_pppoe_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);

/*
 * Module metadata.
 */
MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("GPL");
3481