/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * XXX - fill in the appropriate GPL notice.
 */
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>

#include "sfe.h"
#include "sfe_ipv4.h"

/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses. For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif

/*
 * The default Linux ethhdr structure is "packed". It also has byte aligned
 * MAC addresses and this leads to poor performance. This version is not
 * packed and has better alignment for the MAC addresses.
 */
struct sfe_ipv4_ethhdr {
	__be16 h_dest[ETH_ALEN / 2];
	__be16 h_source[ETH_ALEN / 2];
	__be16 h_proto;
};

/*
 * Based on the Linux IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_iphdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,
	     version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;
	__be16 tot_len;
	__be16 id;
	__be16 frag_off;
	__u8 ttl;
	__u8 protocol;
	__sum16 check;
	__be32 saddr;
	__be32 daddr;

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;

/*
 * Based on the Linux UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_udphdr {
	__be16 source;
	__be16 dest;
	__be16 len;
	__sum16 check;
} SFE_IPV4_UNALIGNED_STRUCT;

/*
 * Based on the Linux TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_tcphdr {
	__be16 source;
	__be16 dest;
	__be32 seq;
	__be32 ack_seq;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;
	__sum16 check;
	__be16 urg_ptr;
} SFE_IPV4_UNALIGNED_STRUCT;

/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};

/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */

/*
 * IPv4 connection matching structure.
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags */

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */
	struct sock *pppoe_sk;		/* pppoe socket for transmitting to this xmit_dev */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};

/*
 * Per-connection data structure.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	int iterators;			/* Number of iterators currently using this connection */
	bool pending_free;		/* Flag that indicates that this connection should be freed after iteration */
	uint32_t mark;			/* mark for outgoing packet */
};

/*
 * IPv4 connections and hash table size information.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)

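/*
 * Worked example (added for illustration, not in the original source): with
 * SFE_IPV4_CONNECTION_HASH_SHIFT = 12 the tables hold 1 << 12 = 4096 chains
 * and the mask is 0xfff, so the hash functions below fold a 32-bit hash in
 * half and keep the low 12 bits as the chain index.
 */
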
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};

static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};

/*
 * Per-module structure.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_ipv4_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
};

/*
 * Enumeration of the XML output.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};

/*
 * XML write state.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	struct sfe_ipv4_connection *iter_conn;
					/* Next connection iterator */
	int iter_exception;		/* Next exception iterator */
};

typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

struct sfe_ipv4 __si;

/*
 * sfe_ipv4_gen_ip_csum()
 *	Generate the IP checksum for an IPv4 header.
 *
 * Note that this function assumes that we have only 20 bytes of IP header.
 */
static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_iphdr *iph)
{
	uint32_t sum;
	uint16_t *i = (uint16_t *)iph;

	iph->check = 0;

	/*
	 * Generate the sum.
	 */
	sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];

	/*
	 * Fold it to ones-complement form.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)sum ^ 0xffff;
}

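/*
 * Illustrative sketch, not part of the original driver: the routine above
 * relies on ihl == 5 (no IP options). A hedged variant for headers that may
 * carry options could fall back to the kernel's ip_fast_csum(), which sums
 * ihl 32-bit words. The name sfe_ipv4_gen_ip_csum_any() is hypothetical and
 * assumes the arch checksum helpers are visible via the includes above.
 */
static inline uint16_t sfe_ipv4_gen_ip_csum_any(struct sfe_ipv4_iphdr *iph)
{
	if (likely(iph->ihl == 5)) {
		return sfe_ipv4_gen_ip_csum(iph);
	}

	iph->check = 0;
	return (uint16_t)ip_fast_csum((void *)iph, iph->ihl);
}
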
/*
 * sfe_ipv4_get_connection_match_hash()
 *	Generate the hash used in connection match lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
							      __be32 src_ip, __be16 src_port,
							      __be32 dest_ip, __be16 dest_port)
{
	size_t dev_addr = (size_t)dev;
	uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}

/*
 * sfe_ipv4_find_sfe_ipv4_connection_match()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it. We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
		 || cm->match_dest_port != dest_port
		 || cm->match_src_ip != src_ip
		 || cm->match_dest_ip != dest_ip
		 || cm->match_protocol != protocol
		 || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}

/*
 * sfe_ipv4_connection_match_update_summary_stats()
 *	Update the summary stats for a connection match entry.
 */
static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
{
	cm->rx_packet_count64 += cm->rx_packet_count;
	cm->rx_packet_count = 0;
	cm->rx_byte_count64 += cm->rx_byte_count;
	cm->rx_byte_count = 0;
}

/*
 * sfe_ipv4_connection_match_compute_translations()
 *	Compute port and address translations for a connection match entry.
 */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations. If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly. The algorithm is from RFC1624.
		 */
		uint16_t src_ip_hi = cm->match_src_ip >> 16;
		uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
		uint32_t xlate_src_ip = ~cm->xlate_src_ip;
		uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
		uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
		uint16_t xlate_src_port = ~cm->xlate_src_port;
		uint32_t adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (uint16_t)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly. The algorithm is from RFC1624.
		 */
		uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
		uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
		uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
		uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
		uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		uint16_t xlate_dest_port = ~cm->xlate_dest_port;
		uint32_t adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (uint16_t)adj;
	}
}

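/*
 * Illustrative sketch, not part of the original file: how the precomputed
 * RFC 1624 adjustment above is consumed on the per-packet path. The UDP and
 * TCP receive paths below open-code exactly this fold; the helper name
 * sfe_ipv4_apply_csum_adjustment() is hypothetical.
 */
static inline uint16_t sfe_ipv4_apply_csum_adjustment(uint16_t csum, uint16_t adjustment)
{
	uint32_t sum = csum + adjustment;

	/*
	 * One fold is enough here: adding two 16-bit values can carry at
	 * most one bit into the upper half.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}
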
/*
 * sfe_ipv4_update_summary_stats()
 *	Update the summary stats.
 */
static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
{
	int i;

	si->connection_create_requests64 += si->connection_create_requests;
	si->connection_create_requests = 0;
	si->connection_create_collisions64 += si->connection_create_collisions;
	si->connection_create_collisions = 0;
	si->connection_destroy_requests64 += si->connection_destroy_requests;
	si->connection_destroy_requests = 0;
	si->connection_destroy_misses64 += si->connection_destroy_misses;
	si->connection_destroy_misses = 0;
	si->connection_match_hash_hits64 += si->connection_match_hash_hits;
	si->connection_match_hash_hits = 0;
	si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
	si->connection_match_hash_reorders = 0;
	si->connection_flushes64 += si->connection_flushes;
	si->connection_flushes = 0;
	si->packets_forwarded64 += si->packets_forwarded;
	si->packets_forwarded = 0;
	si->packets_not_forwarded64 += si->packets_not_forwarded;
	si->packets_not_forwarded = 0;

	for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
		si->exception_events64[i] += si->exception_events[i];
		si->exception_events[i] = 0;
	}
}

/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;
}

/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Unlink the connection match entry from the hash.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * Unlink the connection match entry from the active list.
	 */
	if (likely(cm->active_prev)) {
		cm->active_prev->active_next = cm->active_next;
	} else {
		si->active_head = cm->active_next;
	}

	if (likely(cm->active_next)) {
		cm->active_next->active_prev = cm->active_prev;
	} else {
		si->active_tail = cm->active_prev;
	}
}

/*
 * sfe_ipv4_get_connection_hash()
 *	Generate the hash used in connection lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
							__be32 dest_ip, __be16 dest_port)
{
	uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}

/*
 * sfe_ipv4_find_sfe_ipv4_connection()
 *	Get the IPv4 connection info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
									    __be32 src_ip, __be16 src_port,
									    __be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection *c;
	unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
	c = si->conn_hash[conn_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!c)) {
		return c;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(c->src_port == src_port)
	    && likely(c->dest_port == dest_port)
	    && likely(c->src_ip == src_ip)
	    && likely(c->dest_ip == dest_ip)
	    && likely(c->protocol == protocol)) {
		return c;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it. We
	 * presume that this will be reused again very quickly.
	 */
	do {
		c = c->next;
	} while (c && (c->src_port != src_port
		 || c->dest_port != dest_port
		 || c->src_ip != src_ip
		 || c->dest_ip != dest_ip
		 || c->protocol != protocol));

	/*
	 * The connection entry is only needed for the next create/destroy
	 * metadata, so there is no need to re-order the chain for these requests.
	 */
	return c;
}

/*
 * sfe_ipv4_mark_rule()
 *	Update the mark for a currently offloaded connection.
 *
 * Takes the hash lock upon entry.
 */
static void sfe_ipv4_mark_rule(struct sfe_ipv4_mark *mark)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	spin_lock(&si->lock);
	c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
					      mark->src_ip, mark->src_port,
					      mark->dest_ip, mark->dest_port);
	if (c) {
		DEBUG_TRACE("INFO: Matching connection found for mark, setting to: %x\n", mark->mark);
		c->mark = mark->mark;
	}
	spin_unlock(&si->lock);
}

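/*
 * Illustrative usage sketch, not part of the original file: how a connection
 * manager might request a mark update for an offloaded flow. The field names
 * follow the sfe_ipv4_mark usage above (the struct itself lives in
 * sfe_ipv4.h); example_set_flow_mark() is a hypothetical helper.
 */
static inline void example_set_flow_mark(__be32 src_ip, __be16 src_port,
					 __be32 dest_ip, __be16 dest_port,
					 uint32_t new_mark)
{
	struct sfe_ipv4_mark mark;

	mark.protocol = IPPROTO_TCP;
	mark.src_ip = src_ip;
	mark.src_port = src_port;
	mark.dest_ip = dest_ip;
	mark.dest_port = dest_port;
	mark.mark = new_mark;

	sfe_ipv4_mark_rule(&mark);
}
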
/*
 * sfe_ipv4_insert_sfe_ipv4_connection()
 *	Insert a connection into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_connection **hash_head;
	struct sfe_ipv4_connection *prev_head;
	unsigned int conn_idx;

	/*
	 * Insert entry into the connection hash.
	 */
	conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
						c->dest_ip, c->dest_port);
	hash_head = &si->conn_hash[conn_idx];
	prev_head = *hash_head;
	c->prev = NULL;
	if (prev_head) {
		prev_head->prev = c;
	}

	c->next = prev_head;
	*hash_head = c;

	/*
	 * Insert entry into the "all connections" list.
	 */
	if (si->all_connections_tail) {
		c->all_connections_prev = si->all_connections_tail;
		si->all_connections_tail->all_connections_next = c;
	} else {
		c->all_connections_prev = NULL;
		si->all_connections_head = c;
	}

	si->all_connections_tail = c;
	c->all_connections_next = NULL;
	si->num_connections++;

	/*
	 * Insert the connection match objects too.
	 */
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
}

/*
 * sfe_ipv4_remove_sfe_ipv4_connection()
 *	Remove a sfe_ipv4_connection object from the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Remove the connection match objects.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);

	/*
	 * Unlink the connection.
	 */
	if (c->prev) {
		c->prev->next = c->next;
	} else {
		unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
								     c->dest_ip, c->dest_port);
		si->conn_hash[conn_idx] = c->next;
	}

	if (c->next) {
		c->next->prev = c->prev;
	}
}

/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *	Sync a connection.
 *
 * On entry to this function we expect that the lock for the connection is either
 * already held or isn't required.
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_ipv4_sync *sis, uint64_t now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message.
	 */
	sis->protocol = c->protocol;
	sis->src_ip = c->src_ip;
	sis->dest_ip = c->dest_ip;
	sis->src_port = c->src_port;
	sis->dest_port = c->dest_port;

	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}

/*
 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
 *	Remove an iterator from a connection - free all resources if necessary.
 *
 * Returns true if the connection should now be free, false if not.
 *
 * We must be locked on entry to this function.
 */
static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Are we the last iterator for this connection?
	 */
	c->iterators--;
	if (c->iterators) {
		return false;
	}

	/*
	 * Is this connection marked for deletion?
	 */
	if (!c->pending_free) {
		return false;
	}

	/*
	 * We're ready to delete this connection so unlink it from the "all
	 * connections" list.
	 */
	si->num_connections--;
	if (c->all_connections_prev) {
		c->all_connections_prev->all_connections_next = c->all_connections_next;
	} else {
		si->all_connections_head = c->all_connections_next;
	}

	if (c->all_connections_next) {
		c->all_connections_next->all_connections_prev = c->all_connections_prev;
	} else {
		si->all_connections_tail = c->all_connections_prev;
	}

	return true;
}

/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again. In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_sync sis;
	uint64_t now_jiffies;
	bool pending_free = false;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	rcu_read_lock();
	spin_lock(&si->lock);
	si->connection_flushes++;

	/*
	 * Check that we're not currently being iterated. If we are then
	 * we can't free this entry yet but must mark it pending a free. If it's
	 * not being iterated then we can unlink it from the list of all
	 * connections.
	 */
	if (c->iterators) {
		pending_free = true;
		c->pending_free = true;
	} else {
		si->num_connections--;
		if (c->all_connections_prev) {
			c->all_connections_prev->all_connections_next = c->all_connections_next;
		} else {
			si->all_connections_head = c->all_connections_next;
		}

		if (c->all_connections_next) {
			c->all_connections_next->all_connections_prev = c->all_connections_prev;
		} else {
			si->all_connections_tail = c->all_connections_prev;
		}
	}

	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	spin_unlock(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a sync message and then sync.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * If we can't yet free the underlying memory then we're done.
	 */
	if (pending_free) {
		return;
	}

	if (c->original_match->pppoe_sk) {
		sock_put(c->original_match->pppoe_sk);
	}
	if (c->reply_match->pppoe_sk) {
		sock_put(c->reply_match->pppoe_sk);
	}

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}

/*
 * sfe_ipv4_recv_udp()
 *	Handle UDP packet receives and forwarding.
 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udphdr *udph;
	__be16 proto;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(len < (sizeof(struct sfe_ipv4_udphdr) + ihl))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udphdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum = udp_csum + cm->xlate_src_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum = udp_csum + cm->xlate_dest_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

//	if ((nat_entry_data->tos & FASTNAT_DSCP_MASK) != (iph->tos & FASTNAT_DSCP_MASK)) {
//		ipv4_change_dsfield(iph, (u_int8_t)(~FASTNAT_DSCP_MASK), nat_entry_data->tos);
//	}

//	skb->priority = nat_entry_data->priority;
//	skb->mark = nat_entry_data->mark;

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	/*
	 * On creation, we assume that cm->pppoe_sk is set, and that the socket
	 * is held. If the connection is no longer established, we need to
	 * release the socket, and we need to un-offload the connection. We
	 * must also release the socket if the connection is closed, of course.
	 * We assume that the xmit dev, as well as the dest MAC, are set based on
	 * the pppoe header (dest is po->pppoe_pa.remote).
	 */
	if (cm->pppoe_sk) {
		struct pppoe_hdr *ph;
		int data_len = skb->len;
		struct sock *sk = cm->pppoe_sk;
		struct pppox_sock *po = pppox_sk(sk);
		struct net_device *dev = po->pppoe_dev;

		if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
			goto abort;

		ph = (struct pppoe_hdr *)__skb_push(skb, PPPOE_SES_HLEN);
		ph->ver = 1;
		ph->type = 1;
		ph->code = 0;
		ph->sid = po->num;
		ph->length = htons(data_len + 2);
		ph->tag[0].tag_type = htons(PPP_IP);
		memcpy(cm->xmit_dest_mac, po->pppoe_pa.remote, ETH_ALEN);

		proto = ETH_P_PPP_SES;
	} else {
		proto = ETH_P_IP;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;
	skb->protocol = cpu_to_be16(proto);

	/*
	 * Do we have a simple Ethernet header to write?
	 */
	if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR))) {
		/*
		 * If this is anything other than a point-to-point interface then we need to
		 * create a header based on MAC addresses.
		 */
		if (likely(!(xmit_dev->flags & IFF_POINTOPOINT))) {
			xmit_dev->header_ops->create(skb, xmit_dev, proto,
						     cm->xmit_dest_mac, cm->xmit_src_mac, len);
		}
	} else {
		struct sfe_ipv4_ethhdr *eth = (struct sfe_ipv4_ethhdr *)__skb_push(skb, ETH_HLEN);
		eth->h_proto = skb->protocol;
		eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
		eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
		eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
		eth->h_source[0] = htons(cm->xmit_src_mac[0]);
		eth->h_source[1] = htons(cm->xmit_src_mac[1]);
		eth->h_source[2] = htons(cm->xmit_src_mac[2]);
	}

	/* Mark outgoing packet */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;

abort:
	kfree_skb(skb);
	return 1;
}

/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ack accordingly.
 */
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcphdr *th, const uint32_t data_offs,
					     uint32_t *ack) __attribute__((always_inline));
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcphdr *th, const uint32_t data_offs,
					     uint32_t *ack)
{
	uint32_t length = sizeof(struct sfe_ipv4_tcphdr);
	uint8_t *ptr = (uint8_t *)th + length;

	/*
	 * If the option is a TIMESTAMP discard it.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse the SACK option.
	 */
	while (length < data_offs) {
		uint8_t size;
		uint8_t kind;

		ptr = (uint8_t *)th + length;
		kind = *ptr;

		/*
		 * NOP, for padding.
		 * Not handled in the switch because we want a fast escape and
		 * don't want to read a size byte for it.
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			uint32_t sack = 0;
			uint8_t re = 1 + 1;

			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			re += 4;
			while (re < size) {
				uint32_t sack_re;
				uint8_t *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}

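/*
 * Worked example (added for illustration, not in the original source): a SACK
 * option carrying a single block is laid out as
 *
 *	kind = TCPOPT_SACK (5), size = 2 + 1 * TCPOLEN_SACK_PERBLOCK = 10
 *	left edge of block  - 4 bytes, network order
 *	right edge of block - 4 bytes, network order
 *
 * The loop above keeps the largest right edge seen across all blocks and, if
 * that is beyond the segment's own ACK number, advances *ack so the window
 * checks in sfe_ipv4_recv_tcp() account for selectively-acknowledged data.
 */
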
/*
 * sfe_ipv4_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 */
static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_tcphdr *tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	__be16 proto;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *counter_cm;
	uint8_t ttl;
	uint32_t flags;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (unlikely(len < (sizeof(struct sfe_ipv4_tcphdr) + ihl))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	tcph = (struct sfe_ipv4_tcphdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	if (unlikely(!cm)) {
		/*
		 * We didn't get a connection but as TCP is connection-oriented that
		 * may be because this is a non-fast connection (not running established).
		 * For diagnostic purposes we differentiate this here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
	 * set is not a fast path packet.
	 */
	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

1588 counter_cm = cm->counter_match;
1589
1590 /*
1591 * Are we doing sequence number checking?
1592 */
1593 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1594 uint32_t seq;
1595 uint32_t ack;
1596 uint32_t sack;
1597 uint32_t data_offs;
1598 uint32_t end;
1599 uint32_t left_edge;
1600 uint32_t scaled_win;
1601 uint32_t max_end;
1602
1603 /*
1604 * Is our sequence fully past the right hand edge of the window?
1605 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001606 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001607 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1608 struct sfe_ipv4_connection *c = cm->connection;
1609 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1610 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1611 si->packets_not_forwarded++;
1612 spin_unlock(&si->lock);
1613
1614 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1615 seq, cm->protocol_state.tcp.max_end + 1);
1616 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1617 return 0;
1618 }
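		/*
		 * Editorial note (not in the original source): the signed 32-bit
		 * subtraction above is a wrap-safe serial-number comparison in
		 * the spirit of RFC 1982.  Hypothetical numbers: with
		 * max_end = 0xfffffff0, a post-wrap seq of 0x00000010 gives
		 * (int32_t)(0x00000010 - 0xfffffff1) = +31 > 0, so it is judged
		 * past the right edge, while seq = 0xffffffe0 gives -17 and is
		 * accepted.
		 */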
1619
1620 /*
1621 * Check that our TCP data offset isn't too short.
1622 */
1623 data_offs = tcph->doff << 2;
1624 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcphdr))) {
1625 struct sfe_ipv4_connection *c = cm->connection;
1626 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1627 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1628 si->packets_not_forwarded++;
1629 spin_unlock(&si->lock);
1630
1631 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1632 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1633 return 0;
1634 }
1635
1636 /*
1637 * Update ACK according to any SACK option.
1638 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001639 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001640 sack = ack;
1641 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1642 struct sfe_ipv4_connection *c = cm->connection;
1643 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1644 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1645 si->packets_not_forwarded++;
1646 spin_unlock(&si->lock);
1647
1648 DEBUG_TRACE("TCP option SACK size is wrong\n");
1649 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1650 return 0;
1651 }
1652
1653 /*
1654 * Check that our TCP data offset isn't past the end of the packet.
1655 */
1656 data_offs += sizeof(struct sfe_ipv4_iphdr);
1657 if (unlikely(len < data_offs)) {
1658 struct sfe_ipv4_connection *c = cm->connection;
1659 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1660 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1661 si->packets_not_forwarded++;
1662 spin_unlock(&si->lock);
1663
1664 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1665 data_offs, len);
1666 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1667 return 0;
1668 }
1669
1670 end = seq + len - data_offs;
1671
1672 /*
1673 * Is our sequence fully before the left hand edge of the window?
1674 */
1675 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1676 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1677 struct sfe_ipv4_connection *c = cm->connection;
1678 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1679 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1680 si->packets_not_forwarded++;
1681 spin_unlock(&si->lock);
1682
1683 DEBUG_TRACE("seq: %u before left edge: %u\n",
1684 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1685 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1686 return 0;
1687 }
1688
1689 /*
1690 * Are we acking data that is to the right of what has been sent?
1691 */
1692 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1693 struct sfe_ipv4_connection *c = cm->connection;
1694 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1695 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1696 si->packets_not_forwarded++;
1697 spin_unlock(&si->lock);
1698
1699 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1700 sack, counter_cm->protocol_state.tcp.end + 1);
1701 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1702 return 0;
1703 }
1704
1705 /*
1706 * Is our ack too far before the left hand edge of the window?
1707 */
1708 left_edge = counter_cm->protocol_state.tcp.end
1709 - cm->protocol_state.tcp.max_win
1710 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1711 - 1;
1712 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1713 struct sfe_ipv4_connection *c = cm->connection;
1714 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1715 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1716 si->packets_not_forwarded++;
1717 spin_unlock(&si->lock);
1718
1719 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1720 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1721 return 0;
1722 }
1723
1724 /*
1725 * Have we just seen the largest window size yet for this connection? If yes
1726 * then we need to record the new value.
1727 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001728 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001729 scaled_win += (sack - ack);
1730 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1731 cm->protocol_state.tcp.max_win = scaled_win;
1732 }
1733
1734 /*
1735 * If our sequence and/or ack numbers have advanced then record the new state.
1736 */
1737 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1738 cm->protocol_state.tcp.end = end;
1739 }
1740
1741 max_end = sack + scaled_win;
1742 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1743 counter_cm->protocol_state.tcp.max_end = max_end;
1744 }
1745 }
1746
1747 /*
1748 * From this point on we're good to modify the packet.
1749 */
1750
1751 /*
1752 * Decrement our TTL.
1753 */
1754 iph->ttl = ttl - 1;
1755
1756 /*
1757 * Do we have to perform translations of the source address/port?
1758 */
1759 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1760 uint16_t tcp_csum;
1761 uint32_t sum;
1762
Dave Hudson87973cd2013-10-22 16:00:04 +01001763 iph->saddr = cm->xlate_src_ip;
1764 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001765
1766 /*
1767	 * Do we have a non-zero TCP checksum? If we do then we need
1768 * to update it.
1769 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001770 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001771 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1772 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001773 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001774 }
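	/*
	 * Editorial note (not in the original source): the fold above is an
	 * incremental one's-complement update in the style of RFC 1624, using
	 * the adjustment precomputed by
	 * sfe_ipv4_connection_match_compute_translations().  Hypothetical
	 * numbers: tcp_csum = 0xfffe and an adjustment of 0x0003 give
	 * sum = 0x10001; folding the carry back in yields 0x0001 + 0x0001 =
	 * 0x0002.  A single fold suffices because the sum of two 16-bit
	 * values cannot exceed 0x1fffe.
	 */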
1775
1776 /*
1777 * Do we have to perform translations of the destination address/port?
1778 */
1779 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1780 uint16_t tcp_csum;
1781 uint32_t sum;
1782
Dave Hudson87973cd2013-10-22 16:00:04 +01001783 iph->daddr = cm->xlate_dest_ip;
1784 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001785
1786 /*
1787	 * Do we have a non-zero TCP checksum? If we do then we need
1788 * to update it.
1789 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001790 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001791 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1792 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001793 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001794 }
1795
1796 /*
1797 * Replace the IP checksum.
1798 */
1799 iph->check = sfe_ipv4_gen_ip_csum(iph);
1800
1801// if ((nat_entry_data->tos & FASTNAT_DSCP_MASK) != (iph->tos & FASTNAT_DSCP_MASK)) {
1802// ipv4_change_dsfield(iph, (u_int8_t)(~FASTNAT_DSCP_MASK), nat_entry_data->tos);
1803// }
1804
1805// skb->priority = nat_entry_data->priority;
1806// skb->mark = nat_entry_data->mark;
1807
1808 /*
1809 * Update traffic stats.
1810 */
1811 cm->rx_packet_count++;
1812 cm->rx_byte_count += len;
1813
1814 /*
1815 * If we're not already on the active list then insert ourselves at the tail
1816 * of the current list.
1817 */
1818 if (unlikely(!cm->active)) {
1819 cm->active = true;
1820 cm->active_prev = si->active_tail;
1821 if (likely(si->active_tail)) {
1822 si->active_tail->active_next = cm;
1823 } else {
1824 si->active_head = cm;
1825 }
1826 si->active_tail = cm;
1827 }
1828
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001829 if (cm->pppoe_sk) {
1830 struct pppoe_hdr *ph;
1831 int data_len = skb->len;
1832 struct sock *sk = cm->pppoe_sk;
1833 struct pppox_sock *po = pppox_sk(sk);
1834 struct net_device *dev = po->pppoe_dev;
1835
1836 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
1837 goto abort;
1838
1839 ph = (struct pppoe_hdr *)__skb_push(skb, PPPOE_SES_HLEN);
1840 ph->ver = 1;
1841 ph->type = 1;
1842 ph->code = 0;
1843 ph->sid = po->num;
1844 ph->length = htons(data_len + 2);
1845 ph->tag[0].tag_type = htons(PPP_IP);
1846 memcpy(cm->xmit_dest_mac, po->pppoe_pa.remote, ETH_ALEN);
1847
1848 proto = ETH_P_PPP_SES;
1849 } else {
1850 proto = ETH_P_IP;
1851 }
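	/*
	 * Editorial note (not in the original source): the 8 bytes pushed in
	 * the PPPoE branch above are the 6-byte session header plus the 2-byte
	 * PPP protocol ID, which is why ph->length is the IP datagram length
	 * plus 2 and why ph->tag[0].tag_type (overlaying the PPP protocol
	 * word) is set to PPP_IP:
	 *
	 *   ver/type (1) | code (1) | session id (2) | length (2) | proto (2)
	 */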
1852
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001853 xmit_dev = cm->xmit_dev;
1854 skb->dev = xmit_dev;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001855 skb->protocol = cpu_to_be16(proto);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001856
1857 /*
1858 * Do we have a simple Ethernet header to write?
1859 */
1860 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR))) {
1861 /*
1862 * If this is anything other than a point-to-point interface then we need to
1863 * create a header based on MAC addresses.
1864 */
1865 if (likely(!(xmit_dev->flags & IFF_POINTOPOINT))) {
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001866 xmit_dev->header_ops->create(skb, xmit_dev, proto,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001867 cm->xmit_dest_mac, cm->xmit_src_mac, len);
1868 }
1869 } else {
1870 struct sfe_ipv4_ethhdr *eth = (struct sfe_ipv4_ethhdr *)__skb_push(skb, ETH_HLEN);
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001871 eth->h_proto = skb->protocol;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001872 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1873 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1874 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1875 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1876 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1877 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
1878 }
1879
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001880 /*
1881 * Mark outgoing packet
1882 */
1883 skb->mark = cm->connection->mark;
1884 if (skb->mark) {
1885 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1886 }
1887
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001888 si->packets_forwarded++;
1889 spin_unlock(&si->lock);
1890
1891 /*
1892 * We're going to check for GSO flags when we transmit the packet so
1893 * start fetching the necessary cache line now.
1894 */
1895 prefetch(skb_shinfo(skb));
1896
1897 /*
1898 * Send the packet on its way.
1899 */
1900 dev_queue_xmit(skb);
1901
1902 return 1;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06001903
1904abort:
1905 kfree_skb(skb);
1906 return 1;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001907}
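
/*
 * Editorial sketch (not part of the original source): the checks above all
 * use the same signed-difference idiom for wrap-safe comparisons of 32-bit
 * sequence numbers.  A helper of the following shape captures it; the name
 * sfe_ipv4_seq_geq() is invented here and nothing in this file uses it.
 */
static inline bool sfe_ipv4_seq_geq(uint32_t a, uint32_t b)
{
	/*
	 * True if a is at or after b in sequence space: the signed distance
	 * from b to a is non-negative even when the counter has wrapped.
	 */
	return (int32_t)(a - b) >= 0;
}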
1908
1909/*
1910 * sfe_ipv4_recv_icmp()
1911 * Handle ICMP packet receives.
1912 *
1913 * ICMP packets aren't handled as a "fast path" and always have us process them
1914 * through the default Linux stack. What we do need to do is look for any errors
1915 * about connections we are handling in the fast path. If we find any such
1916 * connections then we want to flush their state so that the ICMP error path
1917 * within Linux has all of the correct state should it need it.
1918 */
1919static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
1920 unsigned int len, struct sfe_ipv4_iphdr *iph, unsigned int ihl)
1921{
1922 struct icmphdr *icmph;
1923 struct sfe_ipv4_iphdr *icmp_iph;
1924 unsigned int icmp_ihl_words;
1925 unsigned int icmp_ihl;
1926 uint32_t *icmp_trans_h;
1927 struct sfe_ipv4_udphdr *icmp_udph;
1928 struct sfe_ipv4_tcphdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001929 __be32 src_ip;
1930 __be32 dest_ip;
1931 __be16 src_port;
1932 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001933 struct sfe_ipv4_connection_match *cm;
1934 struct sfe_ipv4_connection *c;
1935
1936 /*
1937	 * Is our packet too short to contain a valid ICMP header?
1938 */
1939 len -= ihl;
1940 if (unlikely(len < sizeof(struct icmphdr))) {
1941 spin_lock(&si->lock);
1942 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
1943 si->packets_not_forwarded++;
1944 spin_unlock(&si->lock);
1945
1946 DEBUG_TRACE("packet too short for ICMP header\n");
1947 return 0;
1948 }
1949
1950 /*
1951 * We only handle "destination unreachable" and "time exceeded" messages.
1952 */
1953 icmph = (struct icmphdr *)(skb->data + ihl);
1954 if ((icmph->type != ICMP_DEST_UNREACH)
1955 && (icmph->type != ICMP_TIME_EXCEEDED)) {
1956 spin_lock(&si->lock);
1957 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
1958 si->packets_not_forwarded++;
1959 spin_unlock(&si->lock);
1960
1961 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
1962 return 0;
1963 }
1964
1965 /*
1966 * Do we have the full embedded IP header?
1967 */
1968 len -= sizeof(struct icmphdr);
1969 if (unlikely(len < sizeof(struct sfe_ipv4_iphdr))) {
1970 spin_lock(&si->lock);
1971 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
1972 si->packets_not_forwarded++;
1973 spin_unlock(&si->lock);
1974
1975 DEBUG_TRACE("Embedded IP header not complete\n");
1976 return 0;
1977 }
1978
1979 /*
1980 * Is our embedded IP version wrong?
1981 */
1982 icmp_iph = (struct sfe_ipv4_iphdr *)(icmph + 1);
1983 if (unlikely(icmp_iph->version != 4)) {
1984 spin_lock(&si->lock);
1985 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
1986 si->packets_not_forwarded++;
1987 spin_unlock(&si->lock);
1988
1989 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
1990 return 0;
1991 }
1992
1993 /*
1994 * Do we have the full embedded IP header, including any options?
1995 */
1996 icmp_ihl_words = icmp_iph->ihl;
1997 icmp_ihl = icmp_ihl_words << 2;
1998 if (unlikely(len < icmp_ihl)) {
1999 spin_lock(&si->lock);
2000 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2001 si->packets_not_forwarded++;
2002 spin_unlock(&si->lock);
2003
2004 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2005 return 0;
2006 }
2007
2008 len -= icmp_ihl;
2009 icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;
2010
2011 /*
2012 * Handle the embedded transport layer header.
2013 */
2014 switch (icmp_iph->protocol) {
2015 case IPPROTO_UDP:
2016 /*
2017 * We should have 8 bytes of UDP header - that's enough to identify
2018 * the connection.
2019 */
2020 if (unlikely(len < 8)) {
2021 spin_lock(&si->lock);
2022 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2023 si->packets_not_forwarded++;
2024 spin_unlock(&si->lock);
2025
2026 DEBUG_TRACE("Incomplete embedded UDP header\n");
2027 return 0;
2028 }
2029
2030 icmp_udph = (struct sfe_ipv4_udphdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002031 src_port = icmp_udph->source;
2032 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002033 break;
2034
2035 case IPPROTO_TCP:
2036 /*
2037 * We should have 8 bytes of TCP header - that's enough to identify
2038 * the connection.
2039 */
2040 if (unlikely(len < 8)) {
2041 spin_lock(&si->lock);
2042 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2043 si->packets_not_forwarded++;
2044 spin_unlock(&si->lock);
2045
2046 DEBUG_TRACE("Incomplete embedded TCP header\n");
2047 return 0;
2048 }
2049
2050 icmp_tcph = (struct sfe_ipv4_tcphdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002051 src_port = icmp_tcph->source;
2052 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002053 break;
2054
2055 default:
2056 spin_lock(&si->lock);
2057 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2058 si->packets_not_forwarded++;
2059 spin_unlock(&si->lock);
2060
2061 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2062 return 0;
2063 }
2064
Dave Hudson87973cd2013-10-22 16:00:04 +01002065 src_ip = icmp_iph->saddr;
2066 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002067
2068 spin_lock(&si->lock);
2069
2070 /*
2071 * Look for a connection match. Note that we reverse the source and destination
2072 * here because our embedded message contains a packet that was sent in the
2073 * opposite direction to the one in which we just received it. It will have
2074 * been sent on the interface from which we received it though so that's still
2075 * ok to use.
2076 */
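	/*
	 * Editorial example (addresses invented, not from the source): if the
	 * fast path sent a datagram 10.0.0.2:40000 -> 8.8.8.8:53 and a router
	 * returns an ICMP error, the embedded copy carries that original
	 * src/dest pair.  Looking up (dest, src) = (8.8.8.8:53, 10.0.0.2:40000)
	 * therefore finds the reply-direction match entry keyed on the device
	 * the error arrived on.
	 */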
2077 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2078 if (unlikely(!cm)) {
2079 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2080 si->packets_not_forwarded++;
2081 spin_unlock(&si->lock);
2082
2083 DEBUG_TRACE("no connection found\n");
2084 return 0;
2085 }
2086
2087 /*
2088 * We found a connection so now remove it from the connection list and flush
2089 * its state.
2090 */
2091 c = cm->connection;
2092 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2093 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2094 si->packets_not_forwarded++;
2095 spin_unlock(&si->lock);
2096
2097 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2098 return 0;
2099}
2100
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002101int sfe_pppoe_recv(struct net_device *dev, struct sk_buff *skb)
2102{
2103 int offloaded;
2104 int ppplen, skblen, proto;
2105 struct pppoe_hdr *phdr;
2106
2107 if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) {
2108		DEBUG_TRACE("sfe pppoe: failed at pskb_may_pull\n");
2109 return 0;
2110 }
2111
2112 phdr = pppoe_hdr(skb);
2113 ppplen = ntohs(phdr->length) - sizeof(struct pppoe_tag);
2114 skblen = skb->len - PPPOE_SES_HLEN;
2115 proto = skb->protocol;
2116
2117	/* Check that the claimed PPPoE payload length fits inside the skb. */
2118 if (skblen < ppplen) {
2119		DEBUG_TRACE("sfe pppoe: skblen (%d) < ppplen (%d)\n",
2120 skblen, ppplen);
2121 return 0;
2122 }
2123
2124 /* We already calculated the skblen diff; inline skb_pull */
2125 skb->len = skblen;
2126 BUG_ON(skb->len < skb->data_len);
2127 skb->data += PPPOE_SES_HLEN;
2128	switch (ntohs(phdr->tag[0].tag_type)) {
2129	case PPP_IP:
2130		skb->protocol = htons(ETH_P_IP);
2131 offloaded = sfe_ipv4_recv(dev, skb);
2132 break;
2133 default:
2134		DEBUG_TRACE("sfe pppoe: unknown protocol %x\n",
2135 ntohs(phdr->tag[0].tag_type));
2136 offloaded = 0;
2137 break;
2138 }
2139
2140 if (!offloaded) {
2141 /* Put the packet back the way we found it - not offloaded */
2142 skb_push(skb, PPPOE_SES_HLEN);
2143 skb->protocol = proto;
2144 }
2145
2146 return offloaded;
2147}
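
/*
 * Editorial sketch (not part of the original source): a receive hook would
 * be expected to dispatch on the Ethernet protocol, handing PPPoE session
 * frames to sfe_pppoe_recv() and plain IPv4 frames to sfe_ipv4_recv().  The
 * function below is illustrative only; its name and the exact integration
 * point (e.g. a bridge or driver rx hook) are assumptions.
 */
static int sfe_example_rx_dispatch(struct net_device *dev, struct sk_buff *skb)
{
	switch (ntohs(skb->protocol)) {
	case ETH_P_PPP_SES:
		return sfe_pppoe_recv(dev, skb);

	case ETH_P_IP:
		return sfe_ipv4_recv(dev, skb);

	default:
		/* Not a protocol we accelerate - let the normal stack have it. */
		return 0;
	}
}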
2148
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002149/*
2150 * sfe_ipv4_recv()
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002151 * Handle packet receives and forwarding.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002152 *
2153 * Returns 1 if the packet is forwarded or 0 if it isn't.
2154 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002155int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002156{
2157 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002158 unsigned int len;
2159 unsigned int tot_len;
2160 unsigned int frag_off;
2161 unsigned int ihl;
2162 bool flush_on_find;
2163 bool ip_options;
2164 struct sfe_ipv4_iphdr *iph;
2165 uint32_t protocol;
2166
2167 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002168 * Check that we have space for an IP header here.
2169 */
2170 len = skb->len;
2171 if (unlikely(len < sizeof(struct sfe_ipv4_iphdr))) {
2172 spin_lock(&si->lock);
2173 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2174 si->packets_not_forwarded++;
2175 spin_unlock(&si->lock);
2176
2177 DEBUG_TRACE("len: %u is too short\n", len);
2178 return 0;
2179 }
2180
2181 /*
2182 * Check that our "total length" is large enough for an IP header.
2183 */
2184 iph = (struct sfe_ipv4_iphdr *)skb->data;
2185 tot_len = ntohs(iph->tot_len);
2186 if (unlikely(tot_len < sizeof(struct sfe_ipv4_iphdr))) {
2187 spin_lock(&si->lock);
2188 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2189 si->packets_not_forwarded++;
2190 spin_unlock(&si->lock);
2191
2192 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2193 return 0;
2194 }
2195
2196 /*
2197 * Is our IP version wrong?
2198 */
2199 if (unlikely(iph->version != 4)) {
2200 spin_lock(&si->lock);
2201 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2202 si->packets_not_forwarded++;
2203 spin_unlock(&si->lock);
2204
2205 DEBUG_TRACE("IP version: %u\n", iph->version);
2206 return 0;
2207 }
2208
2209 /*
2210 * Does our datagram fit inside the skb?
2211 */
2212 if (unlikely(tot_len > len)) {
2213 spin_lock(&si->lock);
2214 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2215 si->packets_not_forwarded++;
2216 spin_unlock(&si->lock);
2217
2218 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2219 return 0;
2220 }
2221
2222 /*
2223 * Do we have a non-initial fragment?
2224 */
2225 frag_off = ntohs(iph->frag_off);
2226 if (unlikely(frag_off & IP_OFFSET)) {
2227 spin_lock(&si->lock);
2228 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2229 si->packets_not_forwarded++;
2230 spin_unlock(&si->lock);
2231
2232 DEBUG_TRACE("non-initial fragment\n");
2233 return 0;
2234 }
2235
2236 /*
2237 * If we have a (first) fragment then mark it to cause any connection to flush.
2238 */
2239 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2240
2241 /*
2242	 * Do we have any IP options? That's definitely a slow path! If we do have IP
2243 * options we need to recheck our header size.
2244 */
2245 ihl = iph->ihl << 2;
2246 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_iphdr)) ? true : false;
2247 if (unlikely(ip_options)) {
2248 if (unlikely(len < ihl)) {
2249 spin_lock(&si->lock);
2250 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2251 si->packets_not_forwarded++;
2252 spin_unlock(&si->lock);
2253
2254 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2255 return 0;
2256 }
2257
2258 flush_on_find = true;
2259 }
2260
2261 protocol = iph->protocol;
2262 if (IPPROTO_UDP == protocol) {
2263 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2264 }
2265
2266 if (IPPROTO_TCP == protocol) {
2267 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2268 }
2269
2270 if (IPPROTO_ICMP == protocol) {
2271 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2272 }
2273
2274 spin_lock(&si->lock);
2275 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2276 si->packets_not_forwarded++;
2277 spin_unlock(&si->lock);
2278
2279 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2280 return 0;
2281}
2282
2283/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002284 * sfe_ipv4_create_rule()
2285 * Create a forwarding rule.
2286 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002287void sfe_ipv4_create_rule(struct sfe_ipv4_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002288{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002289 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002290 struct sfe_ipv4_connection *c;
2291 struct sfe_ipv4_connection_match *original_cm;
2292 struct sfe_ipv4_connection_match *reply_cm;
2293
2294 spin_lock_bh(&si->lock);
2295 si->connection_create_requests++;
2296
2297 /*
2298 * Check to see if there is already a flow that matches the rule we're trying
2299 * to create. If there is then we can't create a new one.
2300 */
2301 c = sfe_ipv4_find_sfe_ipv4_connection(si, sic->protocol, sic->src_ip, sic->src_port,
2302 sic->dest_ip, sic->dest_port);
2303 if (c) {
2304 si->connection_create_collisions++;
2305
2306 /*
2307 * If we already have the flow then it's likely that this request to
2308 * create the connection rule contains more up-to-date information.
2309 * Check and update accordingly.
2310 */
2311 original_cm = c->original_match;
2312 reply_cm = c->reply_match;
2313
2314 switch (sic->protocol) {
2315 case IPPROTO_TCP:
2316 if (original_cm->protocol_state.tcp.max_win < sic->src_td_max_window) {
2317 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window;
2318 }
2319 if ((int32_t)(original_cm->protocol_state.tcp.end - sic->src_td_end) < 0) {
2320 original_cm->protocol_state.tcp.end = sic->src_td_end;
2321 }
2322 if ((int32_t)(original_cm->protocol_state.tcp.max_end - sic->src_td_max_end) < 0) {
2323 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2324 }
2325 if (reply_cm->protocol_state.tcp.max_win < sic->dest_td_max_window) {
2326 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window;
2327 }
2328 if ((int32_t)(reply_cm->protocol_state.tcp.end - sic->dest_td_end) < 0) {
2329 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2330 }
2331 if ((int32_t)(reply_cm->protocol_state.tcp.max_end - sic->dest_td_max_end) < 0) {
2332 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2333 }
2334 original_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2335 reply_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2336 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2337 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2338 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2339 }
2340 break;
2341 }
2342
2343 spin_unlock_bh(&si->lock);
2344
2345 DEBUG_TRACE("connection already exists - p: %d\n"
2346 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002347 sic->protocol, sic->src_dev->name, sic->src_mac, &sic->src_ip, ntohs(sic->src_port),
2348 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip, ntohs(sic->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002349 return;
2350 }
2351
2352 /*
2353 * Allocate the various connection tracking objects.
2354 */
2355 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2356 if (unlikely(!c)) {
2357 spin_unlock_bh(&si->lock);
2358 return;
2359 }
2360
2361 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2362 if (unlikely(!original_cm)) {
2363 spin_unlock_bh(&si->lock);
2364 kfree(c);
2365 return;
2366 }
2367
2368 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2369 if (unlikely(!reply_cm)) {
2370 spin_unlock_bh(&si->lock);
2371 kfree(original_cm);
2372 kfree(c);
2373 return;
2374 }
2375
2376 /*
2377 * Fill in the "original" direction connection matching object.
2378 * Note that the transmit MAC address is "dest_mac_xlate" because
2379 * we always know both ends of a connection by their translated
2380 * addresses and not their public addresses.
2381 */
2382 original_cm->match_dev = sic->src_dev;
2383 original_cm->match_protocol = sic->protocol;
2384 original_cm->match_src_ip = sic->src_ip;
2385 original_cm->match_src_port = sic->src_port;
2386 original_cm->match_dest_ip = sic->dest_ip;
2387 original_cm->match_dest_port = sic->dest_port;
2388 original_cm->xlate_src_ip = sic->src_ip_xlate;
2389 original_cm->xlate_src_port = sic->src_port_xlate;
2390 original_cm->xlate_dest_ip = sic->dest_ip_xlate;
2391 original_cm->xlate_dest_port = sic->dest_port_xlate;
2392 original_cm->rx_packet_count = 0;
2393 original_cm->rx_packet_count64 = 0;
2394 original_cm->rx_byte_count = 0;
2395 original_cm->rx_byte_count64 = 0;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002396 original_cm->pppoe_sk = sic->dest_pppoe_sk;
2397 if (original_cm->pppoe_sk) {
2398 sock_hold(original_cm->pppoe_sk);
2399 original_cm->xmit_dev = pppox_sk(original_cm->pppoe_sk)->pppoe_dev;
2400 } else {
2401 original_cm->xmit_dev = sic->dest_dev;
2402 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002403 original_cm->xmit_dev_mtu = sic->dest_mtu;
2404 memcpy(original_cm->xmit_src_mac, sic->dest_dev->dev_addr, ETH_ALEN);
2405 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2406 original_cm->connection = c;
2407 original_cm->counter_match = reply_cm;
2408 original_cm->flags = 0;
2409 original_cm->active_next = NULL;
2410 original_cm->active_prev = NULL;
2411 original_cm->active = false;
2412 if (sic->dest_dev->header_ops->create == eth_header) {
2413 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR;
2414 }
2415
2416 /*
2417 * Fill in the "reply" direction connection matching object.
2418 */
2419 reply_cm->match_dev = sic->dest_dev;
2420 reply_cm->match_protocol = sic->protocol;
2421 reply_cm->match_src_ip = sic->dest_ip_xlate;
2422 reply_cm->match_src_port = sic->dest_port_xlate;
2423 reply_cm->match_dest_ip = sic->src_ip_xlate;
2424 reply_cm->match_dest_port = sic->src_port_xlate;
2425 reply_cm->xlate_src_ip = sic->dest_ip;
2426 reply_cm->xlate_src_port = sic->dest_port;
2427 reply_cm->xlate_dest_ip = sic->src_ip;
2428 reply_cm->xlate_dest_port = sic->src_port;
2429 reply_cm->rx_packet_count = 0;
2430 reply_cm->rx_packet_count64 = 0;
2431 reply_cm->rx_byte_count = 0;
2432 reply_cm->rx_byte_count64 = 0;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002433 reply_cm->pppoe_sk = sic->src_pppoe_sk;
2434 if (reply_cm->pppoe_sk) {
2435 sock_hold(reply_cm->pppoe_sk);
2436 reply_cm->xmit_dev = pppox_sk(reply_cm->pppoe_sk)->pppoe_dev;
2437 } else {
2438 reply_cm->xmit_dev = sic->src_dev;
2439 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002440 reply_cm->xmit_dev_mtu = sic->src_mtu;
2441 memcpy(reply_cm->xmit_src_mac, sic->src_dev->dev_addr, ETH_ALEN);
2442 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2443 reply_cm->connection = c;
2444 reply_cm->counter_match = original_cm;
2445 reply_cm->flags = 0;
2446 reply_cm->active_next = NULL;
2447 reply_cm->active_prev = NULL;
2448 reply_cm->active = false;
2449 if (sic->src_dev->header_ops->create == eth_header) {
2450 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_ETH_HDR;
2451 }
2452
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002453
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002454 if (sic->dest_ip != sic->dest_ip_xlate || sic->dest_port != sic->dest_port_xlate) {
2455 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2456 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2457 }
2458
2459 if (sic->src_ip != sic->src_ip_xlate || sic->src_port != sic->src_port_xlate) {
2460 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2461 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2462 }
2463
2464 c->protocol = sic->protocol;
2465 c->src_ip = sic->src_ip;
2466 c->src_ip_xlate = sic->src_ip_xlate;
2467 c->src_port = sic->src_port;
2468 c->src_port_xlate = sic->src_port_xlate;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002469 c->original_dev = reply_cm->xmit_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002470 c->original_match = original_cm;
2471 c->dest_ip = sic->dest_ip;
2472 c->dest_ip_xlate = sic->dest_ip_xlate;
2473 c->dest_port = sic->dest_port;
2474 c->dest_port_xlate = sic->dest_port_xlate;
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002475 c->reply_dev = original_cm->xmit_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002476 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002477 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002478
2479 c->last_sync_jiffies = get_jiffies_64();
2480 c->iterators = 0;
2481 c->pending_free = false;
2482
2483 /*
2484 * Take hold of our source and dest devices for the duration of the connection.
2485 */
2486 dev_hold(c->original_dev);
2487 dev_hold(c->reply_dev);
2488
2489 /*
2490 * Initialize the protocol-specific information that we track.
2491 */
2492 switch (sic->protocol) {
2493 case IPPROTO_TCP:
2494 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2495 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2496 original_cm->protocol_state.tcp.end = sic->src_td_end;
2497 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2498 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2499 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2500 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2501 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2502 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2503 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2504 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2505 }
2506 break;
2507 }
2508
2509 sfe_ipv4_connection_match_compute_translations(original_cm);
2510 sfe_ipv4_connection_match_compute_translations(reply_cm);
2511 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2512
2513 spin_unlock_bh(&si->lock);
2514
2515 /*
2516 * We have everything we need!
2517 */
2518 DEBUG_INFO("new connection - p: %d\n"
2519 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2520 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
2521 sic->protocol,
2522 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002523 &sic->src_ip, &sic->src_ip_xlate, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002524 sic->dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002525 &sic->dest_ip, &sic->dest_ip_xlate, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002526}
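
/*
 * Editorial sketch (not part of the original source): a minimal example of
 * how a caller might fill in struct sfe_ipv4_create for a non-NAT TCP flow.
 * Only the fields exercised above are shown; the MAC addresses and the TCP
 * window/sequence fields (src_td_*, dest_td_*) are left zeroed here, which
 * is why the sketch sets SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK.  All addresses
 * and ports are invented.
 */
static void sfe_example_offload_flow(struct net_device *lan_dev, struct net_device *wan_dev)
{
	struct sfe_ipv4_create sic;

	memset(&sic, 0, sizeof(sic));
	sic.protocol = IPPROTO_TCP;
	sic.src_dev = lan_dev;
	sic.dest_dev = wan_dev;
	sic.src_ip = htonl(0xc0a80102);		/* 192.168.1.2 */
	sic.src_ip_xlate = sic.src_ip;		/* no NAT in this sketch */
	sic.src_port = htons(40000);
	sic.src_port_xlate = sic.src_port;
	sic.dest_ip = htonl(0x08080808);	/* 8.8.8.8 */
	sic.dest_ip_xlate = sic.dest_ip;
	sic.dest_port = htons(80);
	sic.dest_port_xlate = sic.dest_port;
	sic.src_mtu = lan_dev->mtu;
	sic.dest_mtu = wan_dev->mtu;
	sic.flags = SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK;

	sfe_ipv4_create_rule(&sic);
}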
2527
2528/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002529 * sfe_ipv4_destroy_rule()
2530 * Destroy a forwarding rule.
2531 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002532void sfe_ipv4_destroy_rule(struct sfe_ipv4_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002533{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002534 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002535 struct sfe_ipv4_connection *c;
2536
2537 spin_lock_bh(&si->lock);
2538 si->connection_destroy_requests++;
2539
2540 /*
2541 * Check to see if we have a flow that matches the rule we're trying
2542 * to destroy. If there isn't then we can't destroy it.
2543 */
2544 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip, sid->src_port,
2545 sid->dest_ip, sid->dest_port);
2546 if (!c) {
2547 si->connection_destroy_misses++;
2548 spin_unlock_bh(&si->lock);
2549
2550 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002551 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2552 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002553 return;
2554 }
2555
2556 /*
2557 * Remove our connection details from the hash tables.
2558 */
2559 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2560 spin_unlock_bh(&si->lock);
2561
2562 /*
2563 * Finally synchronize state and free resources. We need to protect against
2564 * pre-emption by our bottom half while we do this though.
2565 */
2566 local_bh_disable();
2567 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2568 local_bh_enable();
2569
2570 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002571 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2572 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002573}
2574
2575/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002576 * sfe_ipv4_register_sync_rule_callback()
2577 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002578 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002579void sfe_ipv4_register_sync_rule_callback(sfe_ipv4_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002580{
2581 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002582
2583 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002584 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002585 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002586}
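
/*
 * Editorial sketch (not part of the original source): a connection manager
 * would be expected to register its sync handler once at init time.  The
 * handler below is invented for illustration and assumes, from the call in
 * sfe_ipv4_periodic_sync(), that the callback receives a struct
 * sfe_ipv4_sync pointer and returns void.
 */
static void sfe_example_sync_cb(struct sfe_ipv4_sync *sis)
{
	/* Fold the byte/packet deltas in *sis back into conntrack here. */
}

static void sfe_example_register_cb(void)
{
	sfe_ipv4_register_sync_rule_callback(sfe_example_sync_cb);
}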
2587
2588/*
2589 * sfe_ipv4_get_debug_dev()
2590 */
2591static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2592 struct device_attribute *attr,
2593 char *buf)
2594{
2595 struct sfe_ipv4 *si = &__si;
2596 ssize_t count;
2597 int num;
2598
2599 spin_lock_bh(&si->lock);
2600 num = si->debug_dev;
2601 spin_unlock_bh(&si->lock);
2602
2603 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2604 return count;
2605}
2606
2607/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002608 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002609 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002610static const struct device_attribute sfe_ipv4_debug_dev_attr =
2611	__ATTR(debug_dev, S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2612
2613/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002614 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002615 * Destroy all connections that match a particular device.
2616 *
2617 * If we pass dev as NULL then this destroys all connections.
2618 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002619void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002620{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002621 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002622 struct sfe_ipv4_connection *c;
2623 struct sfe_ipv4_connection *c_next;
2624
2625 spin_lock_bh(&si->lock);
2626 c = si->all_connections_head;
2627 if (!c) {
2628 spin_unlock_bh(&si->lock);
2629 return;
2630 }
2631
2632 c->iterators++;
2633
2634 /*
2635 * Iterate over all connections
2636 */
2637 while (c) {
2638 c_next = c->all_connections_next;
2639
2640 /*
2641 * Before we do anything else, take an iterator reference for the
2642 * connection we'll iterate next.
2643 */
2644 if (c_next) {
2645 c_next->iterators++;
2646 }
2647
2648 /*
2649 * Does this connection relate to the device we are destroying? If
2650 * it does then ensure it is marked for being freed as soon as it
2651 * is no longer being iterated.
2652 */
2653 if (!dev
2654 || (dev == c->original_dev)
2655 || (dev == c->reply_dev)) {
2656 c->pending_free = true;
2657 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2658 }
2659
2660 /*
2661 * Remove the iterator reference that we acquired and see if we
2662 * should free any resources.
2663 */
2664 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2665 spin_unlock_bh(&si->lock);
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002666
2667 if (c->original_match->pppoe_sk) {
2668 sock_put(c->original_match->pppoe_sk);
2669 }
2670 if (c->reply_match->pppoe_sk) {
2671 sock_put(c->reply_match->pppoe_sk);
2672 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002673 /*
2674 * This entry is dead so release our hold of the source and
2675 * dest devices and free the memory for our connection objects.
2676 */
2677 dev_put(c->original_dev);
2678 dev_put(c->reply_dev);
2679 kfree(c->original_match);
2680 kfree(c->reply_match);
2681 kfree(c);
2682
2683 spin_lock_bh(&si->lock);
2684 }
2685
2686 c = c_next;
2687 }
2688
2689 spin_unlock_bh(&si->lock);
2690}
2691
2692/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002693 * sfe_ipv4_periodic_sync()
2694 */
2695static void sfe_ipv4_periodic_sync(unsigned long arg)
2696{
2697 struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
2698 uint64_t now_jiffies;
2699 int quota;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002700 sfe_ipv4_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002701
2702 now_jiffies = get_jiffies_64();
2703
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002704 rcu_read_lock();
2705 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2706 if (!sync_rule_callback) {
2707 rcu_read_unlock();
2708 goto done;
2709 }
2710
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002711 spin_lock_bh(&si->lock);
2712 sfe_ipv4_update_summary_stats(si);
2713
2714 /*
2715 * Get an estimate of the number of connections to parse in this sync.
2716 */
2717 quota = (si->num_connections + 63) / 64;
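	/*
	 * Editorial note (not in the original source): combined with the 10ms
	 * re-arm below (HZ / 100), this quota sweeps roughly 1/64th of the
	 * connections per tick.  A hypothetical example: 1000 connections give
	 * quota = (1000 + 63) / 64 = 16 per tick, so the whole active list is
	 * serviced in well under a second.
	 */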
2718
2719 /*
2720 * Walk the "active" list and sync the connection state.
2721 */
2722 while (quota--) {
2723 struct sfe_ipv4_connection_match *cm;
2724 struct sfe_ipv4_connection_match *counter_cm;
2725 struct sfe_ipv4_connection *c;
2726 struct sfe_ipv4_sync sis;
2727
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002728 cm = si->active_head;
2729 if (!cm) {
2730 break;
2731 }
2732
2733 cm->active = false;
2734
2735 /*
2736 * Having found an entry we now remove it from the active scan list.
2737 */
2738 si->active_head = cm->active_next;
2739 if (likely(cm->active_next)) {
2740 cm->active_next->active_prev = NULL;
2741 } else {
2742 si->active_tail = NULL;
2743 }
2744 cm->active_next = NULL;
2745
2746 /*
2747 * We scan the connection match lists so there's a possibility that our
2748 * counter match is in the list too. If it is then remove it.
2749 */
2750 counter_cm = cm->counter_match;
2751 if (counter_cm->active) {
2752 counter_cm->active = false;
2753
2754 if (likely(counter_cm->active_prev)) {
2755 counter_cm->active_prev->active_next = counter_cm->active_next;
2756 } else {
2757 si->active_head = counter_cm->active_next;
2758 }
2759
2760 if (likely(counter_cm->active_next)) {
2761 counter_cm->active_next->active_prev = counter_cm->active_prev;
2762 } else {
2763 si->active_tail = counter_cm->active_prev;
2764 }
2765
2766 counter_cm->active_next = NULL;
2767 counter_cm->active_prev = NULL;
2768 }
2769
2770 /*
2771 * Sync the connection state.
2772 */
2773 c = cm->connection;
2774 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
2775
2776 /*
2777 * We don't want to be holding the lock when we sync!
2778 */
2779 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002780 sync_rule_callback(&sis);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002781 spin_lock_bh(&si->lock);
2782 }
2783
2784 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002785 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002786
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002787done:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002788 mod_timer(&si->timer, jiffies + (HZ / 100));
2789}
2790
2791#define CHAR_DEV_MSG_SIZE 768
2792
2793/*
2794 * sfe_ipv4_debug_dev_read_start()
2795 * Generate part of the XML output.
2796 */
2797static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2798 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2799{
2800 int bytes_read;
2801
2802 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2803 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2804 return false;
2805 }
2806
2807 *length -= bytes_read;
2808 *total_read += bytes_read;
2809
2810 ws->state++;
2811 return true;
2812}
2813
2814/*
2815 * sfe_ipv4_debug_dev_read_connections_start()
2816 * Generate part of the XML output.
2817 */
2818static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2819 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2820{
2821 int bytes_read;
2822
2823 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2824 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2825 return false;
2826 }
2827
2828 *length -= bytes_read;
2829 *total_read += bytes_read;
2830
2831 ws->state++;
2832 return true;
2833}
2834
2835/*
2836 * sfe_ipv4_debug_dev_read_connections_connection()
2837 * Generate part of the XML output.
2838 */
2839static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2840 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2841{
2842 struct sfe_ipv4_connection *c;
2843 struct sfe_ipv4_connection *c_next;
2844 struct sfe_ipv4_connection_match *original_cm;
2845 struct sfe_ipv4_connection_match *reply_cm;
2846 int bytes_read;
2847 int protocol;
2848 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002849 __be32 src_ip;
2850 __be32 src_ip_xlate;
2851 __be16 src_port;
2852 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002853 uint64_t src_rx_packets;
2854 uint64_t src_rx_bytes;
2855 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002856 __be32 dest_ip;
2857 __be32 dest_ip_xlate;
2858 __be16 dest_port;
2859 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002860 uint64_t dest_rx_packets;
2861 uint64_t dest_rx_bytes;
2862 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002863 uint32_t mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002864
2865 spin_lock_bh(&si->lock);
2866 c = ws->iter_conn;
2867
2868 /*
2869 * Is this the first connection we need to scan?
2870 */
2871 if (!c) {
2872 c = si->all_connections_head;
2873
2874 /*
2875 * If there were no connections then move to the next state.
2876 */
2877 if (!c) {
2878 spin_unlock_bh(&si->lock);
2879
2880 ws->state++;
2881 return true;
2882 }
2883
2884 c->iterators++;
2885 }
2886
2887 c_next = c->all_connections_next;
2888 ws->iter_conn = c_next;
2889
2890 /*
2891 * Before we do anything else, take an iterator reference for the
2892 * connection we'll iterate next.
2893 */
2894 if (c_next) {
2895 c_next->iterators++;
2896 }
2897
2898 /*
2899 * Remove the iterator reference that we acquired and see if we
2900 * should free any resources.
2901 */
2902 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2903 spin_unlock_bh(&si->lock);
2904
Ben Menchaca0971b7a2014-01-10 14:43:02 -06002905 if (c->original_match->pppoe_sk) {
2906 sock_put(c->original_match->pppoe_sk);
2907 }
2908 if (c->reply_match->pppoe_sk) {
2909 sock_put(c->reply_match->pppoe_sk);
2910 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002911 /*
2912 * This entry is dead so release our hold of the source and
2913 * dest devices and free the memory for our connection objects.
2914 */
2915 dev_put(c->original_dev);
2916 dev_put(c->reply_dev);
2917 kfree(c->original_match);
2918 kfree(c->reply_match);
2919 kfree(c);
2920
2921 /*
2922 * If we have no more connections then move to the next state.
2923 */
2924 if (!c_next) {
2925 ws->state++;
2926 }
2927
2928 return true;
2929 }
2930
2931 original_cm = c->original_match;
2932 reply_cm = c->reply_match;
2933
2934 protocol = c->protocol;
2935 src_dev = c->original_dev;
2936 src_ip = c->src_ip;
2937 src_ip_xlate = c->src_ip_xlate;
2938 src_port = c->src_port;
2939 src_port_xlate = c->src_port_xlate;
2940
2941 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2942 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2943
2944 src_rx_packets = original_cm->rx_packet_count64;
2945 src_rx_bytes = original_cm->rx_byte_count64;
2946 dest_dev = c->reply_dev;
2947 dest_ip = c->dest_ip;
2948 dest_ip_xlate = c->dest_ip_xlate;
2949 dest_port = c->dest_port;
2950 dest_port_xlate = c->dest_port_xlate;
2951 dest_rx_packets = reply_cm->rx_packet_count64;
2952 dest_rx_bytes = reply_cm->rx_byte_count64;
2953 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002954 mark = c->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002955 spin_unlock_bh(&si->lock);
2956
2957 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
2958 "protocol=\"%u\" "
2959 "src_dev=\"%s\" "
2960 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
2961 "src_port=\"%u\" src_port_xlate=\"%u\" "
2962 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
2963 "dest_dev=\"%s\" "
2964 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
2965 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
2966 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002967 "last_sync=\"%llu\" "
2968 "mark=\"%u\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002969 protocol,
2970 src_dev->name,
2971 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002972 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002973 src_rx_packets, src_rx_bytes,
2974 dest_dev->name,
2975 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002976 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002977 dest_rx_packets, dest_rx_bytes,
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002978 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002979
2980 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2981 return false;
2982 }
2983
2984 *length -= bytes_read;
2985 *total_read += bytes_read;
2986
2987 /*
2988 * If we have no more connections then move to the next state.
2989 */
2990 if (!c_next) {
2991 ws->state++;
2992 }
2993
2994 return true;
2995}
2996
2997/*
2998 * sfe_ipv4_debug_dev_read_connections_end()
2999 * Generate part of the XML output.
3000 */
3001static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3002 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3003{
3004 int bytes_read;
3005
3006 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3007 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3008 return false;
3009 }
3010
3011 *length -= bytes_read;
3012 *total_read += bytes_read;
3013
3014 ws->state++;
3015 return true;
3016}
3017
3018/*
3019 * sfe_ipv4_debug_dev_read_exceptions_start()
3020 * Generate part of the XML output.
3021 */
3022static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3023 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3024{
3025 int bytes_read;
3026
3027 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3028 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3029 return false;
3030 }
3031
3032 *length -= bytes_read;
3033 *total_read += bytes_read;
3034
3035 ws->state++;
3036 return true;
3037}
3038
3039/*
3040 * sfe_ipv4_debug_dev_read_exceptions_exception()
3041 * Generate part of the XML output.
3042 */
3043static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3044 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3045{
3046 uint64_t ct;
3047
3048 spin_lock_bh(&si->lock);
3049 ct = si->exception_events64[ws->iter_exception];
3050 spin_unlock_bh(&si->lock);
3051
3052 if (ct) {
3053 int bytes_read;
3054
3055 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3056 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3057 sfe_ipv4_exception_events_string[ws->iter_exception],
3058 ct);
3059 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3060 return false;
3061 }
3062
3063 *length -= bytes_read;
3064 *total_read += bytes_read;
3065 }
3066
3067 ws->iter_exception++;
3068 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3069 ws->iter_exception = 0;
3070 ws->state++;
3071 }
3072
3073 return true;
3074}
3075
3076/*
3077 * sfe_ipv4_debug_dev_read_exceptions_end()
3078 * Generate part of the XML output.
3079 */
3080static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3081 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3082{
3083 int bytes_read;
3084
3085 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3086 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3087 return false;
3088 }
3089
3090 *length -= bytes_read;
3091 *total_read += bytes_read;
3092
3093 ws->state++;
3094 return true;
3095}
3096
3097/*
3098 * sfe_ipv4_debug_dev_read_stats()
3099 * Generate part of the XML output.
3100 */
3101static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3102 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3103{
3104 int bytes_read;
3105 unsigned int num_connections;
3106 uint64_t packets_forwarded;
3107 uint64_t packets_not_forwarded;
3108 uint64_t connection_create_requests;
3109 uint64_t connection_create_collisions;
3110 uint64_t connection_destroy_requests;
3111 uint64_t connection_destroy_misses;
3112 uint64_t connection_flushes;
3113 uint64_t connection_match_hash_hits;
3114 uint64_t connection_match_hash_reorders;
3115
3116 spin_lock_bh(&si->lock);
3117 sfe_ipv4_update_summary_stats(si);
3118
3119 num_connections = si->num_connections;
3120 packets_forwarded = si->packets_forwarded64;
3121 packets_not_forwarded = si->packets_not_forwarded64;
3122 connection_create_requests = si->connection_create_requests64;
3123 connection_create_collisions = si->connection_create_collisions64;
3124 connection_destroy_requests = si->connection_destroy_requests64;
3125 connection_destroy_misses = si->connection_destroy_misses64;
3126 connection_flushes = si->connection_flushes64;
3127 connection_match_hash_hits = si->connection_match_hash_hits64;
3128 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3129 spin_unlock_bh(&si->lock);
3130
3131 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3132 "num_connections=\"%u\" "
3133 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3134 "create_requests=\"%llu\" create_collisions=\"%llu\" "
3135 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3136 "flushes=\"%llu\" "
3137 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3138 num_connections,
3139 packets_forwarded,
3140 packets_not_forwarded,
3141 connection_create_requests,
3142 connection_create_collisions,
3143 connection_destroy_requests,
3144 connection_destroy_misses,
3145 connection_flushes,
3146 connection_match_hash_hits,
3147 connection_match_hash_reorders);
3148 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3149 return false;
3150 }
3151
3152 *length -= bytes_read;
3153 *total_read += bytes_read;
3154
3155 ws->state++;
3156 return true;
3157}
3158
3159/*
3160 * sfe_ipv4_debug_dev_read_end()
3161 * Generate part of the XML output.
3162 */
3163static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3164 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3165{
3166 int bytes_read;
3167
3168 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3169 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3170 return false;
3171 }
3172
3173 *length -= bytes_read;
3174 *total_read += bytes_read;
3175
3176 ws->state++;
3177 return true;
3178}
3179
3180/*
3181 * Array of write functions that write various XML elements that correspond to
3182 * our XML output state machine.
3183 */
3184sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
3185 sfe_ipv4_debug_dev_read_start,
3186 sfe_ipv4_debug_dev_read_connections_start,
3187 sfe_ipv4_debug_dev_read_connections_connection,
3188 sfe_ipv4_debug_dev_read_connections_end,
3189 sfe_ipv4_debug_dev_read_exceptions_start,
3190 sfe_ipv4_debug_dev_read_exceptions_exception,
3191 sfe_ipv4_debug_dev_read_exceptions_end,
3192 sfe_ipv4_debug_dev_read_stats,
3193 sfe_ipv4_debug_dev_read_end,
3194};
3195
3196/*
3197 * sfe_ipv4_debug_dev_read()
3198 * Send info to userspace upon read request from user
3199 */
3200static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3201{
3202 char msg[CHAR_DEV_MSG_SIZE];
3203 int total_read = 0;
3204 struct sfe_ipv4_debug_xml_write_state *ws;
3205 struct sfe_ipv4 *si = &__si;
3206
3207 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3208 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3209		if (!(sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3210			break;	/* A write method failed (copy_to_user) - don't spin forever. */
3211		}
3212 }
3213
3214 return total_read;
3215}
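
/*
 * Editorial note (not in the original source): the XML above is consumed by
 * reading the dynamically-numbered character device, whose major number is
 * published via sysfs.  Assumed usage from a root shell:
 *
 *   major=$(cat /sys/sfe_ipv4/debug_dev)
 *   mknod /dev/sfe_ipv4 c $major 0
 *   cat /dev/sfe_ipv4
 */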
3216
3217/*
3218 * sfe_ipv4_debug_dev_write()
3219 * Write to char device not required/supported
3220 */
3221static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3222{
3223 return -EINVAL;
3224}
3225
3226/*
3227 * sfe_ipv4_debug_dev_open()
3228 */
3229static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3230{
3231 struct sfe_ipv4_debug_xml_write_state *ws;
3232
3233 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3234 if (!ws) {
3235 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3236 if (!ws) {
3237 return -ENOMEM;
3238 }
3239
3240 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3241 file->private_data = ws;
3242 }
3243
3244 return 0;
3245}

/*
 * sfe_ipv4_debug_dev_release()
 * Release the per-reader XML write state and any connection it pinned.
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
        struct sfe_ipv4_debug_xml_write_state *ws;

        ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
        if (ws) {
                struct sfe_ipv4_connection *c;

                /*
                 * Are we currently iterating a connection? If we are then
                 * make sure that we reduce its iterator count and, if
                 * necessary, free it.
                 */
                c = ws->iter_conn;
                if (c) {
                        struct sfe_ipv4 *si = &__si;

                        spin_lock_bh(&si->lock);
                        if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
                                spin_unlock_bh(&si->lock);

                                if (c->original_match->pppoe_sk) {
                                        sock_put(c->original_match->pppoe_sk);
                                }
                                if (c->reply_match->pppoe_sk) {
                                        sock_put(c->reply_match->pppoe_sk);
                                }

                                /*
                                 * This entry is dead so release our hold of the source and
                                 * dest devices and free the memory for our connection objects.
                                 */
                                dev_put(c->original_dev);
                                dev_put(c->reply_dev);
                                kfree(c->original_match);
                                kfree(c->reply_match);
                                kfree(c);
                        } else {
                                /*
                                 * The connection is still live - just drop
                                 * the lock rather than leaving it held.
                                 */
                                spin_unlock_bh(&si->lock);
                        }
                }

                /*
                 * We've finished with our output so free the write state.
                 */
                kfree(ws);
        }

        return 0;
}

/*
 * File operations used in the debug char device
 */
static const struct file_operations sfe_ipv4_debug_dev_fops = {
        .owner = THIS_MODULE,
        .read = sfe_ipv4_debug_dev_read,
        .write = sfe_ipv4_debug_dev_write,
        .open = sfe_ipv4_debug_dev_open,
        .release = sfe_ipv4_debug_dev_release
};

/*
 * sfe_ipv4_init()
 */
static int __init sfe_ipv4_init(void)
{
        struct sfe_ipv4 *si = &__si;
        int result = -1;

        DEBUG_INFO("SFE IPv4 init\n");

        /*
         * Initialize our lock before registering anything that can take it:
         * both the periodic sync timer and the debug char device use it.
         */
        spin_lock_init(&si->lock);

        /*
         * Create sys/sfe_ipv4
         */
        si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
        if (!si->sys_sfe_ipv4) {
                DEBUG_ERROR("failed to register sfe_ipv4\n");
                goto exit1;
        }

        /*
         * Create a file for each parameter supported by this module.
         */
        result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
        if (result) {
                DEBUG_ERROR("failed to register debug dev file: %d\n", result);
                goto exit2;
        }

        /*
         * Register our debug char device.
         */
        result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
        if (result < 0) {
                DEBUG_ERROR("Failed to register chrdev: %d\n", result);
                goto exit3;
        }

        si->debug_dev = result;
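
        /*
         * Since we passed 0 to register_chrdev() above, the major number is
         * allocated dynamically, so a device node has to be created by hand
         * before the debug output can be read, e.g. (assuming the major
         * reported for sfe_ipv4 in /proc/devices turns out to be 248):
         *
         *      mknod /dev/sfe_ipv4 c 248 0
         */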

        /*
         * Create a timer to handle periodic statistics.
         */
        setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
        mod_timer(&si->timer, jiffies + (HZ / 100));

        return 0;

exit3:
        sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

exit2:
        kobject_put(si->sys_sfe_ipv4);

exit1:
        return result;
}

/*
 * sfe_ipv4_exit()
 */
static void __exit sfe_ipv4_exit(void)
{
        struct sfe_ipv4 *si = &__si;

        DEBUG_INFO("SFE IPv4 exit\n");

        /*
         * Destroy all connections.
         */
        sfe_ipv4_destroy_all_rules_for_dev(NULL);

        /*
         * XXX - this is where we need to unregister with any lower level
         * offload services.
         */

        del_timer_sync(&si->timer);

        unregister_chrdev(si->debug_dev, "sfe_ipv4");

        sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

        kobject_put(si->sys_sfe_ipv4);
}

module_init(sfe_ipv4_init);
module_exit(sfe_ipv4_exit);

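/*
 * The exports below form the kernel-facing API of this module. As a
 * descriptive note only: the receive hooks are intended to be called from
 * the network stack's receive path, the rule functions create and tear down
 * offloaded connections, and the sync-rule callback lets a connection
 * manager (typically a separate module) receive periodic state updates.
 */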
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_pppoe_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("GPL");