blob: 4b72b4f8b058566e20de5cef5d73f11574ddb11e [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Matthew McClintocka3221942014-01-16 11:44:26 -06005 * Copyright (c) 2013 Qualcomm Atheros, Inc.
6 *
7 * All Rights Reserved.
8 * Qualcomm Atheros Confidential and Proprietary.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01009 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010011#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060012#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010013#include <linux/skbuff.h>
14#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010015#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060016#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010017
Dave Hudsondcd08fb2013-11-22 09:25:16 -060018#include "sfe.h"
19#include "sfe_ipv4.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020
21/*
Dave Hudsona8197e72013-12-17 23:46:22 +000022 * By default Linux IP header and transport layer header structures are
23 * unpacked, assuming that such headers should be 32-bit aligned.
24 * Unfortunately some wireless adaptors can't cope with this requirement and
25 * some CPUs can't handle misaligned accesses. For those platforms we
26 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
27 * When we do this the compiler will generate slightly worse code than for the
28 * aligned case (on most platforms) but will be much quicker than fixing
29 * things up in an unaligned trap handler.
30 */
31#define SFE_IPV4_UNALIGNED_IP_HEADER 1
32#if SFE_IPV4_UNALIGNED_IP_HEADER
33#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
34#else
35#define SFE_IPV4_UNALIGNED_STRUCT
36#endif
37
38/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060039 * An Ethernet header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000040 * help with performance on some platforms (see the definition of
41 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010042 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060043struct sfe_ipv4_eth_hdr {
44 __be16 h_dest[ETH_ALEN / 2];
45 __be16 h_source[ETH_ALEN / 2];
46 __be16 h_proto;
47} SFE_IPV4_UNALIGNED_STRUCT;
48
49/*
50 * An IPv4 header, but with an optional "packed" attribute to
51 * help with performance on some platforms (see the definition of
52 * SFE_IPV4_UNALIGNED_STRUCT)
53 */
54struct sfe_ipv4_ip_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010055#if defined(__LITTLE_ENDIAN_BITFIELD)
56 __u8 ihl:4,
57 version:4;
58#elif defined (__BIG_ENDIAN_BITFIELD)
59 __u8 version:4,
60 ihl:4;
61#else
62#error "Please fix <asm/byteorder.h>"
63#endif
64 __u8 tos;
65 __be16 tot_len;
66 __be16 id;
67 __be16 frag_off;
68 __u8 ttl;
69 __u8 protocol;
70 __sum16 check;
71 __be32 saddr;
72 __be32 daddr;
Dave Hudsondcd08fb2013-11-22 09:25:16 -060073
74 /*
75 * The options start here.
76 */
Dave Hudsona8197e72013-12-17 23:46:22 +000077} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010078
79/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060080 * A UDP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000081 * help with performance on some platforms (see the definition of
82 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010083 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060084struct sfe_ipv4_udp_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010085 __be16 source;
86 __be16 dest;
87 __be16 len;
88 __sum16 check;
Dave Hudsona8197e72013-12-17 23:46:22 +000089} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010090
91/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060092 * A TCP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000093 * help with performance on some platforms (see the definition of
94 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010095 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060096struct sfe_ipv4_tcp_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010097 __be16 source;
98 __be16 dest;
99 __be32 seq;
100 __be32 ack_seq;
101#if defined(__LITTLE_ENDIAN_BITFIELD)
102 __u16 res1:4,
103 doff:4,
104 fin:1,
105 syn:1,
106 rst:1,
107 psh:1,
108 ack:1,
109 urg:1,
110 ece:1,
111 cwr:1;
112#elif defined(__BIG_ENDIAN_BITFIELD)
113 __u16 doff:4,
114 res1:4,
115 cwr:1,
116 ece:1,
117 urg:1,
118 ack:1,
119 psh:1,
120 rst:1,
121 syn:1,
122 fin:1;
123#else
124#error "Adjust your <asm/byteorder.h> defines"
Nicolas Costaac2979c2014-01-14 10:35:24 -0600125#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100126 __be16 window;
127 __sum16 check;
128 __be16 urg_ptr;
Dave Hudsona8197e72013-12-17 23:46:22 +0000129} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100130
131/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100132 * Specifies the lower bound on ACK numbers carried in the TCP header
133 */
134#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
135
/*
 * IPv4 TCP connection match additional data.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;	/* Window scale */
	uint32_t max_win;	/* Maximum window size seen */
	uint32_t end;		/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;	/* Sequence number of the last byte to ack */
};
145
146/*
147 * Bit flags for IPv4 connection matching entry.
148 */
149#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
150 /* Perform source translation */
151#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
152 /* Perform destination translation */
153#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
154 /* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write a full L2 header on forward */
159
160/*
161 * IPv4 connection matching structure.
162 */
163struct sfe_ipv4_connection_match {
164 /*
165 * References to other objects.
166 */
167 struct sfe_ipv4_connection_match *next;
168 /* Next connection match entry in a list */
169 struct sfe_ipv4_connection_match *prev;
170 /* Previous connection match entry in a list */
171 struct sfe_ipv4_connection *connection;
172 /* Pointer to our connection */
173 struct sfe_ipv4_connection_match *counter_match;
174 /* Pointer to the connection match in the "counter" direction to this one */
175 struct sfe_ipv4_connection_match *active_next;
176 /* Pointer to the next connection in the active list */
177 struct sfe_ipv4_connection_match *active_prev;
178 /* Pointer to the previous connection in the active list */
179 bool active; /* Flag to indicate if we're on the active list */
180
181 /*
182 * Characteristics that identify flows that match this rule.
183 */
184 struct net_device *match_dev; /* Network device */
185 uint8_t match_protocol; /* Protocol */
Dave Hudson87973cd2013-10-22 16:00:04 +0100186 __be32 match_src_ip; /* Source IP address */
187 __be32 match_dest_ip; /* Destination IP address */
188 __be16 match_src_port; /* Source port/connection ident */
189 __be16 match_dest_port; /* Destination port/connection ident */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100190
191 /*
192 * Control the operations of the match.
193 */
194 uint32_t flags; /* Bit flags */
195
196 /*
197 * Connection state that we track once we match.
198 */
199 union { /* Protocol-specific state */
200 struct sfe_ipv4_tcp_connection_match tcp;
201 } protocol_state;
202 uint32_t rx_packet_count; /* Number of packets RX'd */
203 uint32_t rx_byte_count; /* Number of bytes RX'd */
204
205 /*
206 * Packet translation information.
207 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100208 __be32 xlate_src_ip; /* Address after source translation */
209 __be16 xlate_src_port; /* Port/connection ident after source translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100210 uint16_t xlate_src_csum_adjustment;
211 /* Transport layer checksum adjustment after source translation */
Dave Hudson87973cd2013-10-22 16:00:04 +0100212 __be32 xlate_dest_ip; /* Address after destination translation */
213 __be16 xlate_dest_port; /* Port/connection ident after destination translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100214 uint16_t xlate_dest_csum_adjustment;
215 /* Transport layer checksum adjustment after destination translation */
216
217 /*
218 * Packet transmit information.
219 */
220 struct net_device *xmit_dev; /* Network device on which to transmit */
221 unsigned short int xmit_dev_mtu;
222 /* Interface MTU */
223 uint16_t xmit_dest_mac[ETH_ALEN / 2];
224 /* Destination MAC address to use when forwarding */
225 uint16_t xmit_src_mac[ETH_ALEN / 2];
226 /* Source MAC address to use when forwarding */
227
228 /*
229 * Summary stats.
230 */
231 uint64_t rx_packet_count64; /* Number of packets RX'd */
232 uint64_t rx_byte_count64; /* Number of bytes RX'd */
233};
234
235/*
236 * Per-connection data structure.
237 */
238struct sfe_ipv4_connection {
239 struct sfe_ipv4_connection *next;
240 /* Pointer to the next entry in a hash chain */
241 struct sfe_ipv4_connection *prev;
242 /* Pointer to the previous entry in a hash chain */
243 int protocol; /* IP protocol number */
Dave Hudson87973cd2013-10-22 16:00:04 +0100244 __be32 src_ip; /* Source IP address */
245 __be32 src_ip_xlate; /* NAT-translated source IP address */
246 __be32 dest_ip; /* Destination IP address */
247 __be32 dest_ip_xlate; /* NAT-translated destination IP address */
248 __be16 src_port; /* Source port */
249 __be16 src_port_xlate; /* NAT-translated source port */
250 __be16 dest_port; /* Destination port */
251 __be16 dest_port_xlate; /* NAT-translated destination port */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100252 struct sfe_ipv4_connection_match *original_match;
253 /* Original direction matching structure */
254 struct net_device *original_dev;
255 /* Original direction source device */
256 struct sfe_ipv4_connection_match *reply_match;
257 /* Reply direction matching structure */
258 struct net_device *reply_dev; /* Reply direction source device */
259 uint64_t last_sync_jiffies; /* Jiffies count for the last sync */
260 struct sfe_ipv4_connection *all_connections_next;
261 /* Pointer to the next entry in the list of all connections */
262 struct sfe_ipv4_connection *all_connections_prev;
263 /* Pointer to the previous entry in the list of all connections */
264 int iterators; /* Number of iterators currently using this connection */
265 bool pending_free; /* Flag that indicates that this connection should be freed after iteration */
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600266 uint32_t mark; /* mark for outgoing packet */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100267};
268
269/*
270 * IPv4 connections and hash table size information.
271 */
272#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
273#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
274#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
275
/*
 * Reasons a packet could not be fast-forwarded and was handed back to
 * the slow (netfilter) path. Each value has a matching string in
 * sfe_ipv4_exception_events_string[] and a counter in struct sfe_ipv4.
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
315
316static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
317 "UDP_HEADER_INCOMPLETE",
318 "UDP_NO_CONNECTION",
319 "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
320 "UDP_SMALL_TTL",
321 "UDP_NEEDS_FRAGMENTATION",
322 "TCP_HEADER_INCOMPLETE",
323 "TCP_NO_CONNECTION_SLOW_FLAGS",
324 "TCP_NO_CONNECTION_FAST_FLAGS",
325 "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
326 "TCP_SMALL_TTL",
327 "TCP_NEEDS_FRAGMENTATION",
328 "TCP_FLAGS",
329 "TCP_SEQ_EXCEEDS_RIGHT_EDGE",
330 "TCP_SMALL_DATA_OFFS",
331 "TCP_BAD_SACK",
332 "TCP_BIG_DATA_OFFS",
333 "TCP_SEQ_BEFORE_LEFT_EDGE",
334 "TCP_ACK_EXCEEDS_RIGHT_EDGE",
335 "TCP_ACK_BEFORE_LEFT_EDGE",
336 "ICMP_HEADER_INCOMPLETE",
337 "ICMP_UNHANDLED_TYPE",
338 "ICMP_IPV4_HEADER_INCOMPLETE",
339 "ICMP_IPV4_NON_V4",
340 "ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
341 "ICMP_IPV4_UDP_HEADER_INCOMPLETE",
342 "ICMP_IPV4_TCP_HEADER_INCOMPLETE",
343 "ICMP_IPV4_UNHANDLED_PROTOCOL",
344 "ICMP_NO_CONNECTION",
345 "ICMP_FLUSHED_CONNECTION",
346 "HEADER_INCOMPLETE",
347 "BAD_TOTAL_LENGTH",
348 "NON_V4",
349 "NON_INITIAL_FRAGMENT",
350 "DATAGRAM_INCOMPLETE",
351 "IP_OPTIONS_INCOMPLETE",
352 "UNHANDLED_PROTOCOL"
353};
354
355/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600356 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100357 */
358struct sfe_ipv4 {
359 spinlock_t lock; /* Lock for SMP correctness */
360 struct sfe_ipv4_connection_match *active_head;
361 /* Head of the list of recently active connections */
362 struct sfe_ipv4_connection_match *active_tail;
363 /* Tail of the list of recently active connections */
364 struct sfe_ipv4_connection *all_connections_head;
365 /* Head of the list of all connections */
366 struct sfe_ipv4_connection *all_connections_tail;
367 /* Tail of the list of all connections */
368 unsigned int num_connections; /* Number of connections */
369 struct timer_list timer; /* Timer used for periodic sync ops */
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600370 sfe_ipv4_sync_rule_callback_t __rcu sync_rule_callback;
371 /* Callback function registered by a connection manager for stats syncing */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100372 struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
373 /* Connection hash table */
374 struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
375 /* Connection match hash table */
376
377 /*
378 * Statistics.
379 */
380 uint32_t connection_create_requests;
381 /* Number of IPv4 connection create requests */
382 uint32_t connection_create_collisions;
383 /* Number of IPv4 connection create requests that collided with existing hash table entries */
384 uint32_t connection_destroy_requests;
385 /* Number of IPv4 connection destroy requests */
386 uint32_t connection_destroy_misses;
387 /* Number of IPv4 connection destroy requests that missed our hash table */
388 uint32_t connection_match_hash_hits;
389 /* Number of IPv4 connection match hash hits */
390 uint32_t connection_match_hash_reorders;
391 /* Number of IPv4 connection match hash reorders */
392 uint32_t connection_flushes; /* Number of IPv4 connection flushes */
393 uint32_t packets_forwarded; /* Number of IPv4 packets forwarded */
394 uint32_t packets_not_forwarded; /* Number of IPv4 packets not forwarded */
395 uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];
396
397 /*
398 * Summary tatistics.
399 */
400 uint64_t connection_create_requests64;
401 /* Number of IPv4 connection create requests */
402 uint64_t connection_create_collisions64;
403 /* Number of IPv4 connection create requests that collided with existing hash table entries */
404 uint64_t connection_destroy_requests64;
405 /* Number of IPv4 connection destroy requests */
406 uint64_t connection_destroy_misses64;
407 /* Number of IPv4 connection destroy requests that missed our hash table */
408 uint64_t connection_match_hash_hits64;
409 /* Number of IPv4 connection match hash hits */
410 uint64_t connection_match_hash_reorders64;
411 /* Number of IPv4 connection match hash reorders */
412 uint64_t connection_flushes64; /* Number of IPv4 connection flushes */
413 uint64_t packets_forwarded64; /* Number of IPv4 packets forwarded */
414 uint64_t packets_not_forwarded64;
415 /* Number of IPv4 packets not forwarded */
416 uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];
417
418 /*
419 * Control state.
420 */
421 struct kobject *sys_sfe_ipv4; /* sysfs linkage */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100422 int debug_dev; /* Major number of the debug char device */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100423};
424
/*
 * Enumeration of the XML output.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
440
441/*
442 * XML write state.
443 */
444struct sfe_ipv4_debug_xml_write_state {
445 enum sfe_ipv4_debug_xml_states state;
446 /* XML output file state machine state */
447 struct sfe_ipv4_connection *iter_conn;
448 /* Next connection iterator */
449 int iter_exception; /* Next exception iterator */
450};
451
452typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
453 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);
454
455struct sfe_ipv4 __si;
456
457/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100458 * sfe_ipv4_gen_ip_csum()
459 * Generate the IP checksum for an IPv4 header.
460 *
461 * Note that this function assumes that we have only 20 bytes of IP header.
462 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600463static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100464{
465 uint32_t sum;
466 uint16_t *i = (uint16_t *)iph;
467
468 iph->check = 0;
469
470 /*
471 * Generate the sum.
472 */
473 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
474
475 /*
476 * Fold it to ones-complement form.
477 */
478 sum = (sum & 0xffff) + (sum >> 16);
479 sum = (sum & 0xffff) + (sum >> 16);
480
481 return (uint16_t)sum ^ 0xffff;
482}
483
484/*
485 * sfe_ipv4_get_connection_match_hash()
486 * Generate the hash used in connection match lookups.
487 */
488static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100489 __be32 src_ip, __be16 src_port,
490 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100491{
492 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100493 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100494 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
495}
496
497/*
498 * sfe_ipv4_find_sfe_ipv4_connection_match()
499 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
500 *
501 * On entry we must be holding the lock that protects the hash table.
502 */
503static struct sfe_ipv4_connection_match *
504sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100505 __be32 src_ip, __be16 src_port,
506 __be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100507static struct sfe_ipv4_connection_match *
508sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100509 __be32 src_ip, __be16 src_port,
510 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100511{
512 struct sfe_ipv4_connection_match *cm;
513 struct sfe_ipv4_connection_match *head;
514 unsigned int conn_match_idx;
515
516 conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
517 cm = si->conn_match_hash[conn_match_idx];
518
519 /*
520 * If we don't have anything in this chain then bale.
521 */
522 if (unlikely(!cm)) {
523 return cm;
524 }
525
526 /*
527 * Hopefully the first entry is the one we want.
528 */
529 if (likely(cm->match_src_port == src_port)
530 && likely(cm->match_dest_port == dest_port)
531 && likely(cm->match_src_ip == src_ip)
532 && likely(cm->match_dest_ip == dest_ip)
533 && likely(cm->match_protocol == protocol)
534 && likely(cm->match_dev == dev)) {
535 si->connection_match_hash_hits++;
536 return cm;
537 }
538
539 /*
540 * We may or may not have a matching entry but if we do then we want to
541 * move that entry to the top of the hash chain when we get to it. We
542 * presume that this will be reused again very quickly.
543 */
544 head = cm;
545 do {
546 cm = cm->next;
547 } while (cm && (cm->match_src_port != src_port
548 || cm->match_dest_port != dest_port
549 || cm->match_src_ip != src_ip
550 || cm->match_dest_ip != dest_ip
551 || cm->match_protocol != protocol
552 || cm->match_dev != dev));
553
554 /*
555 * Not found then we're done.
556 */
557 if (unlikely(!cm)) {
558 return cm;
559 }
560
561 /*
562 * We found a match so move it.
563 */
564 if (cm->next) {
565 cm->next->prev = cm->prev;
566 }
567 cm->prev->next = cm->next;
568 cm->prev = NULL;
569 cm->next = head;
570 head->prev = cm;
571 si->conn_match_hash[conn_match_idx] = cm;
572 si->connection_match_hash_reorders++;
573
574 return cm;
575}
576
577/*
578 * sfe_ipv4_connection_match_update_summary_stats()
579 * Update the summary stats for a connection match entry.
580 */
581static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
582{
583 cm->rx_packet_count64 += cm->rx_packet_count;
584 cm->rx_packet_count = 0;
585 cm->rx_byte_count64 += cm->rx_byte_count;
586 cm->rx_byte_count = 0;
587}
588
589/*
590 * sfe_ipv4_connection_match_compute_translations()
591 * Compute port and address translations for a connection match entry.
592 */
593static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
594{
595 /*
596 * Before we insert the entry look to see if this is tagged as doing address
597 * translations. If it is then work out the adjustment that we need to apply
598 * to the transport checksum.
599 */
600 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
601 /*
602 * Precompute an incremental checksum adjustment so we can
603 * edit packets in this stream very quickly. The algorithm is from RFC1624.
604 */
605 uint16_t src_ip_hi = cm->match_src_ip >> 16;
606 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
607 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
608 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
609 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100610 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100611 uint32_t adj;
612
613 /*
614 * When we compute this fold it down to a 16-bit offset
615 * as that way we can avoid having to do a double
616 * folding of the twos-complement result because the
617 * addition of 2 16-bit values cannot cause a double
618 * wrap-around!
619 */
620 adj = src_ip_hi + src_ip_lo + cm->match_src_port
621 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
622 adj = (adj & 0xffff) + (adj >> 16);
623 adj = (adj & 0xffff) + (adj >> 16);
624 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600625
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100626 }
627
628 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
629 /*
630 * Precompute an incremental checksum adjustment so we can
631 * edit packets in this stream very quickly. The algorithm is from RFC1624.
632 */
633 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
634 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
635 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
636 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
637 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100638 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100639 uint32_t adj;
640
641 /*
642 * When we compute this fold it down to a 16-bit offset
643 * as that way we can avoid having to do a double
644 * folding of the twos-complement result because the
645 * addition of 2 16-bit values cannot cause a double
646 * wrap-around!
647 */
648 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
649 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
650 adj = (adj & 0xffff) + (adj >> 16);
651 adj = (adj & 0xffff) + (adj >> 16);
652 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
653 }
654}
655
656/*
657 * sfe_ipv4_update_summary_stats()
658 * Update the summary stats.
659 */
660static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
661{
662 int i;
663
664 si->connection_create_requests64 += si->connection_create_requests;
665 si->connection_create_requests = 0;
666 si->connection_create_collisions64 += si->connection_create_collisions;
667 si->connection_create_collisions = 0;
668 si->connection_destroy_requests64 += si->connection_destroy_requests;
669 si->connection_destroy_requests = 0;
670 si->connection_destroy_misses64 += si->connection_destroy_misses;
671 si->connection_destroy_misses = 0;
672 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
673 si->connection_match_hash_hits = 0;
674 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
675 si->connection_match_hash_reorders = 0;
676 si->connection_flushes64 += si->connection_flushes;
677 si->connection_flushes = 0;
678 si->packets_forwarded64 += si->packets_forwarded;
679 si->packets_forwarded = 0;
680 si->packets_not_forwarded64 += si->packets_not_forwarded;
681 si->packets_not_forwarded = 0;
682
683 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
684 si->exception_events64[i] += si->exception_events[i];
685 si->exception_events[i] = 0;
686 }
687}
688
689/*
690 * sfe_ipv4_insert_sfe_ipv4_connection_match()
691 * Insert a connection match into the hash.
692 *
693 * On entry we must be holding the lock that protects the hash table.
694 */
695static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
696{
697 struct sfe_ipv4_connection_match **hash_head;
698 struct sfe_ipv4_connection_match *prev_head;
699 unsigned int conn_match_idx
700 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
701 cm->match_src_ip, cm->match_src_port,
702 cm->match_dest_ip, cm->match_dest_port);
703 hash_head = &si->conn_match_hash[conn_match_idx];
704 prev_head = *hash_head;
705 cm->prev = NULL;
706 if (prev_head) {
707 prev_head->prev = cm;
708 }
709
710 cm->next = prev_head;
711 *hash_head = cm;
712}
713
714/*
715 * sfe_ipv4_remove_sfe_ipv4_connection_match()
716 * Remove a connection match object from the hash.
717 *
718 * On entry we must be holding the lock that protects the hash table.
719 */
720static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
721{
722 /*
723 * Unlink the connection match entry from the hash.
724 */
725 if (cm->prev) {
726 cm->prev->next = cm->next;
727 } else {
728 unsigned int conn_match_idx
729 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
730 cm->match_src_ip, cm->match_src_port,
731 cm->match_dest_ip, cm->match_dest_port);
732 si->conn_match_hash[conn_match_idx] = cm->next;
733 }
734
735 if (cm->next) {
736 cm->next->prev = cm->prev;
737 }
738
739 /*
740 * Unlink the connection match entry from the active list.
741 */
742 if (likely(cm->active_prev)) {
743 cm->active_prev->active_next = cm->active_next;
744 } else {
745 si->active_head = cm->active_next;
746 }
747
748 if (likely(cm->active_next)) {
749 cm->active_next->active_prev = cm->active_prev;
750 } else {
751 si->active_tail = cm->active_prev;
752 }
753
754}
755
756/*
757 * sfe_ipv4_get_connection_hash()
758 * Generate the hash used in connection lookups.
759 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100760static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
761 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100762{
Dave Hudson87973cd2013-10-22 16:00:04 +0100763 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100764 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
765}
766
767/*
768 * sfe_ipv4_find_sfe_ipv4_connection()
769 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
770 *
771 * On entry we must be holding the lock that protects the hash table.
772 */
773static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100774 __be32 src_ip, __be16 src_port,
775 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100776{
777 struct sfe_ipv4_connection *c;
778 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
779 c = si->conn_hash[conn_idx];
780
781 /*
782 * If we don't have anything in this chain then bale.
783 */
784 if (unlikely(!c)) {
785 return c;
786 }
787
788 /*
789 * Hopefully the first entry is the one we want.
790 */
791 if (likely(c->src_port == src_port)
792 && likely(c->dest_port == dest_port)
793 && likely(c->src_ip == src_ip)
794 && likely(c->dest_ip == dest_ip)
795 && likely(c->protocol == protocol)) {
796 return c;
797 }
798
799 /*
800 * We may or may not have a matching entry but if we do then we want to
801 * move that entry to the top of the hash chain when we get to it. We
802 * presume that this will be reused again very quickly.
803 */
804 do {
805 c = c->next;
806 } while (c && (c->src_port != src_port
807 || c->dest_port != dest_port
808 || c->src_ip != src_ip
809 || c->dest_ip != dest_ip
810 || c->protocol != protocol));
811
812 /*
813 * Will need connection entry for next create/destroy metadata,
814 * So no need to re-order entry for these requests
815 */
816 return c;
817}
818
819/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600820 * sfe_ipv4_mark_rule()
821 * Updates the mark for a current offloaded connection
822 *
823 * Will take hash lock upon entry
824 */
825static void sfe_ipv4_mark_rule(struct sfe_ipv4_mark *mark)
826{
827 struct sfe_ipv4 *si = &__si;
828 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600829
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600830 spin_lock(&si->lock);
831 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600832 mark->src_ip, mark->src_port,
833 mark->dest_ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600834 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600835 DEBUG_TRACE("Matching connection found for mark, "
836 "setting from %08x to %08x\n",
837 c->mark, mark->mark);
838 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600839 c->mark = mark->mark;
840 }
841 spin_unlock(&si->lock);
842}
843
844/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100845 * sfe_ipv4_insert_sfe_ipv4_connection()
846 * Insert a connection into the hash.
847 *
848 * On entry we must be holding the lock that protects the hash table.
849 */
850static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
851{
852 struct sfe_ipv4_connection **hash_head;
853 struct sfe_ipv4_connection *prev_head;
854 unsigned int conn_idx;
855
856 /*
857 * Insert entry into the connection hash.
858 */
859 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
860 c->dest_ip, c->dest_port);
861 hash_head = &si->conn_hash[conn_idx];
862 prev_head = *hash_head;
863 c->prev = NULL;
864 if (prev_head) {
865 prev_head->prev = c;
866 }
867
868 c->next = prev_head;
869 *hash_head = c;
870
871 /*
872 * Insert entry into the "all connections" list.
873 */
874 if (si->all_connections_tail) {
875 c->all_connections_prev = si->all_connections_tail;
876 si->all_connections_tail->all_connections_next = c;
877 } else {
878 c->all_connections_prev = NULL;
879 si->all_connections_head = c;
880 }
881
882 si->all_connections_tail = c;
883 c->all_connections_next = NULL;
884 si->num_connections++;
885
886 /*
887 * Insert the connection match objects too.
888 */
889 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
890 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
891}
892
893/*
894 * sfe_ipv4_remove_sfe_ipv4_connection()
895 * Remove a sfe_ipv4_connection object from the hash.
896 *
897 * On entry we must be holding the lock that protects the hash table.
898 */
899static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
900{
901 /*
902 * Remove the connection match objects.
903 */
904 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
905 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
906
907 /*
908 * Unlink the connection.
909 */
910 if (c->prev) {
911 c->prev->next = c->next;
912 } else {
913 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
914 c->dest_ip, c->dest_port);
915 si->conn_hash[conn_idx] = c->next;
916 }
917
918 if (c->next) {
919 c->next->prev = c->prev;
920 }
921}
922
923/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100924 * sfe_ipv4_sync_sfe_ipv4_connection()
925 * Sync a connection.
926 *
927 * On entry to this function we expect that the lock for the connection is either
928 * already held or isn't required.
929 */
930static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
931 struct sfe_ipv4_sync *sis, uint64_t now_jiffies)
932{
933 struct sfe_ipv4_connection_match *original_cm;
934 struct sfe_ipv4_connection_match *reply_cm;
935
936 /*
937 * Fill in the update message.
938 */
939 sis->protocol = c->protocol;
940 sis->src_ip = c->src_ip;
941 sis->dest_ip = c->dest_ip;
942 sis->src_port = c->src_port;
943 sis->dest_port = c->dest_port;
944
945 original_cm = c->original_match;
946 reply_cm = c->reply_match;
947 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
948 sis->src_td_end = original_cm->protocol_state.tcp.end;
949 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
950 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
951 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
952 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
953
954 sfe_ipv4_connection_match_update_summary_stats(original_cm);
955 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
956
957 sis->src_packet_count = original_cm->rx_packet_count64;
958 sis->src_byte_count = original_cm->rx_byte_count64;
959 sis->dest_packet_count = reply_cm->rx_packet_count64;
960 sis->dest_byte_count = reply_cm->rx_byte_count64;
961
962 /*
963 * Get the time increment since our last sync.
964 */
965 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
966 c->last_sync_jiffies = now_jiffies;
967}
968
969/*
970 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
971 * Remove an iterator from a connection - free all resources if necessary.
972 *
973 * Returns true if the connection should now be free, false if not.
974 *
975 * We must be locked on entry to this function.
976 */
977static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
978{
979 /*
980 * Are we the last iterator for this connection?
981 */
982 c->iterators--;
983 if (c->iterators) {
984 return false;
985 }
986
987 /*
988 * Is this connection marked for deletion?
989 */
990 if (!c->pending_free) {
991 return false;
992 }
993
994 /*
995 * We're ready to delete this connection so unlink it from the "all
996 * connections" list.
997 */
998 si->num_connections--;
999 if (c->all_connections_prev) {
1000 c->all_connections_prev->all_connections_next = c->all_connections_next;
1001 } else {
1002 si->all_connections_head = c->all_connections_next;
1003 }
1004
1005 if (c->all_connections_next) {
1006 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1007 } else {
1008 si->all_connections_tail = c->all_connections_prev;
1009 }
1010
1011 return true;
1012}
1013
1014/*
1015 * sfe_ipv4_flush_sfe_ipv4_connection()
1016 * Flush a connection and free all associated resources.
1017 *
1018 * We need to be called with bottom halves disabled locally as we need to acquire
1019 * the connection hash lock and release it again. In general we're actually called
1020 * from within a BH and so we're fine, but we're also called when connections are
1021 * torn down.
1022 */
1023static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1024{
1025 struct sfe_ipv4_sync sis;
1026 uint64_t now_jiffies;
1027 bool pending_free = false;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001028 sfe_ipv4_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001029
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001030 rcu_read_lock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001031 spin_lock(&si->lock);
1032 si->connection_flushes++;
1033
1034 /*
1035 * Check that we're not currently being iterated. If we are then
1036 * we can't free this entry yet but must mark it pending a free. If it's
1037 * not being iterated then we can unlink it from the list of all
1038 * connections.
1039 */
1040 if (c->iterators) {
1041 pending_free = true;
1042 c->pending_free = true;
1043 } else {
1044 si->num_connections--;
1045 if (c->all_connections_prev) {
1046 c->all_connections_prev->all_connections_next = c->all_connections_next;
1047 } else {
1048 si->all_connections_head = c->all_connections_next;
1049 }
1050
1051 if (c->all_connections_next) {
1052 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1053 } else {
1054 si->all_connections_tail = c->all_connections_prev;
1055 }
1056 }
1057
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001058 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
1059
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001060 spin_unlock(&si->lock);
1061
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001062 if (sync_rule_callback) {
1063 /*
1064 * Generate a sync message and then sync.
1065 */
1066 now_jiffies = get_jiffies_64();
1067 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
1068 sync_rule_callback(&sis);
1069 }
1070
1071 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001072
1073 /*
1074 * If we can't yet free the underlying memory then we're done.
1075 */
1076 if (pending_free) {
1077 return;
1078 }
1079
1080 /*
1081 * Release our hold of the source and dest devices and free the memory
1082 * for our connection objects.
1083 */
1084 dev_put(c->original_dev);
1085 dev_put(c->reply_dev);
1086 kfree(c->original_match);
1087 kfree(c->reply_match);
1088 kfree(c);
1089}
1090
1091/*
1092 * sfe_ipv4_recv_udp()
1093 * Handle UDP packet receives and forwarding.
1094 */
1095static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001096 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001097{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001098 struct sfe_ipv4_udp_hdr *udph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001099 __be32 src_ip;
1100 __be32 dest_ip;
1101 __be16 src_port;
1102 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001103 struct sfe_ipv4_connection_match *cm;
1104 uint8_t ttl;
1105 struct net_device *xmit_dev;
1106
1107 /*
1108 * Is our packet too short to contain a valid UDP header?
1109 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001110 if (unlikely(len < (sizeof(struct sfe_ipv4_udp_hdr) + ihl))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001111 spin_lock(&si->lock);
1112 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1113 si->packets_not_forwarded++;
1114 spin_unlock(&si->lock);
1115
1116 DEBUG_TRACE("packet too short for UDP header\n");
1117 return 0;
1118 }
1119
1120 /*
1121 * Read the IP address and port information. Read the IP header data first
1122 * because we've almost certainly got that in the cache. We may not yet have
1123 * the UDP header cached though so allow more time for any prefetching.
1124 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001125 src_ip = iph->saddr;
1126 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001127
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001128 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001129 src_port = udph->source;
1130 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001131
1132 spin_lock(&si->lock);
1133
1134 /*
1135 * Look for a connection match.
1136 */
1137 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1138 if (unlikely(!cm)) {
1139 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1140 si->packets_not_forwarded++;
1141 spin_unlock(&si->lock);
1142
1143 DEBUG_TRACE("no connection found\n");
1144 return 0;
1145 }
1146
1147 /*
1148 * If our packet has beern marked as "flush on find" we can't actually
1149 * forward it in the fast path, but now that we've found an associated
1150 * connection we can flush that out before we process the packet.
1151 */
1152 if (unlikely(flush_on_find)) {
1153 struct sfe_ipv4_connection *c = cm->connection;
1154 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1155 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1156 si->packets_not_forwarded++;
1157 spin_unlock(&si->lock);
1158
1159 DEBUG_TRACE("flush on find\n");
1160 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1161 return 0;
1162 }
1163
1164 /*
1165 * Does our TTL allow forwarding?
1166 */
1167 ttl = iph->ttl;
1168 if (unlikely(ttl < 2)) {
1169 struct sfe_ipv4_connection *c = cm->connection;
1170 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1171 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1172 si->packets_not_forwarded++;
1173 spin_unlock(&si->lock);
1174
1175 DEBUG_TRACE("ttl too low\n");
1176 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1177 return 0;
1178 }
1179
1180 /*
1181 * If our packet is larger than the MTU of the transmit interface then
1182 * we can't forward it easily.
1183 */
1184 if (unlikely(len > cm->xmit_dev_mtu)) {
1185 struct sfe_ipv4_connection *c = cm->connection;
1186 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1187 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1188 si->packets_not_forwarded++;
1189 spin_unlock(&si->lock);
1190
1191 DEBUG_TRACE("larger than mtu\n");
1192 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1193 return 0;
1194 }
1195
1196 /*
1197 * From this point on we're good to modify the packet.
1198 */
1199
1200 /*
1201 * Decrement our TTL.
1202 */
1203 iph->ttl = ttl - 1;
1204
1205 /*
1206 * Do we have to perform translations of the source address/port?
1207 */
1208 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1209 uint16_t udp_csum;
1210
Dave Hudson87973cd2013-10-22 16:00:04 +01001211 iph->saddr = cm->xlate_src_ip;
1212 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001213
1214 /*
1215 * Do we have a non-zero UDP checksum? If we do then we need
1216 * to update it.
1217 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001218 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001219 if (likely(udp_csum)) {
1220 uint32_t sum = udp_csum + cm->xlate_src_csum_adjustment;
1221 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001222 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001223 }
1224 }
1225
1226 /*
1227 * Do we have to perform translations of the destination address/port?
1228 */
1229 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1230 uint16_t udp_csum;
1231
Dave Hudson87973cd2013-10-22 16:00:04 +01001232 iph->daddr = cm->xlate_dest_ip;
1233 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001234
1235 /*
1236 * Do we have a non-zero UDP checksum? If we do then we need
1237 * to update it.
1238 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001239 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001240 if (likely(udp_csum)) {
1241 uint32_t sum = udp_csum + cm->xlate_dest_csum_adjustment;
1242 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001243 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001244 }
1245 }
1246
1247 /*
1248 * Replace the IP checksum.
1249 */
1250 iph->check = sfe_ipv4_gen_ip_csum(iph);
1251
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001252 /*
1253 * Update traffic stats.
1254 */
1255 cm->rx_packet_count++;
1256 cm->rx_byte_count += len;
1257
1258 /*
1259 * If we're not already on the active list then insert ourselves at the tail
1260 * of the current list.
1261 */
1262 if (unlikely(!cm->active)) {
1263 cm->active = true;
1264 cm->active_prev = si->active_tail;
1265 if (likely(si->active_tail)) {
1266 si->active_tail->active_next = cm;
1267 } else {
1268 si->active_head = cm;
1269 }
1270 si->active_tail = cm;
1271 }
1272
1273 xmit_dev = cm->xmit_dev;
1274 skb->dev = xmit_dev;
1275
1276 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001277 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001278 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001279 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1280 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001281 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001282 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001283 } else {
1284 /*
1285 * For the simple case we write this really fast.
1286 */
1287 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1288 eth->h_proto = htons(ETH_P_IP);
1289 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1290 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1291 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1292 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1293 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1294 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001295 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001296 }
1297
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001298 /*
1299 * Mark outgoing packet.
1300 */
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001301 skb->mark = cm->connection->mark;
1302 if (skb->mark) {
1303 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1304 }
1305
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001306 si->packets_forwarded++;
1307 spin_unlock(&si->lock);
1308
1309 /*
1310 * We're going to check for GSO flags when we transmit the packet so
1311 * start fetching the necessary cache line now.
1312 */
1313 prefetch(skb_shinfo(skb));
1314
1315 /*
1316 * Send the packet on its way.
1317 */
1318 dev_queue_xmit(skb);
1319
1320 return 1;
1321}
1322
1323/*
1324 * sfe_ipv4_process_tcp_option_sack()
1325 * Parse TCP SACK option and update ack according
1326 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001327static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001328 uint32_t *ack) __attribute__((always_inline));
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001329static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001330 uint32_t *ack)
1331{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001332 uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001333 uint8_t *ptr = (uint8_t *)th + length;
1334
1335 /*
1336 * If option is TIMESTAMP discard it.
1337 */
1338 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1339 && likely(ptr[0] == TCPOPT_NOP)
1340 && likely(ptr[1] == TCPOPT_NOP)
1341 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1342 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1343 return true;
1344 }
1345
1346 /*
1347 * TCP options. Parse SACK option.
1348 */
1349 while (length < data_offs) {
1350 uint8_t size;
1351 uint8_t kind;
1352
1353 ptr = (uint8_t *)th + length;
1354 kind = *ptr;
1355
1356 /*
1357 * NOP, for padding
1358 * Not in the switch because to fast escape and to not calculate size
1359 */
1360 if (kind == TCPOPT_NOP) {
1361 length++;
1362 continue;
1363 }
1364
1365 if (kind == TCPOPT_SACK) {
1366 uint32_t sack = 0;
1367 uint8_t re = 1 + 1;
1368
1369 size = *(ptr + 1);
1370 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1371 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1372 || (size > (data_offs - length))) {
1373 return false;
1374 }
1375
1376 re += 4;
1377 while (re < size) {
1378 uint32_t sack_re;
1379 uint8_t *sptr = ptr + re;
1380 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1381 if (sack_re > sack) {
1382 sack = sack_re;
1383 }
1384 re += TCPOLEN_SACK_PERBLOCK;
1385 }
1386 if (sack > *ack) {
1387 *ack = sack;
1388 }
1389 length += size;
1390 continue;
1391 }
1392 if (kind == TCPOPT_EOL) {
1393 return true;
1394 }
1395 size = *(ptr + 1);
1396 if (size < 2) {
1397 return false;
1398 }
1399 length += size;
1400 }
1401
1402 return true;
1403}
1404
1405/*
1406 * sfe_ipv4_recv_tcp()
1407 * Handle TCP packet receives and forwarding.
1408 */
1409static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001410 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001411{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001412 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001413 __be32 src_ip;
1414 __be32 dest_ip;
1415 __be16 src_port;
1416 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001417 struct sfe_ipv4_connection_match *cm;
1418 struct sfe_ipv4_connection_match *counter_cm;
1419 uint8_t ttl;
1420 uint32_t flags;
1421 struct net_device *xmit_dev;
1422
1423 /*
1424 * Is our packet too short to contain a valid UDP header?
1425 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001426 if (unlikely(len < (sizeof(struct sfe_ipv4_tcp_hdr) + ihl))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001427 spin_lock(&si->lock);
1428 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1429 si->packets_not_forwarded++;
1430 spin_unlock(&si->lock);
1431
1432 DEBUG_TRACE("packet too short for TCP header\n");
1433 return 0;
1434 }
1435
1436 /*
1437 * Read the IP address and port information. Read the IP header data first
1438 * because we've almost certainly got that in the cache. We may not yet have
1439 * the TCP header cached though so allow more time for any prefetching.
1440 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001441 src_ip = iph->saddr;
1442 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001443
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001444 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001445 src_port = tcph->source;
1446 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001447 flags = tcp_flag_word(tcph);
1448
1449 spin_lock(&si->lock);
1450
1451 /*
1452 * Look for a connection match.
1453 */
1454 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1455 if (unlikely(!cm)) {
1456 /*
1457 * We didn't get a connection but as TCP is connection-oriented that
1458 * may be because this is a non-fast connection (not running established).
1459 * For diagnostic purposes we differentiate this here.
1460 */
1461 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1462 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1463 si->packets_not_forwarded++;
1464 spin_unlock(&si->lock);
1465
1466 DEBUG_TRACE("no connection found - fast flags\n");
1467 return 0;
1468 }
1469 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1470 si->packets_not_forwarded++;
1471 spin_unlock(&si->lock);
1472
1473 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1474 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1475 return 0;
1476 }
1477
1478 /*
1479 * If our packet has beern marked as "flush on find" we can't actually
1480 * forward it in the fast path, but now that we've found an associated
1481 * connection we can flush that out before we process the packet.
1482 */
1483 if (unlikely(flush_on_find)) {
1484 struct sfe_ipv4_connection *c = cm->connection;
1485 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1486 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1487 si->packets_not_forwarded++;
1488 spin_unlock(&si->lock);
1489
1490 DEBUG_TRACE("flush on find\n");
1491 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1492 return 0;
1493 }
1494
1495 /*
1496 * Does our TTL allow forwarding?
1497 */
1498 ttl = iph->ttl;
1499 if (unlikely(ttl < 2)) {
1500 struct sfe_ipv4_connection *c = cm->connection;
1501 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1502 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1503 si->packets_not_forwarded++;
1504 spin_unlock(&si->lock);
1505
1506 DEBUG_TRACE("ttl too low\n");
1507 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1508 return 0;
1509 }
1510
1511 /*
1512 * If our packet is larger than the MTU of the transmit interface then
1513 * we can't forward it easily.
1514 */
1515 if (unlikely(len > cm->xmit_dev_mtu)) {
1516 struct sfe_ipv4_connection *c = cm->connection;
1517 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1518 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1519 si->packets_not_forwarded++;
1520 spin_unlock(&si->lock);
1521
1522 DEBUG_TRACE("larger than mtu\n");
1523 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1524 return 0;
1525 }
1526
1527 /*
1528 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1529 * set is not a fast path packet.
1530 */
1531 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1532 struct sfe_ipv4_connection *c = cm->connection;
1533 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1534 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1535 si->packets_not_forwarded++;
1536 spin_unlock(&si->lock);
1537
1538 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1539 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1540 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1541 return 0;
1542 }
1543
1544 counter_cm = cm->counter_match;
1545
1546 /*
1547 * Are we doing sequence number checking?
1548 */
1549 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1550 uint32_t seq;
1551 uint32_t ack;
1552 uint32_t sack;
1553 uint32_t data_offs;
1554 uint32_t end;
1555 uint32_t left_edge;
1556 uint32_t scaled_win;
1557 uint32_t max_end;
1558
1559 /*
1560 * Is our sequence fully past the right hand edge of the window?
1561 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001562 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001563 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1564 struct sfe_ipv4_connection *c = cm->connection;
1565 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1566 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1567 si->packets_not_forwarded++;
1568 spin_unlock(&si->lock);
1569
1570 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1571 seq, cm->protocol_state.tcp.max_end + 1);
1572 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1573 return 0;
1574 }
1575
1576 /*
1577 * Check that our TCP data offset isn't too short.
1578 */
1579 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001580 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001581 struct sfe_ipv4_connection *c = cm->connection;
1582 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1583 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1584 si->packets_not_forwarded++;
1585 spin_unlock(&si->lock);
1586
1587 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1588 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1589 return 0;
1590 }
1591
1592 /*
1593 * Update ACK according to any SACK option.
1594 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001595 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001596 sack = ack;
1597 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1598 struct sfe_ipv4_connection *c = cm->connection;
1599 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1600 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1601 si->packets_not_forwarded++;
1602 spin_unlock(&si->lock);
1603
1604 DEBUG_TRACE("TCP option SACK size is wrong\n");
1605 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1606 return 0;
1607 }
1608
1609 /*
1610 * Check that our TCP data offset isn't past the end of the packet.
1611 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001612 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001613 if (unlikely(len < data_offs)) {
1614 struct sfe_ipv4_connection *c = cm->connection;
1615 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1616 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1617 si->packets_not_forwarded++;
1618 spin_unlock(&si->lock);
1619
1620 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1621 data_offs, len);
1622 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1623 return 0;
1624 }
1625
1626 end = seq + len - data_offs;
1627
1628 /*
1629 * Is our sequence fully before the left hand edge of the window?
1630 */
1631 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1632 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1633 struct sfe_ipv4_connection *c = cm->connection;
1634 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1635 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1636 si->packets_not_forwarded++;
1637 spin_unlock(&si->lock);
1638
1639 DEBUG_TRACE("seq: %u before left edge: %u\n",
1640 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1641 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1642 return 0;
1643 }
1644
1645 /*
1646 * Are we acking data that is to the right of what has been sent?
1647 */
1648 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1649 struct sfe_ipv4_connection *c = cm->connection;
1650 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1651 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1652 si->packets_not_forwarded++;
1653 spin_unlock(&si->lock);
1654
1655 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1656 sack, counter_cm->protocol_state.tcp.end + 1);
1657 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1658 return 0;
1659 }
1660
1661 /*
1662 * Is our ack too far before the left hand edge of the window?
1663 */
1664 left_edge = counter_cm->protocol_state.tcp.end
1665 - cm->protocol_state.tcp.max_win
1666 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1667 - 1;
1668 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1669 struct sfe_ipv4_connection *c = cm->connection;
1670 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1671 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1672 si->packets_not_forwarded++;
1673 spin_unlock(&si->lock);
1674
1675 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1676 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1677 return 0;
1678 }
1679
1680 /*
1681 * Have we just seen the largest window size yet for this connection? If yes
1682 * then we need to record the new value.
1683 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001684 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001685 scaled_win += (sack - ack);
1686 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1687 cm->protocol_state.tcp.max_win = scaled_win;
1688 }
1689
1690 /*
1691 * If our sequence and/or ack numbers have advanced then record the new state.
1692 */
1693 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1694 cm->protocol_state.tcp.end = end;
1695 }
1696
1697 max_end = sack + scaled_win;
1698 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1699 counter_cm->protocol_state.tcp.max_end = max_end;
1700 }
1701 }
1702
1703 /*
1704 * From this point on we're good to modify the packet.
1705 */
1706
1707 /*
1708 * Decrement our TTL.
1709 */
1710 iph->ttl = ttl - 1;
1711
1712 /*
1713 * Do we have to perform translations of the source address/port?
1714 */
1715 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1716 uint16_t tcp_csum;
1717 uint32_t sum;
1718
Dave Hudson87973cd2013-10-22 16:00:04 +01001719 iph->saddr = cm->xlate_src_ip;
1720 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001721
1722 /*
1723 * Do we have a non-zero UDP checksum? If we do then we need
1724 * to update it.
1725 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001726 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001727 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1728 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001729 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001730 }
1731
1732 /*
1733 * Do we have to perform translations of the destination address/port?
1734 */
1735 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1736 uint16_t tcp_csum;
1737 uint32_t sum;
1738
Dave Hudson87973cd2013-10-22 16:00:04 +01001739 iph->daddr = cm->xlate_dest_ip;
1740 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001741
1742 /*
1743 * Do we have a non-zero UDP checksum? If we do then we need
1744 * to update it.
1745 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001746 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001747 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1748 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001749 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001750 }
1751
1752 /*
1753 * Replace the IP checksum.
1754 */
1755 iph->check = sfe_ipv4_gen_ip_csum(iph);
1756
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001757 /*
1758 * Update traffic stats.
1759 */
1760 cm->rx_packet_count++;
1761 cm->rx_byte_count += len;
1762
1763 /*
1764 * If we're not already on the active list then insert ourselves at the tail
1765 * of the current list.
1766 */
1767 if (unlikely(!cm->active)) {
1768 cm->active = true;
1769 cm->active_prev = si->active_tail;
1770 if (likely(si->active_tail)) {
1771 si->active_tail->active_next = cm;
1772 } else {
1773 si->active_head = cm;
1774 }
1775 si->active_tail = cm;
1776 }
1777
1778 xmit_dev = cm->xmit_dev;
1779 skb->dev = xmit_dev;
1780
1781 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001782 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001783 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001784 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1785 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001786 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001787 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001788 } else {
1789 /*
1790 * For the simple case we write this really fast.
1791 */
1792 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1793 eth->h_proto = htons(ETH_P_IP);
1794 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1795 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1796 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1797 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1798 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1799 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001800 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001801 }
1802
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001803 /*
1804 * Mark outgoing packet
1805 */
1806 skb->mark = cm->connection->mark;
1807 if (skb->mark) {
1808 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1809 }
1810
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001811 si->packets_forwarded++;
1812 spin_unlock(&si->lock);
1813
1814 /*
1815 * We're going to check for GSO flags when we transmit the packet so
1816 * start fetching the necessary cache line now.
1817 */
1818 prefetch(skb_shinfo(skb));
1819
1820 /*
1821 * Send the packet on its way.
1822 */
1823 dev_queue_xmit(skb);
1824
1825 return 1;
1826}
1827
/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack. What we do need to do is look for any errors
 * about connections we are handling in the fast path. If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 *
 * Always returns 0: ICMP packets themselves are never forwarded by the fast
 * path, so the caller always hands them on to the normal Linux stack.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct sfe_ipv4_ip_hdr *icmp_iph;	/* IP header embedded in the ICMP payload */
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	uint32_t *icmp_trans_h;			/* embedded transport-layer header */
	struct sfe_ipv4_udp_hdr *icmp_udph;
	struct sfe_ipv4_tcp_hdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 * On entry "len" still includes the outer IP header, so strip that first.
	 */
	len -= ihl;
	if (unlikely(len < sizeof(struct icmphdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 */
	len -= sizeof(struct icmphdr);
	if (unlikely(len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	if (unlikely(len < icmp_ihl)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	/*
	 * The embedded transport header sits immediately after the embedded IP
	 * header and its options (icmp_ihl_words 32-bit words in).
	 */
	len -= icmp_ihl;
	icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		if (unlikely(len < 8)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		if (unlikely(len < 8)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match. Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it. It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state.
	 */
	c = cm->connection;
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
	si->packets_not_forwarded++;
	spin_unlock(&si->lock);

	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	return 0;
}
2019
2020/*
2021 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002022 * Handle packet receives and forwaring.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002023 *
2024 * Returns 1 if the packet is forwarded or 0 if it isn't.
2025 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002026int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002027{
2028 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002029 unsigned int len;
2030 unsigned int tot_len;
2031 unsigned int frag_off;
2032 unsigned int ihl;
2033 bool flush_on_find;
2034 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002035 struct sfe_ipv4_ip_hdr *iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002036 uint32_t protocol;
2037
2038 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002039 * Check that we have space for an IP header here.
2040 */
2041 len = skb->len;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002042 if (unlikely(len < sizeof(struct sfe_ipv4_ip_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002043 spin_lock(&si->lock);
2044 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2045 si->packets_not_forwarded++;
2046 spin_unlock(&si->lock);
2047
2048 DEBUG_TRACE("len: %u is too short\n", len);
2049 return 0;
2050 }
2051
2052 /*
2053 * Check that our "total length" is large enough for an IP header.
2054 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002055 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002056 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002057 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002058 spin_lock(&si->lock);
2059 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2060 si->packets_not_forwarded++;
2061 spin_unlock(&si->lock);
2062
2063 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2064 return 0;
2065 }
2066
2067 /*
2068 * Is our IP version wrong?
2069 */
2070 if (unlikely(iph->version != 4)) {
2071 spin_lock(&si->lock);
2072 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2073 si->packets_not_forwarded++;
2074 spin_unlock(&si->lock);
2075
2076 DEBUG_TRACE("IP version: %u\n", iph->version);
2077 return 0;
2078 }
2079
2080 /*
2081 * Does our datagram fit inside the skb?
2082 */
2083 if (unlikely(tot_len > len)) {
2084 spin_lock(&si->lock);
2085 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2086 si->packets_not_forwarded++;
2087 spin_unlock(&si->lock);
2088
2089 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2090 return 0;
2091 }
2092
2093 /*
2094 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002095 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002096 frag_off = ntohs(iph->frag_off);
2097 if (unlikely(frag_off & IP_OFFSET)) {
2098 spin_lock(&si->lock);
2099 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2100 si->packets_not_forwarded++;
2101 spin_unlock(&si->lock);
2102
2103 DEBUG_TRACE("non-initial fragment\n");
2104 return 0;
2105 }
2106
2107 /*
2108 * If we have a (first) fragment then mark it to cause any connection to flush.
2109 */
2110 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2111
2112 /*
2113 * Do we have any IP options? That's definite a slow path! If we do have IP
2114 * options we need to recheck our header size.
2115 */
2116 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002117 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002118 if (unlikely(ip_options)) {
2119 if (unlikely(len < ihl)) {
2120 spin_lock(&si->lock);
2121 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2122 si->packets_not_forwarded++;
2123 spin_unlock(&si->lock);
2124
2125 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2126 return 0;
2127 }
2128
2129 flush_on_find = true;
2130 }
2131
2132 protocol = iph->protocol;
2133 if (IPPROTO_UDP == protocol) {
2134 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2135 }
2136
2137 if (IPPROTO_TCP == protocol) {
2138 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2139 }
2140
2141 if (IPPROTO_ICMP == protocol) {
2142 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2143 }
2144
2145 spin_lock(&si->lock);
2146 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2147 si->packets_not_forwarded++;
2148 spin_unlock(&si->lock);
2149
2150 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2151 return 0;
2152}
2153
Nicolas Costa436926b2014-01-14 10:36:22 -06002154static void
2155sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
2156 struct sfe_ipv4_create *sic)
2157{
2158 struct sfe_ipv4_connection_match *orig_cm;
2159 struct sfe_ipv4_connection_match *repl_cm;
2160 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2161 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2162
2163 orig_cm = c->original_match;
2164 repl_cm = c->reply_match;
2165 orig_tcp = &orig_cm->protocol_state.tcp;
2166 repl_tcp = &repl_cm->protocol_state.tcp;
2167
2168 /* update orig */
2169 if (orig_tcp->max_win < sic->src_td_max_window) {
2170 orig_tcp->max_win = sic->src_td_max_window;
2171 }
2172 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2173 orig_tcp->end = sic->src_td_end;
2174 }
2175 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2176 orig_tcp->max_end = sic->src_td_max_end;
2177 }
2178
2179 /* update reply */
2180 if (repl_tcp->max_win < sic->dest_td_max_window) {
2181 repl_tcp->max_win = sic->dest_td_max_window;
2182 }
2183 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2184 repl_tcp->end = sic->dest_td_end;
2185 }
2186 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2187 repl_tcp->max_end = sic->dest_td_max_end;
2188 }
2189
2190 /* update match flags */
2191 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2192 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2193 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2194 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2195 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2196 }
2197}
2198
2199static void
2200sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
2201 struct sfe_ipv4_create *sic)
2202{
2203 switch (sic->protocol) {
2204 case IPPROTO_TCP:
2205 sfe_ipv4_update_tcp_state(c, sic);
2206 break;
2207 }
2208}
2209
2210void sfe_ipv4_update_rule(struct sfe_ipv4_create *sic)
2211{
2212 struct sfe_ipv4_connection *c;
2213 struct sfe_ipv4 *si = &__si;
2214
2215 spin_lock_bh(&si->lock);
2216
2217 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2218 sic->protocol,
2219 sic->src_ip,
2220 sic->src_port,
2221 sic->dest_ip,
2222 sic->dest_port);
2223 if (c != NULL) {
2224 sfe_ipv4_update_protocol_state(c, sic);
2225 }
2226
2227 spin_unlock_bh(&si->lock);
2228}
2229
/*
 * sfe_ipv4_create_rule()
 *	Create a forwarding rule.
 *
 * Builds a connection object plus a pair of "match" objects (one per
 * direction), fills in the translation and L2 details and inserts everything
 * into the lookup tables under the instance lock.
 *
 * Returns 0 on success, -EADDRINUSE if a matching flow already exists (that
 * flow's protocol state is refreshed instead), or -ENOMEM on allocation
 * failure.
 */
int sfe_ipv4_create_rule(struct sfe_ipv4_create *sic)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	struct net_device *dest_dev;
	struct net_device *src_dev;

	dest_dev = sic->dest_dev;
	src_dev = sic->src_dev;

	spin_lock_bh(&si->lock);
	si->connection_create_requests++;

	/*
	 * Check to see if there is already a flow that matches the rule we're
	 * trying to create. If there is then we can't create a new one.
	 */
	c = sfe_ipv4_find_sfe_ipv4_connection(si,
					      sic->protocol,
					      sic->src_ip,
					      sic->src_port,
					      sic->dest_ip,
					      sic->dest_port);
	if (c != NULL) {
		si->connection_create_collisions++;

		/*
		 * If we already have the flow then it's likely that this
		 * request to create the connection rule contains more
		 * up-to-date information. Check and update accordingly.
		 */
		sfe_ipv4_update_protocol_state(c, sic);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
			    "  s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
			    sic->mark, sic->protocol,
			    sic->src_dev->name, sic->src_mac, &sic->src_ip, ntohs(sic->src_port),
			    sic->dest_dev->name, sic->dest_mac, &sic->dest_ip, ntohs(sic->dest_port));
		return -EADDRINUSE;
	}

	/*
	 * Allocate the various connection tracking objects.
	 * GFP_ATOMIC because we're inside a spinlock; each failure path
	 * unwinds whatever was allocated before it.
	 */
	c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
	if (unlikely(!c)) {
		spin_unlock_bh(&si->lock);
		return -ENOMEM;
	}

	original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!original_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(c);
		return -ENOMEM;
	}

	reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!reply_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(original_cm);
		kfree(c);
		return -ENOMEM;
	}

	/*
	 * Fill in the "original" direction connection matching object.
	 * Note that the transmit MAC address is "dest_mac_xlate" because
	 * we always know both ends of a connection by their translated
	 * addresses and not their public addresses.
	 */
	original_cm->match_dev = src_dev;
	original_cm->match_protocol = sic->protocol;
	original_cm->match_src_ip = sic->src_ip;
	original_cm->match_src_port = sic->src_port;
	original_cm->match_dest_ip = sic->dest_ip;
	original_cm->match_dest_port = sic->dest_port;
	original_cm->xlate_src_ip = sic->src_ip_xlate;
	original_cm->xlate_src_port = sic->src_port_xlate;
	original_cm->xlate_dest_ip = sic->dest_ip_xlate;
	original_cm->xlate_dest_port = sic->dest_port_xlate;
	original_cm->rx_packet_count = 0;
	original_cm->rx_packet_count64 = 0;
	original_cm->rx_byte_count = 0;
	original_cm->rx_byte_count64 = 0;
	original_cm->xmit_dev = dest_dev;
	original_cm->xmit_dev_mtu = sic->dest_mtu;
	memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
	memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
	original_cm->connection = c;
	original_cm->counter_match = reply_cm;
	original_cm->flags = 0;
	original_cm->active_next = NULL;
	original_cm->active_prev = NULL;
	original_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(dest_dev->flags & IFF_POINTOPOINT)) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (dest_dev->header_ops) {
			if (dest_dev->header_ops->create == eth_header) {
				original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}

	/*
	 * Fill in the "reply" direction connection matching object.
	 * This mirrors the original direction with the translated addresses
	 * swapped into the match fields.
	 */
	reply_cm->match_dev = dest_dev;
	reply_cm->match_protocol = sic->protocol;
	reply_cm->match_src_ip = sic->dest_ip_xlate;
	reply_cm->match_src_port = sic->dest_port_xlate;
	reply_cm->match_dest_ip = sic->src_ip_xlate;
	reply_cm->match_dest_port = sic->src_port_xlate;
	reply_cm->xlate_src_ip = sic->dest_ip;
	reply_cm->xlate_src_port = sic->dest_port;
	reply_cm->xlate_dest_ip = sic->src_ip;
	reply_cm->xlate_dest_port = sic->src_port;
	reply_cm->rx_packet_count = 0;
	reply_cm->rx_packet_count64 = 0;
	reply_cm->rx_byte_count = 0;
	reply_cm->rx_byte_count64 = 0;
	reply_cm->xmit_dev = src_dev;
	reply_cm->xmit_dev_mtu = sic->src_mtu;
	memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
	memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
	reply_cm->connection = c;
	reply_cm->counter_match = original_cm;
	reply_cm->flags = 0;
	reply_cm->active_next = NULL;
	reply_cm->active_prev = NULL;
	reply_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(src_dev->flags & IFF_POINTOPOINT)) {
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (src_dev->header_ops) {
			if (src_dev->header_ops->create == eth_header) {
				reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}


	/*
	 * If either end of the connection is translated (NAT) then flag the
	 * appropriate xlate work in each direction's match object.
	 */
	if (sic->dest_ip != sic->dest_ip_xlate || sic->dest_port != sic->dest_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
	}

	if (sic->src_ip != sic->src_ip_xlate || sic->src_port != sic->src_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
	}

	/*
	 * Fill in the top-level connection object that ties the two match
	 * directions together.
	 */
	c->protocol = sic->protocol;
	c->src_ip = sic->src_ip;
	c->src_ip_xlate = sic->src_ip_xlate;
	c->src_port = sic->src_port;
	c->src_port_xlate = sic->src_port_xlate;
	c->original_dev = src_dev;
	c->original_match = original_cm;
	c->dest_ip = sic->dest_ip;
	c->dest_ip_xlate = sic->dest_ip_xlate;
	c->dest_port = sic->dest_port;
	c->dest_port_xlate = sic->dest_port_xlate;
	c->reply_dev = dest_dev;
	c->reply_match = reply_cm;
	c->mark = sic->mark;

	c->last_sync_jiffies = get_jiffies_64();
	c->iterators = 0;
	c->pending_free = false;

	/*
	 * Take hold of our source and dest devices for the duration of the connection.
	 */
	dev_hold(c->original_dev);
	dev_hold(c->reply_dev);

	/*
	 * Initialize the protocol-specific information that we track.
	 */
	switch (sic->protocol) {
	case IPPROTO_TCP:
		original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
		/* max_win of 0 would break window tracking, so floor it at 1 */
		original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
		original_cm->protocol_state.tcp.end = sic->src_td_end;
		original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
		reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
		reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
		reply_cm->protocol_state.tcp.end = sic->dest_td_end;
		reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
		if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
			original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
			reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		}
		break;
	}

	sfe_ipv4_connection_match_compute_translations(original_cm);
	sfe_ipv4_connection_match_compute_translations(reply_cm);
	sfe_ipv4_insert_sfe_ipv4_connection(si, c);

	spin_unlock_bh(&si->lock);

	/*
	 * We have everything we need!
	 */
	DEBUG_INFO("new connection - mark: %08x, p: %d\n"
		   "  s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
		   "  d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
		   sic->mark, sic->protocol,
		   sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
		   &sic->src_ip, &sic->src_ip_xlate, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
		   dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
		   &sic->dest_ip, &sic->dest_ip_xlate, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));

	return 0;
}
2471
2472/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002473 * sfe_ipv4_destroy_rule()
2474 * Destroy a forwarding rule.
2475 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002476void sfe_ipv4_destroy_rule(struct sfe_ipv4_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002477{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002478 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002479 struct sfe_ipv4_connection *c;
2480
2481 spin_lock_bh(&si->lock);
2482 si->connection_destroy_requests++;
2483
2484 /*
2485 * Check to see if we have a flow that matches the rule we're trying
2486 * to destroy. If there isn't then we can't destroy it.
2487 */
2488 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip, sid->src_port,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002489 sid->dest_ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002490 if (!c) {
2491 si->connection_destroy_misses++;
2492 spin_unlock_bh(&si->lock);
2493
2494 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002495 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2496 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002497 return;
2498 }
2499
2500 /*
2501 * Remove our connection details from the hash tables.
2502 */
2503 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2504 spin_unlock_bh(&si->lock);
2505
2506 /*
2507 * Finally synchronize state and free resources. We need to protect against
2508 * pre-emption by our bottom half while we do this though.
2509 */
2510 local_bh_disable();
2511 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2512 local_bh_enable();
2513
2514 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002515 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2516 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002517}
2518
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * The callback pointer is published with rcu_assign_pointer() under the
 * instance lock so RCU readers elsewhere can safely dereference it.
 * NOTE(review): passing NULL presumably unregisters the callback - confirm
 * with callers.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_ipv4_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2531
2532/*
2533 * sfe_ipv4_get_debug_dev()
2534 */
2535static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2536 struct device_attribute *attr,
2537 char *buf)
2538{
2539 struct sfe_ipv4 *si = &__si;
2540 ssize_t count;
2541 int num;
2542
2543 spin_lock_bh(&si->lock);
2544 num = si->debug_dev;
2545 spin_unlock_bh(&si->lock);
2546
2547 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2548 return count;
2549}
2550
2551/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002552 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002553 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002554static const struct device_attribute sfe_ipv4_debug_dev_attr =
2555 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2556
/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 *
 * Locking: walks the all-connections list under si->lock, using the
 * per-connection iterator reference count so that the lock can be dropped
 * while freeing a dead entry without the next entry disappearing under us.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection *c_next;

	spin_lock_bh(&si->lock);
	c = si->all_connections_head;
	if (!c) {
		spin_unlock_bh(&si->lock);
		return;
	}

	/*
	 * Pin the first entry so it cannot be freed while we look at it.
	 */
	c->iterators++;

	/*
	 * Iterate over all connections
	 */
	while (c) {
		c_next = c->all_connections_next;

		/*
		 * Before we do anything else, take an iterator reference for the
		 * connection we'll iterate next.
		 */
		if (c_next) {
			c_next->iterators++;
		}

		/*
		 * Does this connection relate to the device we are destroying? If
		 * it does then ensure it is marked for being freed as soon as it
		 * is no longer being iterated.
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			c->pending_free = true;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		}

		/*
		 * Remove the iterator reference that we acquired and see if we
		 * should free any resources.  A true return means we hold the
		 * last reference to a dead entry and must free it ourselves.
		 */
		if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
			spin_unlock_bh(&si->lock);

			/*
			 * This entry is dead so release our hold of the source and
			 * dest devices and free the memory for our connection objects.
			 */
			dev_put(c->original_dev);
			dev_put(c->reply_dev);
			kfree(c->original_match);
			kfree(c->reply_match);
			kfree(c);

			spin_lock_bh(&si->lock);
		}

		c = c_next;
	}

	spin_unlock_bh(&si->lock);
}
2628
/*
 * sfe_ipv4_periodic_sync()
 *	Timer callback: push state for a slice of active connections to the
 *	registered sync callback, then re-arm the timer.
 *
 * Runs in softirq context (timer).  The sync callback is looked up under
 * rcu_read_lock(); si->lock is dropped around each callback invocation so
 * that the callback may sleep-free call back into this module.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		/* No consumer registered - nothing to sync this period. */
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * Roughly 1/64th of the total per tick, rounded up.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_ipv4_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		cm->active = false;

		/*
		 * Having found an entry we now remove it from the active scan list.
		 */
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * We scan the connection match lists so there's a possibility that our
		 * counter match is in the list too. If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/* Unlink counter_cm from the doubly-linked active list. */
			if (likely(counter_cm->active_prev)) {
				counter_cm->active_prev->active_next = counter_cm->active_next;
			} else {
				si->active_head = counter_cm->active_next;
			}

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm ourselves for the next period (10ms). */
	mod_timer(&si->timer, jiffies + (HZ / 100));
}
2727
2728#define CHAR_DEV_MSG_SIZE 768
2729
2730/*
2731 * sfe_ipv4_debug_dev_read_start()
2732 * Generate part of the XML output.
2733 */
2734static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2735 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2736{
2737 int bytes_read;
2738
2739 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2740 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2741 return false;
2742 }
2743
2744 *length -= bytes_read;
2745 *total_read += bytes_read;
2746
2747 ws->state++;
2748 return true;
2749}
2750
2751/*
2752 * sfe_ipv4_debug_dev_read_connections_start()
2753 * Generate part of the XML output.
2754 */
2755static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2756 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2757{
2758 int bytes_read;
2759
2760 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2761 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2762 return false;
2763 }
2764
2765 *length -= bytes_read;
2766 *total_read += bytes_read;
2767
2768 ws->state++;
2769 return true;
2770}
2771
2772/*
2773 * sfe_ipv4_debug_dev_read_connections_connection()
2774 * Generate part of the XML output.
2775 */
2776static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2777 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2778{
2779 struct sfe_ipv4_connection *c;
2780 struct sfe_ipv4_connection *c_next;
2781 struct sfe_ipv4_connection_match *original_cm;
2782 struct sfe_ipv4_connection_match *reply_cm;
2783 int bytes_read;
2784 int protocol;
2785 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002786 __be32 src_ip;
2787 __be32 src_ip_xlate;
2788 __be16 src_port;
2789 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002790 uint64_t src_rx_packets;
2791 uint64_t src_rx_bytes;
2792 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002793 __be32 dest_ip;
2794 __be32 dest_ip_xlate;
2795 __be16 dest_port;
2796 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002797 uint64_t dest_rx_packets;
2798 uint64_t dest_rx_bytes;
2799 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002800 uint32_t mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002801
2802 spin_lock_bh(&si->lock);
2803 c = ws->iter_conn;
2804
2805 /*
2806 * Is this the first connection we need to scan?
2807 */
2808 if (!c) {
2809 c = si->all_connections_head;
2810
2811 /*
2812 * If there were no connections then move to the next state.
2813 */
2814 if (!c) {
2815 spin_unlock_bh(&si->lock);
2816
2817 ws->state++;
2818 return true;
2819 }
2820
2821 c->iterators++;
2822 }
2823
2824 c_next = c->all_connections_next;
2825 ws->iter_conn = c_next;
2826
2827 /*
2828 * Before we do anything else, take an iterator reference for the
2829 * connection we'll iterate next.
2830 */
2831 if (c_next) {
2832 c_next->iterators++;
2833 }
2834
2835 /*
2836 * Remove the iterator reference that we acquired and see if we
2837 * should free any resources.
2838 */
2839 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2840 spin_unlock_bh(&si->lock);
2841
2842 /*
2843 * This entry is dead so release our hold of the source and
2844 * dest devices and free the memory for our connection objects.
2845 */
2846 dev_put(c->original_dev);
2847 dev_put(c->reply_dev);
2848 kfree(c->original_match);
2849 kfree(c->reply_match);
2850 kfree(c);
2851
2852 /*
2853 * If we have no more connections then move to the next state.
2854 */
2855 if (!c_next) {
2856 ws->state++;
2857 }
2858
2859 return true;
2860 }
2861
2862 original_cm = c->original_match;
2863 reply_cm = c->reply_match;
2864
2865 protocol = c->protocol;
2866 src_dev = c->original_dev;
2867 src_ip = c->src_ip;
2868 src_ip_xlate = c->src_ip_xlate;
2869 src_port = c->src_port;
2870 src_port_xlate = c->src_port_xlate;
2871
2872 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2873 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2874
2875 src_rx_packets = original_cm->rx_packet_count64;
2876 src_rx_bytes = original_cm->rx_byte_count64;
2877 dest_dev = c->reply_dev;
2878 dest_ip = c->dest_ip;
2879 dest_ip_xlate = c->dest_ip_xlate;
2880 dest_port = c->dest_port;
2881 dest_port_xlate = c->dest_port_xlate;
2882 dest_rx_packets = reply_cm->rx_packet_count64;
2883 dest_rx_bytes = reply_cm->rx_byte_count64;
2884 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002885 mark = c->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002886 spin_unlock_bh(&si->lock);
2887
2888 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
2889 "protocol=\"%u\" "
2890 "src_dev=\"%s\" "
2891 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
2892 "src_port=\"%u\" src_port_xlate=\"%u\" "
2893 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
2894 "dest_dev=\"%s\" "
2895 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
2896 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
2897 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002898 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06002899 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002900 protocol,
2901 src_dev->name,
2902 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002903 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002904 src_rx_packets, src_rx_bytes,
2905 dest_dev->name,
2906 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002907 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002908 dest_rx_packets, dest_rx_bytes,
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002909 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002910
2911 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2912 return false;
2913 }
2914
2915 *length -= bytes_read;
2916 *total_read += bytes_read;
2917
2918 /*
2919 * If we have no more connections then move to the next state.
2920 */
2921 if (!c_next) {
2922 ws->state++;
2923 }
2924
2925 return true;
2926}
2927
2928/*
2929 * sfe_ipv4_debug_dev_read_connections_end()
2930 * Generate part of the XML output.
2931 */
2932static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2933 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2934{
2935 int bytes_read;
2936
2937 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
2938 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2939 return false;
2940 }
2941
2942 *length -= bytes_read;
2943 *total_read += bytes_read;
2944
2945 ws->state++;
2946 return true;
2947}
2948
2949/*
2950 * sfe_ipv4_debug_dev_read_exceptions_start()
2951 * Generate part of the XML output.
2952 */
2953static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2954 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2955{
2956 int bytes_read;
2957
2958 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
2959 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2960 return false;
2961 }
2962
2963 *length -= bytes_read;
2964 *total_read += bytes_read;
2965
2966 ws->state++;
2967 return true;
2968}
2969
2970/*
2971 * sfe_ipv4_debug_dev_read_exceptions_exception()
2972 * Generate part of the XML output.
2973 */
2974static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2975 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2976{
2977 uint64_t ct;
2978
2979 spin_lock_bh(&si->lock);
2980 ct = si->exception_events64[ws->iter_exception];
2981 spin_unlock_bh(&si->lock);
2982
2983 if (ct) {
2984 int bytes_read;
2985
2986 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
2987 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
2988 sfe_ipv4_exception_events_string[ws->iter_exception],
2989 ct);
2990 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2991 return false;
2992 }
2993
2994 *length -= bytes_read;
2995 *total_read += bytes_read;
2996 }
2997
2998 ws->iter_exception++;
2999 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3000 ws->iter_exception = 0;
3001 ws->state++;
3002 }
3003
3004 return true;
3005}
3006
3007/*
3008 * sfe_ipv4_debug_dev_read_exceptions_end()
3009 * Generate part of the XML output.
3010 */
3011static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3012 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3013{
3014 int bytes_read;
3015
3016 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3017 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3018 return false;
3019 }
3020
3021 *length -= bytes_read;
3022 *total_read += bytes_read;
3023
3024 ws->state++;
3025 return true;
3026}
3027
3028/*
3029 * sfe_ipv4_debug_dev_read_stats()
3030 * Generate part of the XML output.
3031 */
3032static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3033 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3034{
3035 int bytes_read;
3036 unsigned int num_connections;
3037 uint64_t packets_forwarded;
3038 uint64_t packets_not_forwarded;
3039 uint64_t connection_create_requests;
3040 uint64_t connection_create_collisions;
3041 uint64_t connection_destroy_requests;
3042 uint64_t connection_destroy_misses;
3043 uint64_t connection_flushes;
3044 uint64_t connection_match_hash_hits;
3045 uint64_t connection_match_hash_reorders;
3046
3047 spin_lock_bh(&si->lock);
3048 sfe_ipv4_update_summary_stats(si);
3049
3050 num_connections = si->num_connections;
3051 packets_forwarded = si->packets_forwarded64;
3052 packets_not_forwarded = si->packets_not_forwarded64;
3053 connection_create_requests = si->connection_create_requests64;
3054 connection_create_collisions = si->connection_create_collisions64;
3055 connection_destroy_requests = si->connection_destroy_requests64;
3056 connection_destroy_misses = si->connection_destroy_misses64;
3057 connection_flushes = si->connection_flushes64;
3058 connection_match_hash_hits = si->connection_match_hash_hits64;
3059 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3060 spin_unlock_bh(&si->lock);
3061
3062 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3063 "num_connections=\"%u\" "
3064 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3065 "create_requests=\"%llu\" create_collisions=\"%llu\" "
3066 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3067 "flushes=\"%llu\" "
3068 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3069 num_connections,
3070 packets_forwarded,
3071 packets_not_forwarded,
3072 connection_create_requests,
3073 connection_create_collisions,
3074 connection_destroy_requests,
3075 connection_destroy_misses,
3076 connection_flushes,
3077 connection_match_hash_hits,
3078 connection_match_hash_reorders);
3079 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3080 return false;
3081 }
3082
3083 *length -= bytes_read;
3084 *total_read += bytes_read;
3085
3086 ws->state++;
3087 return true;
3088}
3089
3090/*
3091 * sfe_ipv4_debug_dev_read_end()
3092 * Generate part of the XML output.
3093 */
3094static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3095 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3096{
3097 int bytes_read;
3098
3099 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3100 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3101 return false;
3102 }
3103
3104 *length -= bytes_read;
3105 *total_read += bytes_read;
3106
3107 ws->state++;
3108 return true;
3109}
3110
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * NOTE: the order of entries must match the SFE_IPV4_DEBUG_XML_STATE_*
 * enumeration - each handler is indexed directly by ws->state and is
 * responsible for advancing the state when its element is complete.
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3126
3127/*
3128 * sfe_ipv4_debug_dev_read()
3129 * Send info to userspace upon read request from user
3130 */
3131static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3132{
3133 char msg[CHAR_DEV_MSG_SIZE];
3134 int total_read = 0;
3135 struct sfe_ipv4_debug_xml_write_state *ws;
3136 struct sfe_ipv4 *si = &__si;
3137
3138 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3139 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3140 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3141 continue;
3142 }
3143 }
3144
3145 return total_read;
3146}
3147
3148/*
3149 * sfe_ipv4_debug_dev_write()
Matthew McClintock54167ab2014-01-14 21:06:28 -06003150 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003151 */
3152static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3153{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003154 struct sfe_ipv4 *si = &__si;
3155
3156 spin_lock_bh(&si->lock);
3157 sfe_ipv4_update_summary_stats(si);
3158
3159 si->num_connections = 0;
3160 si->packets_forwarded64 = 0;
3161 si->packets_not_forwarded64 = 0;
3162 si->connection_create_requests64 = 0;
3163 si->connection_create_collisions64 = 0;
3164 si->connection_destroy_requests64 = 0;
3165 si->connection_destroy_misses64 = 0;
3166 si->connection_flushes64 = 0;
3167 si->connection_match_hash_hits64 = 0;
3168 si->connection_match_hash_reorders64 = 0;
3169 spin_unlock_bh(&si->lock);
3170
3171 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003172}
3173
3174/*
3175 * sfe_ipv4_debug_dev_open()
3176 */
3177static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3178{
3179 struct sfe_ipv4_debug_xml_write_state *ws;
3180
3181 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3182 if (!ws) {
3183 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3184 if (!ws) {
3185 return -ENOMEM;
3186 }
3187
3188 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3189 file->private_data = ws;
3190 }
3191
3192 return 0;
3193}
3194
/*
 * sfe_ipv4_debug_dev_release()
 *	Release per-open writer state, dropping any iterator reference still
 *	held on a partially-dumped connection.
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		struct sfe_ipv4_connection *c;

		/*
		 * Are we currently iterating a connection? If we are then
		 * make sure that we reduce its iterator count and if necessary
		 * free it.
		 */
		c = ws->iter_conn;
		if (c) {
			struct sfe_ipv4 *si = &__si;

			spin_lock_bh(&si->lock);
			if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
				spin_unlock_bh(&si->lock);

				/*
				 * This entry is dead so release our hold of the source and
				 * dest devices and free the memory for our connection objects.
				 */
				dev_put(c->original_dev);
				dev_put(c->reply_dev);
				kfree(c->original_match);
				kfree(c->reply_match);
				kfree(c);
			}
		}

		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
	}

	return 0;
}
3239
3240/*
3241 * File operations used in the debug char device
3242 */
3243static struct file_operations sfe_ipv4_debug_dev_fops = {
3244 .read = sfe_ipv4_debug_dev_read,
3245 .write = sfe_ipv4_debug_dev_write,
3246 .open = sfe_ipv4_debug_dev_open,
3247 .release = sfe_ipv4_debug_dev_release
3248};
3249
3250/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003251 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003252 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003253static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003254{
3255 struct sfe_ipv4 *si = &__si;
3256 int result = -1;
3257
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003258 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003259
3260 /*
3261 * Create sys/sfe_ipv4
3262 */
3263 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3264 if (!si->sys_sfe_ipv4) {
3265 DEBUG_ERROR("failed to register sfe_ipv4\n");
3266 goto exit1;
3267 }
3268
3269 /*
3270 * Create files, one for each parameter supported by this module.
3271 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003272 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3273 if (result) {
3274 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3275 goto exit4;
3276 }
3277
3278 /*
3279 * Register our debug char device.
3280 */
3281 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3282 if (result < 0) {
3283 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3284 goto exit5;
3285 }
3286
3287 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003288
3289 /*
3290 * Create a timer to handle periodic statistics.
3291 */
3292 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
3293 mod_timer(&si->timer, jiffies + (HZ / 100));
3294
Dave Hudson87973cd2013-10-22 16:00:04 +01003295 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003296
Dave Hudson87973cd2013-10-22 16:00:04 +01003297 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003298
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003299exit5:
3300 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3301
3302exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003303 kobject_put(si->sys_sfe_ipv4);
3304
3305exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003306 return result;
3307}
3308
3309/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003310 * sfe_ipv4_exit()
3311 */
3312static void __exit sfe_ipv4_exit(void)
3313{
Dave Hudson87973cd2013-10-22 16:00:04 +01003314 struct sfe_ipv4 *si = &__si;
3315
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003316 DEBUG_INFO("SFE IPv4 exit\n");
Dave Hudson87973cd2013-10-22 16:00:04 +01003317
3318 /*
3319 * Destroy all connections.
3320 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003321 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Dave Hudson87973cd2013-10-22 16:00:04 +01003322
3323// XXX - this is where we need to unregister with any lower level offload services.
3324
Dave Hudson87973cd2013-10-22 16:00:04 +01003325 del_timer_sync(&si->timer);
3326
Dave Hudson87973cd2013-10-22 16:00:04 +01003327 unregister_chrdev(si->debug_dev, "sfe_ipv4");
3328
3329 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3330
Dave Hudson87973cd2013-10-22 16:00:04 +01003331 kobject_put(si->sys_sfe_ipv4);
3332
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003333}
3334
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Public API exported to other kernel modules (e.g. the connection-manager
 * glue that feeds rules into this engine).
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003349