blob: 58bf95d1cf77a16984734542701b71258f55de2c [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Matthew McClintocka3221942014-01-16 11:44:26 -06005 * Copyright (c) 2013 Qualcomm Atheros, Inc.
6 *
7 * All Rights Reserved.
8 * Qualcomm Atheros Confidential and Proprietary.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01009 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010011#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060012#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010013#include <linux/skbuff.h>
14#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010015#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060016#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010017
Dave Hudsondcd08fb2013-11-22 09:25:16 -060018#include "sfe.h"
19#include "sfe_ipv4.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020
/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses.  For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif
37
/*
 * An Ethernet header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC, stored as 16-bit words */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC, stored as 16-bit words */
	__be16 h_proto;			/* EtherType / length field */
} SFE_IPV4_UNALIGNED_STRUCT;
48
/*
 * An IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,
	     version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service */
	__be16 tot_len;			/* Total datagram length (header + payload) */
	__be16 id;			/* Identification (used for fragment reassembly) */
	__be16 frag_off;		/* Fragment flags and offset */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol (IPPROTO_*) */
	__sum16 check;			/* Header checksum */
	__be32 saddr;			/* Source address */
	__be32 daddr;			/* Destination address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010078
/*
 * A UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* UDP length (header + payload) */
	__sum16 check;			/* UDP checksum */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010090
/*
 * A TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgment number */
	/*
	 * Flag/data-offset bitfields; layout depends on host bit ordering.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100130
/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 * Per-direction TCP sequence-space tracking state.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};
145
/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write a full L2 header (was a copy-paste duplicate of the flag above) */
159
/*
 * IPv4 connection matching structure.
 * One of these exists per flow direction; lookups are keyed on the
 * (dev, protocol, src ip/port, dest ip/port) 5-tuple plus device.
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};
234
/*
 * Per-connection data structure.
 * Ties together the two per-direction match entries (original/reply)
 * and lives on both a hash chain and the "all connections" list.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	int iterators;			/* Number of iterators currently using this connection */
	bool pending_free;		/* Flag that indicates that this connection should be freed after iteration */
	uint32_t mark;			/* mark for outgoing packet */
};
268
/*
 * IPv4 connections and hash table size information.
 * 4096 buckets (1 << 12); the mask relies on the size being a power of two.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
275
/*
 * Exception events: reasons why a packet could not be fast-forwarded.
 * Counted in the per-module exception_events[] arrays.
 *
 * The order here must match sfe_ipv4_exception_events_string[] exactly,
 * since that array is indexed by these values.
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
315
/*
 * Human-readable names for the exception events, indexed by
 * enum sfe_ipv4_exception_events — keep the order in sync with the enum.
 *
 * NOTE(review): nothing in view writes through these pointers; consider
 * "static const char * const" once all users are confirmed read-only.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
354
/*
 * Per-module structure.
 * Holds all engine state: connection tables, the active list, statistics
 * and control/debug hooks.  Protected by the embedded spinlock.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_ipv4_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
};
424
/*
 * Enumeration of the XML output.
 * States of the state machine that streams the debug XML document
 * section by section.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
440
/*
 * XML write state.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	struct sfe_ipv4_connection *iter_conn;
					/* Next connection iterator */
	int iter_exception;		/* Next exception iterator */
};

/*
 * Signature of a per-state writer that emits one XML section into the
 * debug read buffer.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/*
 * The single module-wide engine instance.
 * NOTE(review): "__si" uses a reserved identifier prefix and lacks a
 * "static" qualifier — confirm no other translation unit references it
 * before narrowing linkage or renaming.
 */
struct sfe_ipv4 __si;
456
457/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100458 * sfe_ipv4_gen_ip_csum()
459 * Generate the IP checksum for an IPv4 header.
460 *
461 * Note that this function assumes that we have only 20 bytes of IP header.
462 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600463static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100464{
465 uint32_t sum;
466 uint16_t *i = (uint16_t *)iph;
467
468 iph->check = 0;
469
470 /*
471 * Generate the sum.
472 */
473 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
474
475 /*
476 * Fold it to ones-complement form.
477 */
478 sum = (sum & 0xffff) + (sum >> 16);
479 sum = (sum & 0xffff) + (sum >> 16);
480
481 return (uint16_t)sum ^ 0xffff;
482}
483
484/*
485 * sfe_ipv4_get_connection_match_hash()
486 * Generate the hash used in connection match lookups.
487 */
488static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100489 __be32 src_ip, __be16 src_port,
490 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100491{
492 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100493 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100494 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
495}
496
/*
 * sfe_ipv4_find_sfe_ipv4_connection_match()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 *
 * A hit is moved to the head of its hash chain (move-to-front) on the
 * assumption that the same flow will be looked up again very soon.
 * The forward declaration carries always_inline so the hot-path callers
 * get this inlined despite its size.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it.  We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
		 || cm->match_dest_port != dest_port
		 || cm->match_src_ip != src_ip
		 || cm->match_dest_ip != dest_ip
		 || cm->match_protocol != protocol
		 || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it.  The hit is never the head here, so
	 * cm->prev is guaranteed non-NULL when we unlink.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
576
577/*
578 * sfe_ipv4_connection_match_update_summary_stats()
579 * Update the summary stats for a connection match entry.
580 */
581static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
582{
583 cm->rx_packet_count64 += cm->rx_packet_count;
584 cm->rx_packet_count = 0;
585 cm->rx_byte_count64 += cm->rx_byte_count;
586 cm->rx_byte_count = 0;
587}
588
589/*
590 * sfe_ipv4_connection_match_compute_translations()
591 * Compute port and address translations for a connection match entry.
592 */
593static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
594{
595 /*
596 * Before we insert the entry look to see if this is tagged as doing address
597 * translations. If it is then work out the adjustment that we need to apply
598 * to the transport checksum.
599 */
600 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
601 /*
602 * Precompute an incremental checksum adjustment so we can
603 * edit packets in this stream very quickly. The algorithm is from RFC1624.
604 */
605 uint16_t src_ip_hi = cm->match_src_ip >> 16;
606 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
607 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
608 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
609 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100610 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100611 uint32_t adj;
612
613 /*
614 * When we compute this fold it down to a 16-bit offset
615 * as that way we can avoid having to do a double
616 * folding of the twos-complement result because the
617 * addition of 2 16-bit values cannot cause a double
618 * wrap-around!
619 */
620 adj = src_ip_hi + src_ip_lo + cm->match_src_port
621 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
622 adj = (adj & 0xffff) + (adj >> 16);
623 adj = (adj & 0xffff) + (adj >> 16);
624 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600625
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100626 }
627
628 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
629 /*
630 * Precompute an incremental checksum adjustment so we can
631 * edit packets in this stream very quickly. The algorithm is from RFC1624.
632 */
633 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
634 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
635 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
636 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
637 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100638 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100639 uint32_t adj;
640
641 /*
642 * When we compute this fold it down to a 16-bit offset
643 * as that way we can avoid having to do a double
644 * folding of the twos-complement result because the
645 * addition of 2 16-bit values cannot cause a double
646 * wrap-around!
647 */
648 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
649 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
650 adj = (adj & 0xffff) + (adj >> 16);
651 adj = (adj & 0xffff) + (adj >> 16);
652 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
653 }
654}
655
656/*
657 * sfe_ipv4_update_summary_stats()
658 * Update the summary stats.
659 */
660static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
661{
662 int i;
663
664 si->connection_create_requests64 += si->connection_create_requests;
665 si->connection_create_requests = 0;
666 si->connection_create_collisions64 += si->connection_create_collisions;
667 si->connection_create_collisions = 0;
668 si->connection_destroy_requests64 += si->connection_destroy_requests;
669 si->connection_destroy_requests = 0;
670 si->connection_destroy_misses64 += si->connection_destroy_misses;
671 si->connection_destroy_misses = 0;
672 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
673 si->connection_match_hash_hits = 0;
674 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
675 si->connection_match_hash_reorders = 0;
676 si->connection_flushes64 += si->connection_flushes;
677 si->connection_flushes = 0;
678 si->packets_forwarded64 += si->packets_forwarded;
679 si->packets_forwarded = 0;
680 si->packets_not_forwarded64 += si->packets_not_forwarded;
681 si->packets_not_forwarded = 0;
682
683 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
684 si->exception_events64[i] += si->exception_events[i];
685 si->exception_events[i] = 0;
686 }
687}
688
689/*
690 * sfe_ipv4_insert_sfe_ipv4_connection_match()
691 * Insert a connection match into the hash.
692 *
693 * On entry we must be holding the lock that protects the hash table.
694 */
695static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
696{
697 struct sfe_ipv4_connection_match **hash_head;
698 struct sfe_ipv4_connection_match *prev_head;
699 unsigned int conn_match_idx
700 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
701 cm->match_src_ip, cm->match_src_port,
702 cm->match_dest_ip, cm->match_dest_port);
703 hash_head = &si->conn_match_hash[conn_match_idx];
704 prev_head = *hash_head;
705 cm->prev = NULL;
706 if (prev_head) {
707 prev_head->prev = cm;
708 }
709
710 cm->next = prev_head;
711 *hash_head = cm;
712}
713
714/*
715 * sfe_ipv4_remove_sfe_ipv4_connection_match()
716 * Remove a connection match object from the hash.
717 *
718 * On entry we must be holding the lock that protects the hash table.
719 */
720static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
721{
722 /*
723 * Unlink the connection match entry from the hash.
724 */
725 if (cm->prev) {
726 cm->prev->next = cm->next;
727 } else {
728 unsigned int conn_match_idx
729 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
730 cm->match_src_ip, cm->match_src_port,
731 cm->match_dest_ip, cm->match_dest_port);
732 si->conn_match_hash[conn_match_idx] = cm->next;
733 }
734
735 if (cm->next) {
736 cm->next->prev = cm->prev;
737 }
738
739 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600740 * If the connection match entry is in the active list remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100741 */
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600742 if (cm->active) {
743 if (likely(cm->active_prev)) {
744 cm->active_prev->active_next = cm->active_next;
745 } else {
746 si->active_head = cm->active_next;
747 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100748
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600749 if (likely(cm->active_next)) {
750 cm->active_next->active_prev = cm->active_prev;
751 } else {
752 si->active_tail = cm->active_prev;
753 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100754 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100755}
756
757/*
758 * sfe_ipv4_get_connection_hash()
759 * Generate the hash used in connection lookups.
760 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100761static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
762 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100763{
Dave Hudson87973cd2013-10-22 16:00:04 +0100764 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100765 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
766}
767
768/*
769 * sfe_ipv4_find_sfe_ipv4_connection()
770 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
771 *
772 * On entry we must be holding the lock that protects the hash table.
773 */
774static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100775 __be32 src_ip, __be16 src_port,
776 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100777{
778 struct sfe_ipv4_connection *c;
779 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
780 c = si->conn_hash[conn_idx];
781
782 /*
783 * If we don't have anything in this chain then bale.
784 */
785 if (unlikely(!c)) {
786 return c;
787 }
788
789 /*
790 * Hopefully the first entry is the one we want.
791 */
792 if (likely(c->src_port == src_port)
793 && likely(c->dest_port == dest_port)
794 && likely(c->src_ip == src_ip)
795 && likely(c->dest_ip == dest_ip)
796 && likely(c->protocol == protocol)) {
797 return c;
798 }
799
800 /*
801 * We may or may not have a matching entry but if we do then we want to
802 * move that entry to the top of the hash chain when we get to it. We
803 * presume that this will be reused again very quickly.
804 */
805 do {
806 c = c->next;
807 } while (c && (c->src_port != src_port
808 || c->dest_port != dest_port
809 || c->src_ip != src_ip
810 || c->dest_ip != dest_ip
811 || c->protocol != protocol));
812
813 /*
814 * Will need connection entry for next create/destroy metadata,
815 * So no need to re-order entry for these requests
816 */
817 return c;
818}
819
820/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600821 * sfe_ipv4_mark_rule()
822 * Updates the mark for a current offloaded connection
823 *
824 * Will take hash lock upon entry
825 */
Nicolas Costa865bce82014-01-31 17:48:03 -0600826void sfe_ipv4_mark_rule(struct sfe_ipv4_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600827{
828 struct sfe_ipv4 *si = &__si;
829 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600830
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600831 spin_lock(&si->lock);
832 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600833 mark->src_ip, mark->src_port,
834 mark->dest_ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600835 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600836 DEBUG_TRACE("Matching connection found for mark, "
837 "setting from %08x to %08x\n",
838 c->mark, mark->mark);
839 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600840 c->mark = mark->mark;
841 }
842 spin_unlock(&si->lock);
843}
844
845/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100846 * sfe_ipv4_insert_sfe_ipv4_connection()
847 * Insert a connection into the hash.
848 *
849 * On entry we must be holding the lock that protects the hash table.
850 */
851static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
852{
853 struct sfe_ipv4_connection **hash_head;
854 struct sfe_ipv4_connection *prev_head;
855 unsigned int conn_idx;
856
857 /*
858 * Insert entry into the connection hash.
859 */
860 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
861 c->dest_ip, c->dest_port);
862 hash_head = &si->conn_hash[conn_idx];
863 prev_head = *hash_head;
864 c->prev = NULL;
865 if (prev_head) {
866 prev_head->prev = c;
867 }
868
869 c->next = prev_head;
870 *hash_head = c;
871
872 /*
873 * Insert entry into the "all connections" list.
874 */
875 if (si->all_connections_tail) {
876 c->all_connections_prev = si->all_connections_tail;
877 si->all_connections_tail->all_connections_next = c;
878 } else {
879 c->all_connections_prev = NULL;
880 si->all_connections_head = c;
881 }
882
883 si->all_connections_tail = c;
884 c->all_connections_next = NULL;
885 si->num_connections++;
886
887 /*
888 * Insert the connection match objects too.
889 */
890 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
891 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
892}
893
894/*
895 * sfe_ipv4_remove_sfe_ipv4_connection()
896 * Remove a sfe_ipv4_connection object from the hash.
897 *
898 * On entry we must be holding the lock that protects the hash table.
899 */
900static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
901{
902 /*
903 * Remove the connection match objects.
904 */
905 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
906 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
907
908 /*
909 * Unlink the connection.
910 */
911 if (c->prev) {
912 c->prev->next = c->next;
913 } else {
914 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
915 c->dest_ip, c->dest_port);
916 si->conn_hash[conn_idx] = c->next;
917 }
918
919 if (c->next) {
920 c->next->prev = c->prev;
921 }
922}
923
924/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100925 * sfe_ipv4_sync_sfe_ipv4_connection()
926 * Sync a connection.
927 *
928 * On entry to this function we expect that the lock for the connection is either
929 * already held or isn't required.
930 */
931static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
932 struct sfe_ipv4_sync *sis, uint64_t now_jiffies)
933{
934 struct sfe_ipv4_connection_match *original_cm;
935 struct sfe_ipv4_connection_match *reply_cm;
936
937 /*
938 * Fill in the update message.
939 */
940 sis->protocol = c->protocol;
941 sis->src_ip = c->src_ip;
942 sis->dest_ip = c->dest_ip;
943 sis->src_port = c->src_port;
944 sis->dest_port = c->dest_port;
945
946 original_cm = c->original_match;
947 reply_cm = c->reply_match;
948 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
949 sis->src_td_end = original_cm->protocol_state.tcp.end;
950 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
951 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
952 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
953 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
954
955 sfe_ipv4_connection_match_update_summary_stats(original_cm);
956 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
957
958 sis->src_packet_count = original_cm->rx_packet_count64;
959 sis->src_byte_count = original_cm->rx_byte_count64;
960 sis->dest_packet_count = reply_cm->rx_packet_count64;
961 sis->dest_byte_count = reply_cm->rx_byte_count64;
962
963 /*
964 * Get the time increment since our last sync.
965 */
966 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
967 c->last_sync_jiffies = now_jiffies;
968}
969
970/*
971 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
972 * Remove an iterator from a connection - free all resources if necessary.
973 *
974 * Returns true if the connection should now be free, false if not.
975 *
976 * We must be locked on entry to this function.
977 */
978static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
979{
980 /*
981 * Are we the last iterator for this connection?
982 */
983 c->iterators--;
984 if (c->iterators) {
985 return false;
986 }
987
988 /*
989 * Is this connection marked for deletion?
990 */
991 if (!c->pending_free) {
992 return false;
993 }
994
995 /*
996 * We're ready to delete this connection so unlink it from the "all
997 * connections" list.
998 */
999 si->num_connections--;
1000 if (c->all_connections_prev) {
1001 c->all_connections_prev->all_connections_next = c->all_connections_next;
1002 } else {
1003 si->all_connections_head = c->all_connections_next;
1004 }
1005
1006 if (c->all_connections_next) {
1007 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1008 } else {
1009 si->all_connections_tail = c->all_connections_prev;
1010 }
1011
1012 return true;
1013}
1014
/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again. In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 *
 * The caller must already have removed the connection from the hash (via
 * sfe_ipv4_remove_sfe_ipv4_connection()); this function only handles the
 * "all connections" list, the final sync and the memory release.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_sync sis;
	uint64_t now_jiffies;
	bool pending_free = false;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	/*
	 * The sync callback is published via RCU so hold the read lock across
	 * both the rcu_dereference() below and the callback invocation.
	 */
	rcu_read_lock();
	spin_lock(&si->lock);
	si->connection_flushes++;

	/*
	 * Check that we're not currently being iterated.  If we are then
	 * we can't free this entry yet but must mark it pending a free.  If it's
	 * not being iterated then we can unlink it from the list of all
	 * connections.  (The last iterator performs the deferred unlink in
	 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator().)
	 */
	if (c->iterators) {
		pending_free = true;
		c->pending_free = true;
	} else {
		si->num_connections--;
		if (c->all_connections_prev) {
			c->all_connections_prev->all_connections_next = c->all_connections_next;
		} else {
			si->all_connections_head = c->all_connections_next;
		}

		if (c->all_connections_next) {
			c->all_connections_next->all_connections_prev = c->all_connections_prev;
		} else {
			si->all_connections_tail = c->all_connections_prev;
		}
	}

	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	spin_unlock(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a final sync message and deliver it so the
		 * registered listener sees the connection's closing stats.
		 * Note this runs outside the spinlock but inside the RCU
		 * read-side critical section.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * If we can't yet free the underlying memory then we're done.
	 */
	if (pending_free) {
		return;
	}

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1091
/*
 * sfe_ipv4_recv_udp()
 *	Handle UDP packet receives and forwarding.
 *
 * Returns 1 if the packet was consumed (fast-forwarded and transmitted),
 * or 0 if it must be handed back to the normal Linux network stack.
 * "len" is the IP payload length and "ihl" the IP header length in bytes.
 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(len < (sizeof(struct sfe_ipv4_udp_hdr) + ihl))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Does our TTL allow forwarding?  A TTL of 1 (or 0) must be handled by
	 * the slow path so that an ICMP time-exceeded can be generated.
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 * (SNAT: rewrite the sender's identity.)
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.  A zero UDP checksum means "no checksum" and
		 * must be left alone.  The adjustment is a precomputed
		 * ones-complement delta; fold the carry back in.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum = udp_csum + cm->xlate_src_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 * (DNAT: rewrite the receiver's identity.)
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum = udp_csum + cm->xlate_dest_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum (TTL and possibly the addresses changed).
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * NOTE(review): this assumes the cached MACs are stored
			 * as three big-endian 16-bit words and that h_dest /
			 * h_source are 16-bit arrays - confirm against
			 * sfe_ipv4_eth_hdr in sfe_ipv4.h.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
			eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
			eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
			eth->h_source[0] = htons(cm->xmit_src_mac[0]);
			eth->h_source[1] = htons(cm->xmit_src_mac[1]);
			eth->h_source[2] = htons(cm->xmit_src_mac[2]);
		}
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
1328
1329/*
1330 * sfe_ipv4_process_tcp_option_sack()
1331 * Parse TCP SACK option and update ack according
1332 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001333static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001334 uint32_t *ack) __attribute__((always_inline));
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001335static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001336 uint32_t *ack)
1337{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001338 uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001339 uint8_t *ptr = (uint8_t *)th + length;
1340
1341 /*
1342 * If option is TIMESTAMP discard it.
1343 */
1344 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1345 && likely(ptr[0] == TCPOPT_NOP)
1346 && likely(ptr[1] == TCPOPT_NOP)
1347 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1348 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1349 return true;
1350 }
1351
1352 /*
1353 * TCP options. Parse SACK option.
1354 */
1355 while (length < data_offs) {
1356 uint8_t size;
1357 uint8_t kind;
1358
1359 ptr = (uint8_t *)th + length;
1360 kind = *ptr;
1361
1362 /*
1363 * NOP, for padding
1364 * Not in the switch because to fast escape and to not calculate size
1365 */
1366 if (kind == TCPOPT_NOP) {
1367 length++;
1368 continue;
1369 }
1370
1371 if (kind == TCPOPT_SACK) {
1372 uint32_t sack = 0;
1373 uint8_t re = 1 + 1;
1374
1375 size = *(ptr + 1);
1376 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1377 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1378 || (size > (data_offs - length))) {
1379 return false;
1380 }
1381
1382 re += 4;
1383 while (re < size) {
1384 uint32_t sack_re;
1385 uint8_t *sptr = ptr + re;
1386 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1387 if (sack_re > sack) {
1388 sack = sack_re;
1389 }
1390 re += TCPOLEN_SACK_PERBLOCK;
1391 }
1392 if (sack > *ack) {
1393 *ack = sack;
1394 }
1395 length += size;
1396 continue;
1397 }
1398 if (kind == TCPOPT_EOL) {
1399 return true;
1400 }
1401 size = *(ptr + 1);
1402 if (size < 2) {
1403 return false;
1404 }
1405 length += size;
1406 }
1407
1408 return true;
1409}
1410
1411/*
1412 * sfe_ipv4_recv_tcp()
1413 * Handle TCP packet receives and forwarding.
1414 */
1415static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001416 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001417{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001418 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001419 __be32 src_ip;
1420 __be32 dest_ip;
1421 __be16 src_port;
1422 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001423 struct sfe_ipv4_connection_match *cm;
1424 struct sfe_ipv4_connection_match *counter_cm;
1425 uint8_t ttl;
1426 uint32_t flags;
1427 struct net_device *xmit_dev;
1428
1429 /*
1430 * Is our packet too short to contain a valid UDP header?
1431 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001432 if (unlikely(len < (sizeof(struct sfe_ipv4_tcp_hdr) + ihl))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001433 spin_lock(&si->lock);
1434 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1435 si->packets_not_forwarded++;
1436 spin_unlock(&si->lock);
1437
1438 DEBUG_TRACE("packet too short for TCP header\n");
1439 return 0;
1440 }
1441
1442 /*
1443 * Read the IP address and port information. Read the IP header data first
1444 * because we've almost certainly got that in the cache. We may not yet have
1445 * the TCP header cached though so allow more time for any prefetching.
1446 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001447 src_ip = iph->saddr;
1448 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001449
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001450 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001451 src_port = tcph->source;
1452 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001453 flags = tcp_flag_word(tcph);
1454
1455 spin_lock(&si->lock);
1456
1457 /*
1458 * Look for a connection match.
1459 */
1460 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1461 if (unlikely(!cm)) {
1462 /*
1463 * We didn't get a connection but as TCP is connection-oriented that
1464 * may be because this is a non-fast connection (not running established).
1465 * For diagnostic purposes we differentiate this here.
1466 */
1467 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1468 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1469 si->packets_not_forwarded++;
1470 spin_unlock(&si->lock);
1471
1472 DEBUG_TRACE("no connection found - fast flags\n");
1473 return 0;
1474 }
1475 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1476 si->packets_not_forwarded++;
1477 spin_unlock(&si->lock);
1478
1479 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1480 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1481 return 0;
1482 }
1483
1484 /*
1485 * If our packet has beern marked as "flush on find" we can't actually
1486 * forward it in the fast path, but now that we've found an associated
1487 * connection we can flush that out before we process the packet.
1488 */
1489 if (unlikely(flush_on_find)) {
1490 struct sfe_ipv4_connection *c = cm->connection;
1491 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1492 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1493 si->packets_not_forwarded++;
1494 spin_unlock(&si->lock);
1495
1496 DEBUG_TRACE("flush on find\n");
1497 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1498 return 0;
1499 }
1500
1501 /*
1502 * Does our TTL allow forwarding?
1503 */
1504 ttl = iph->ttl;
1505 if (unlikely(ttl < 2)) {
1506 struct sfe_ipv4_connection *c = cm->connection;
1507 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1508 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1509 si->packets_not_forwarded++;
1510 spin_unlock(&si->lock);
1511
1512 DEBUG_TRACE("ttl too low\n");
1513 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1514 return 0;
1515 }
1516
1517 /*
1518 * If our packet is larger than the MTU of the transmit interface then
1519 * we can't forward it easily.
1520 */
1521 if (unlikely(len > cm->xmit_dev_mtu)) {
1522 struct sfe_ipv4_connection *c = cm->connection;
1523 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1524 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1525 si->packets_not_forwarded++;
1526 spin_unlock(&si->lock);
1527
1528 DEBUG_TRACE("larger than mtu\n");
1529 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1530 return 0;
1531 }
1532
1533 /*
1534 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1535 * set is not a fast path packet.
1536 */
1537 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1538 struct sfe_ipv4_connection *c = cm->connection;
1539 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1540 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1541 si->packets_not_forwarded++;
1542 spin_unlock(&si->lock);
1543
1544 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1545 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1546 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1547 return 0;
1548 }
1549
1550 counter_cm = cm->counter_match;
1551
1552 /*
1553 * Are we doing sequence number checking?
1554 */
1555 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1556 uint32_t seq;
1557 uint32_t ack;
1558 uint32_t sack;
1559 uint32_t data_offs;
1560 uint32_t end;
1561 uint32_t left_edge;
1562 uint32_t scaled_win;
1563 uint32_t max_end;
1564
1565 /*
1566 * Is our sequence fully past the right hand edge of the window?
1567 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001568 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001569 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1570 struct sfe_ipv4_connection *c = cm->connection;
1571 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1572 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1573 si->packets_not_forwarded++;
1574 spin_unlock(&si->lock);
1575
1576 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1577 seq, cm->protocol_state.tcp.max_end + 1);
1578 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1579 return 0;
1580 }
1581
1582 /*
1583 * Check that our TCP data offset isn't too short.
1584 */
1585 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001586 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001587 struct sfe_ipv4_connection *c = cm->connection;
1588 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1589 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1590 si->packets_not_forwarded++;
1591 spin_unlock(&si->lock);
1592
1593 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1594 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1595 return 0;
1596 }
1597
1598 /*
1599 * Update ACK according to any SACK option.
1600 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001601 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001602 sack = ack;
1603 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1604 struct sfe_ipv4_connection *c = cm->connection;
1605 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1606 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1607 si->packets_not_forwarded++;
1608 spin_unlock(&si->lock);
1609
1610 DEBUG_TRACE("TCP option SACK size is wrong\n");
1611 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1612 return 0;
1613 }
1614
1615 /*
1616 * Check that our TCP data offset isn't past the end of the packet.
1617 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001618 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001619 if (unlikely(len < data_offs)) {
1620 struct sfe_ipv4_connection *c = cm->connection;
1621 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1622 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1623 si->packets_not_forwarded++;
1624 spin_unlock(&si->lock);
1625
1626 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1627 data_offs, len);
1628 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1629 return 0;
1630 }
1631
1632 end = seq + len - data_offs;
1633
1634 /*
1635 * Is our sequence fully before the left hand edge of the window?
1636 */
1637 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1638 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1639 struct sfe_ipv4_connection *c = cm->connection;
1640 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1641 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1642 si->packets_not_forwarded++;
1643 spin_unlock(&si->lock);
1644
1645 DEBUG_TRACE("seq: %u before left edge: %u\n",
1646 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1647 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1648 return 0;
1649 }
1650
1651 /*
1652 * Are we acking data that is to the right of what has been sent?
1653 */
1654 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1655 struct sfe_ipv4_connection *c = cm->connection;
1656 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1657 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1658 si->packets_not_forwarded++;
1659 spin_unlock(&si->lock);
1660
1661 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1662 sack, counter_cm->protocol_state.tcp.end + 1);
1663 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1664 return 0;
1665 }
1666
1667 /*
1668 * Is our ack too far before the left hand edge of the window?
1669 */
1670 left_edge = counter_cm->protocol_state.tcp.end
1671 - cm->protocol_state.tcp.max_win
1672 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1673 - 1;
1674 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1675 struct sfe_ipv4_connection *c = cm->connection;
1676 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1677 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1678 si->packets_not_forwarded++;
1679 spin_unlock(&si->lock);
1680
1681 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1682 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1683 return 0;
1684 }
1685
1686 /*
1687 * Have we just seen the largest window size yet for this connection? If yes
1688 * then we need to record the new value.
1689 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001690 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001691 scaled_win += (sack - ack);
1692 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1693 cm->protocol_state.tcp.max_win = scaled_win;
1694 }
1695
1696 /*
1697 * If our sequence and/or ack numbers have advanced then record the new state.
1698 */
1699 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1700 cm->protocol_state.tcp.end = end;
1701 }
1702
1703 max_end = sack + scaled_win;
1704 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1705 counter_cm->protocol_state.tcp.max_end = max_end;
1706 }
1707 }
1708
1709 /*
1710 * From this point on we're good to modify the packet.
1711 */
1712
1713 /*
1714 * Decrement our TTL.
1715 */
1716 iph->ttl = ttl - 1;
1717
1718 /*
1719 * Do we have to perform translations of the source address/port?
1720 */
1721 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1722 uint16_t tcp_csum;
1723 uint32_t sum;
1724
Dave Hudson87973cd2013-10-22 16:00:04 +01001725 iph->saddr = cm->xlate_src_ip;
1726 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001727
1728 /*
1729 * Do we have a non-zero UDP checksum? If we do then we need
1730 * to update it.
1731 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001732 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001733 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1734 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001735 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001736 }
1737
1738 /*
1739 * Do we have to perform translations of the destination address/port?
1740 */
1741 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1742 uint16_t tcp_csum;
1743 uint32_t sum;
1744
Dave Hudson87973cd2013-10-22 16:00:04 +01001745 iph->daddr = cm->xlate_dest_ip;
1746 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001747
1748 /*
1749 * Do we have a non-zero UDP checksum? If we do then we need
1750 * to update it.
1751 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001752 tcp_csum = tcph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001753 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1754 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001755 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001756 }
1757
1758 /*
1759 * Replace the IP checksum.
1760 */
1761 iph->check = sfe_ipv4_gen_ip_csum(iph);
1762
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001763 /*
1764 * Update traffic stats.
1765 */
1766 cm->rx_packet_count++;
1767 cm->rx_byte_count += len;
1768
1769 /*
1770 * If we're not already on the active list then insert ourselves at the tail
1771 * of the current list.
1772 */
1773 if (unlikely(!cm->active)) {
1774 cm->active = true;
1775 cm->active_prev = si->active_tail;
1776 if (likely(si->active_tail)) {
1777 si->active_tail->active_next = cm;
1778 } else {
1779 si->active_head = cm;
1780 }
1781 si->active_tail = cm;
1782 }
1783
1784 xmit_dev = cm->xmit_dev;
1785 skb->dev = xmit_dev;
1786
1787 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001788 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001789 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001790 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1791 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001792 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001793 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001794 } else {
1795 /*
1796 * For the simple case we write this really fast.
1797 */
1798 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1799 eth->h_proto = htons(ETH_P_IP);
1800 eth->h_dest[0] = htons(cm->xmit_dest_mac[0]);
1801 eth->h_dest[1] = htons(cm->xmit_dest_mac[1]);
1802 eth->h_dest[2] = htons(cm->xmit_dest_mac[2]);
1803 eth->h_source[0] = htons(cm->xmit_src_mac[0]);
1804 eth->h_source[1] = htons(cm->xmit_src_mac[1]);
1805 eth->h_source[2] = htons(cm->xmit_src_mac[2]);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001806 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001807 }
1808
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001809 /*
1810 * Mark outgoing packet
1811 */
1812 skb->mark = cm->connection->mark;
1813 if (skb->mark) {
1814 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1815 }
1816
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001817 si->packets_forwarded++;
1818 spin_unlock(&si->lock);
1819
1820 /*
1821 * We're going to check for GSO flags when we transmit the packet so
1822 * start fetching the necessary cache line now.
1823 */
1824 prefetch(skb_shinfo(skb));
1825
1826 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001827 * Mark that this packet has been fast forwarded.
1828 */
1829 skb->fast_forwarded = 1;
1830
1831 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001832 * Send the packet on its way.
1833 */
1834 dev_queue_xmit(skb);
1835
1836 return 1;
1837}
1838
/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack. What we do need to do is look for any errors
 * about connections we are handling in the fast path. If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 *
 * Returns 0 in all cases: ICMP is never forwarded by the fast path, so the
 * caller always hands the packet back to the Linux stack.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct sfe_ipv4_ip_hdr *icmp_iph;
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	uint32_t *icmp_trans_h;
	struct sfe_ipv4_udp_hdr *icmp_udph;
	struct sfe_ipv4_tcp_hdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 */
	len -= ihl;
	if (unlikely(len < sizeof(struct icmphdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 * (ICMP error messages carry the IP header of the offending datagram
	 * immediately after the ICMP header.)
	 */
	len -= sizeof(struct icmphdr);
	if (unlikely(len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	if (unlikely(len < icmp_ihl)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	len -= icmp_ihl;
	icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		if (unlikely(len < 8)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		if (unlikely(len < 8)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match. Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it. It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state.  The flush is done outside the lock since it may invoke the
	 * registered sync callback.
	 */
	c = cm->connection;
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
	si->packets_not_forwarded++;
	spin_unlock(&si->lock);

	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	return 0;
}
2030
/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Validates the IPv4 header and dispatches to the per-protocol fast-path
 * handler (UDP, TCP or ICMP).
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool flush_on_find;
	bool ip_options;
	struct sfe_ipv4_ip_hdr *iph;
	uint32_t protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 */
	iph = (struct sfe_ipv4_ip_hdr *)skb->data;
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?  Those never carry a transport
	 * header so they can't be matched to a connection.
	 */
	frag_off = ntohs(iph->frag_off);
	if (unlikely(frag_off & IP_OFFSET)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("non-initial fragment\n");
		return 0;
	}

	/*
	 * If we have a (first) fragment then mark it to cause any connection to flush.
	 */
	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;

	/*
	 * Do we have any IP options? That's definitely a slow path! If we do have IP
	 * options we need to recheck our header size.
	 */
	ihl = iph->ihl << 2;
	ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
	if (unlikely(ip_options)) {
		if (unlikely(len < ihl)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
			return 0;
		}

		flush_on_find = true;
	}

	/*
	 * Dispatch by protocol.  Only UDP, TCP and ICMP are handled here;
	 * everything else is counted and left to the Linux stack.
	 */
	protocol = iph->protocol;
	if (IPPROTO_UDP == protocol) {
		return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == protocol) {
		return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMP == protocol) {
		return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	spin_lock(&si->lock);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
	si->packets_not_forwarded++;
	spin_unlock(&si->lock);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
	return 0;
}
2164
Nicolas Costa436926b2014-01-14 10:36:22 -06002165static void
2166sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
2167 struct sfe_ipv4_create *sic)
2168{
2169 struct sfe_ipv4_connection_match *orig_cm;
2170 struct sfe_ipv4_connection_match *repl_cm;
2171 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2172 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2173
2174 orig_cm = c->original_match;
2175 repl_cm = c->reply_match;
2176 orig_tcp = &orig_cm->protocol_state.tcp;
2177 repl_tcp = &repl_cm->protocol_state.tcp;
2178
2179 /* update orig */
2180 if (orig_tcp->max_win < sic->src_td_max_window) {
2181 orig_tcp->max_win = sic->src_td_max_window;
2182 }
2183 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2184 orig_tcp->end = sic->src_td_end;
2185 }
2186 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2187 orig_tcp->max_end = sic->src_td_max_end;
2188 }
2189
2190 /* update reply */
2191 if (repl_tcp->max_win < sic->dest_td_max_window) {
2192 repl_tcp->max_win = sic->dest_td_max_window;
2193 }
2194 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2195 repl_tcp->end = sic->dest_td_end;
2196 }
2197 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2198 repl_tcp->max_end = sic->dest_td_max_end;
2199 }
2200
2201 /* update match flags */
2202 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2203 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2204 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2205 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2206 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2207 }
2208}
2209
2210static void
2211sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
2212 struct sfe_ipv4_create *sic)
2213{
2214 switch (sic->protocol) {
2215 case IPPROTO_TCP:
2216 sfe_ipv4_update_tcp_state(c, sic);
2217 break;
2218 }
2219}
2220
2221void sfe_ipv4_update_rule(struct sfe_ipv4_create *sic)
2222{
2223 struct sfe_ipv4_connection *c;
2224 struct sfe_ipv4 *si = &__si;
2225
2226 spin_lock_bh(&si->lock);
2227
2228 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2229 sic->protocol,
2230 sic->src_ip,
2231 sic->src_port,
2232 sic->dest_ip,
2233 sic->dest_port);
2234 if (c != NULL) {
2235 sfe_ipv4_update_protocol_state(c, sic);
2236 }
2237
2238 spin_unlock_bh(&si->lock);
2239}
2240
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002241/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002242 * sfe_ipv4_create_rule()
2243 * Create a forwarding rule.
2244 */
Nicolas Costa514fde02014-01-13 15:50:29 -06002245int sfe_ipv4_create_rule(struct sfe_ipv4_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002246{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002247 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002248 struct sfe_ipv4_connection *c;
2249 struct sfe_ipv4_connection_match *original_cm;
2250 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002251 struct net_device *dest_dev;
2252 struct net_device *src_dev;
2253
2254 dest_dev = sic->dest_dev;
2255 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002256
2257 spin_lock_bh(&si->lock);
2258 si->connection_create_requests++;
2259
2260 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002261 * Check to see if there is already a flow that matches the rule we're
2262 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002263 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002264 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2265 sic->protocol,
2266 sic->src_ip,
2267 sic->src_port,
2268 sic->dest_ip,
2269 sic->dest_port);
2270 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002271 si->connection_create_collisions++;
2272
2273 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002274 * If we already have the flow then it's likely that this
2275 * request to create the connection rule contains more
2276 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002277 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002278 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002279 spin_unlock_bh(&si->lock);
2280
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002281 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002282 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002283 sic->mark, sic->protocol,
2284 sic->src_dev->name, sic->src_mac, &sic->src_ip, ntohs(sic->src_port),
Dave Hudson87973cd2013-10-22 16:00:04 +01002285 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002286 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002287 }
2288
2289 /*
2290 * Allocate the various connection tracking objects.
2291 */
2292 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2293 if (unlikely(!c)) {
2294 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002295 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002296 }
2297
2298 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2299 if (unlikely(!original_cm)) {
2300 spin_unlock_bh(&si->lock);
2301 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002302 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002303 }
2304
2305 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2306 if (unlikely(!reply_cm)) {
2307 spin_unlock_bh(&si->lock);
2308 kfree(original_cm);
2309 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002310 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002311 }
2312
2313 /*
2314 * Fill in the "original" direction connection matching object.
2315 * Note that the transmit MAC address is "dest_mac_xlate" because
2316 * we always know both ends of a connection by their translated
2317 * addresses and not their public addresses.
2318 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002319 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002320 original_cm->match_protocol = sic->protocol;
2321 original_cm->match_src_ip = sic->src_ip;
2322 original_cm->match_src_port = sic->src_port;
2323 original_cm->match_dest_ip = sic->dest_ip;
2324 original_cm->match_dest_port = sic->dest_port;
2325 original_cm->xlate_src_ip = sic->src_ip_xlate;
2326 original_cm->xlate_src_port = sic->src_port_xlate;
2327 original_cm->xlate_dest_ip = sic->dest_ip_xlate;
2328 original_cm->xlate_dest_port = sic->dest_port_xlate;
2329 original_cm->rx_packet_count = 0;
2330 original_cm->rx_packet_count64 = 0;
2331 original_cm->rx_byte_count = 0;
2332 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002333 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002334 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002335 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002336 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2337 original_cm->connection = c;
2338 original_cm->counter_match = reply_cm;
2339 original_cm->flags = 0;
2340 original_cm->active_next = NULL;
2341 original_cm->active_prev = NULL;
2342 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002343
2344 /*
2345 * For PPP links we don't write an L2 header. For everything else we do.
2346 */
2347 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2348 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2349
2350 /*
2351 * If our dev writes Ethernet headers then we can write a really fast
2352 * version.
2353 */
2354 if (dest_dev->header_ops) {
2355 if (dest_dev->header_ops->create == eth_header) {
2356 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2357 }
2358 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002359 }
2360
2361 /*
2362 * Fill in the "reply" direction connection matching object.
2363 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002364 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002365 reply_cm->match_protocol = sic->protocol;
2366 reply_cm->match_src_ip = sic->dest_ip_xlate;
2367 reply_cm->match_src_port = sic->dest_port_xlate;
2368 reply_cm->match_dest_ip = sic->src_ip_xlate;
2369 reply_cm->match_dest_port = sic->src_port_xlate;
2370 reply_cm->xlate_src_ip = sic->dest_ip;
2371 reply_cm->xlate_src_port = sic->dest_port;
2372 reply_cm->xlate_dest_ip = sic->src_ip;
2373 reply_cm->xlate_dest_port = sic->src_port;
2374 reply_cm->rx_packet_count = 0;
2375 reply_cm->rx_packet_count64 = 0;
2376 reply_cm->rx_byte_count = 0;
2377 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002378 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002379 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002380 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002381 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2382 reply_cm->connection = c;
2383 reply_cm->counter_match = original_cm;
2384 reply_cm->flags = 0;
2385 reply_cm->active_next = NULL;
2386 reply_cm->active_prev = NULL;
2387 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002388
2389 /*
2390 * For PPP links we don't write an L2 header. For everything else we do.
2391 */
2392 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2393 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2394
2395 /*
2396 * If our dev writes Ethernet headers then we can write a really fast
2397 * version.
2398 */
2399 if (src_dev->header_ops) {
2400 if (src_dev->header_ops->create == eth_header) {
2401 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2402 }
2403 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002404 }
2405
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002406
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002407 if (sic->dest_ip != sic->dest_ip_xlate || sic->dest_port != sic->dest_port_xlate) {
2408 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2409 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2410 }
2411
2412 if (sic->src_ip != sic->src_ip_xlate || sic->src_port != sic->src_port_xlate) {
2413 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2414 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2415 }
2416
2417 c->protocol = sic->protocol;
2418 c->src_ip = sic->src_ip;
2419 c->src_ip_xlate = sic->src_ip_xlate;
2420 c->src_port = sic->src_port;
2421 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002422 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002423 c->original_match = original_cm;
2424 c->dest_ip = sic->dest_ip;
2425 c->dest_ip_xlate = sic->dest_ip_xlate;
2426 c->dest_port = sic->dest_port;
2427 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002428 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002429 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002430 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002431
2432 c->last_sync_jiffies = get_jiffies_64();
2433 c->iterators = 0;
2434 c->pending_free = false;
2435
2436 /*
2437 * Take hold of our source and dest devices for the duration of the connection.
2438 */
2439 dev_hold(c->original_dev);
2440 dev_hold(c->reply_dev);
2441
2442 /*
2443 * Initialize the protocol-specific information that we track.
2444 */
2445 switch (sic->protocol) {
2446 case IPPROTO_TCP:
2447 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2448 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2449 original_cm->protocol_state.tcp.end = sic->src_td_end;
2450 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2451 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2452 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2453 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2454 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2455 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2456 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2457 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2458 }
2459 break;
2460 }
2461
2462 sfe_ipv4_connection_match_compute_translations(original_cm);
2463 sfe_ipv4_connection_match_compute_translations(reply_cm);
2464 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2465
2466 spin_unlock_bh(&si->lock);
2467
2468 /*
2469 * We have everything we need!
2470 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002471 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002472 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2473 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002474 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002475 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002476 &sic->src_ip, &sic->src_ip_xlate, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002477 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002478 &sic->dest_ip, &sic->dest_ip_xlate, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002479
2480 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002481}
2482
/*
 * sfe_ipv4_destroy_rule()
 *	Destroy a forwarding rule.
 *
 * Looks up the connection identified by @sid, removes it from the hash
 * tables and flushes its state.  A miss is only counted and traced.
 */
void sfe_ipv4_destroy_rule(struct sfe_ipv4_destroy *sid)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;

	spin_lock_bh(&si->lock);
	si->connection_destroy_requests++;

	/*
	 * Check to see if we have a flow that matches the rule we're trying
	 * to destroy. If there isn't then we can't destroy it.
	 */
	c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip, sid->src_port,
					      sid->dest_ip, sid->dest_port);
	if (!c) {
		si->connection_destroy_misses++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
			    sid->protocol, &sid->src_ip, ntohs(sid->src_port),
			    &sid->dest_ip, ntohs(sid->dest_port));
		return;
	}

	/*
	 * Remove our connection details from the hash tables.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	spin_unlock_bh(&si->lock);

	/*
	 * Finally synchronize state and free resources. We need to protect against
	 * pre-emption by our bottom half while we do this though.
	 */
	local_bh_disable();
	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	local_bh_enable();

	DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
		   sid->protocol, &sid->src_ip, ntohs(sid->src_port),
		   &sid->dest_ip, ntohs(sid->dest_port));
}
2529
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * Pass NULL to unregister: the periodic sync timer reads this pointer with
 * rcu_dereference() and skips the sync pass when it is NULL.  Publishing via
 * rcu_assign_pointer() guarantees readers observe either the old or the new
 * callback, never a torn value.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_ipv4_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	/*
	 * The lock serializes concurrent writers; readers are protected by RCU.
	 */
	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2542
2543/*
2544 * sfe_ipv4_get_debug_dev()
2545 */
2546static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2547 struct device_attribute *attr,
2548 char *buf)
2549{
2550 struct sfe_ipv4 *si = &__si;
2551 ssize_t count;
2552 int num;
2553
2554 spin_lock_bh(&si->lock);
2555 num = si->debug_dev;
2556 spin_unlock_bh(&si->lock);
2557
2558 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2559 return count;
2560}
2561
2562/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002563 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002564 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002565static const struct device_attribute sfe_ipv4_debug_dev_attr =
2566 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2567
2568/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002569 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002570 * Destroy all connections that match a particular device.
2571 *
2572 * If we pass dev as NULL then this destroys all connections.
2573 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002574void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002575{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002576 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002577 struct sfe_ipv4_connection *c;
2578 struct sfe_ipv4_connection *c_next;
2579
2580 spin_lock_bh(&si->lock);
2581 c = si->all_connections_head;
2582 if (!c) {
2583 spin_unlock_bh(&si->lock);
2584 return;
2585 }
2586
2587 c->iterators++;
2588
2589 /*
2590 * Iterate over all connections
2591 */
2592 while (c) {
2593 c_next = c->all_connections_next;
2594
2595 /*
2596 * Before we do anything else, take an iterator reference for the
2597 * connection we'll iterate next.
2598 */
2599 if (c_next) {
2600 c_next->iterators++;
2601 }
2602
2603 /*
2604 * Does this connection relate to the device we are destroying? If
2605 * it does then ensure it is marked for being freed as soon as it
2606 * is no longer being iterated.
2607 */
2608 if (!dev
2609 || (dev == c->original_dev)
2610 || (dev == c->reply_dev)) {
2611 c->pending_free = true;
2612 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2613 }
2614
2615 /*
2616 * Remove the iterator reference that we acquired and see if we
2617 * should free any resources.
2618 */
2619 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2620 spin_unlock_bh(&si->lock);
Nicolas Costabafb3af2014-01-29 16:39:39 -06002621
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002622 /*
2623 * This entry is dead so release our hold of the source and
2624 * dest devices and free the memory for our connection objects.
2625 */
2626 dev_put(c->original_dev);
2627 dev_put(c->reply_dev);
2628 kfree(c->original_match);
2629 kfree(c->reply_match);
2630 kfree(c);
2631
2632 spin_lock_bh(&si->lock);
2633 }
2634
2635 c = c_next;
2636 }
2637
2638 spin_unlock_bh(&si->lock);
2639}
2640
/*
 * sfe_ipv4_periodic_sync()
 *	Timer handler: sync a slice of the "active" connection list to the
 *	registered rule-sync callback, then re-arm the timer (~10ms period).
 *
 * Roughly 1/64 of the connections are processed per tick.  The spinlock is
 * dropped around each callback invocation so the callback may sleep-free
 * work without holding our lock.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	/*
	 * If no callback is registered there is nothing to sync - just re-arm.
	 */
	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_ipv4_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/*
	 * Re-arm for the next tick: (HZ + 99) / 100 rounds up to >= 1 jiffy.
	 */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2739
#define CHAR_DEV_MSG_SIZE 768		/* scratch buffer size for one XML fragment */
2741
2742/*
2743 * sfe_ipv4_debug_dev_read_start()
2744 * Generate part of the XML output.
2745 */
2746static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2747 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2748{
2749 int bytes_read;
2750
2751 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2752 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2753 return false;
2754 }
2755
2756 *length -= bytes_read;
2757 *total_read += bytes_read;
2758
2759 ws->state++;
2760 return true;
2761}
2762
2763/*
2764 * sfe_ipv4_debug_dev_read_connections_start()
2765 * Generate part of the XML output.
2766 */
2767static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2768 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2769{
2770 int bytes_read;
2771
2772 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2773 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2774 return false;
2775 }
2776
2777 *length -= bytes_read;
2778 *total_read += bytes_read;
2779
2780 ws->state++;
2781 return true;
2782}
2783
2784/*
2785 * sfe_ipv4_debug_dev_read_connections_connection()
2786 * Generate part of the XML output.
2787 */
2788static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2789 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2790{
2791 struct sfe_ipv4_connection *c;
2792 struct sfe_ipv4_connection *c_next;
2793 struct sfe_ipv4_connection_match *original_cm;
2794 struct sfe_ipv4_connection_match *reply_cm;
2795 int bytes_read;
2796 int protocol;
2797 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002798 __be32 src_ip;
2799 __be32 src_ip_xlate;
2800 __be16 src_port;
2801 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002802 uint64_t src_rx_packets;
2803 uint64_t src_rx_bytes;
2804 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002805 __be32 dest_ip;
2806 __be32 dest_ip_xlate;
2807 __be16 dest_port;
2808 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002809 uint64_t dest_rx_packets;
2810 uint64_t dest_rx_bytes;
2811 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002812 uint32_t mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002813
2814 spin_lock_bh(&si->lock);
2815 c = ws->iter_conn;
2816
2817 /*
2818 * Is this the first connection we need to scan?
2819 */
2820 if (!c) {
2821 c = si->all_connections_head;
2822
2823 /*
2824 * If there were no connections then move to the next state.
2825 */
2826 if (!c) {
2827 spin_unlock_bh(&si->lock);
2828
2829 ws->state++;
2830 return true;
2831 }
2832
2833 c->iterators++;
2834 }
2835
2836 c_next = c->all_connections_next;
2837 ws->iter_conn = c_next;
2838
2839 /*
2840 * Before we do anything else, take an iterator reference for the
2841 * connection we'll iterate next.
2842 */
2843 if (c_next) {
2844 c_next->iterators++;
2845 }
2846
2847 /*
2848 * Remove the iterator reference that we acquired and see if we
2849 * should free any resources.
2850 */
2851 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2852 spin_unlock_bh(&si->lock);
2853
2854 /*
2855 * This entry is dead so release our hold of the source and
2856 * dest devices and free the memory for our connection objects.
2857 */
2858 dev_put(c->original_dev);
2859 dev_put(c->reply_dev);
2860 kfree(c->original_match);
2861 kfree(c->reply_match);
2862 kfree(c);
2863
2864 /*
2865 * If we have no more connections then move to the next state.
2866 */
2867 if (!c_next) {
2868 ws->state++;
2869 }
2870
2871 return true;
2872 }
2873
2874 original_cm = c->original_match;
2875 reply_cm = c->reply_match;
2876
2877 protocol = c->protocol;
2878 src_dev = c->original_dev;
2879 src_ip = c->src_ip;
2880 src_ip_xlate = c->src_ip_xlate;
2881 src_port = c->src_port;
2882 src_port_xlate = c->src_port_xlate;
2883
2884 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2885 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2886
2887 src_rx_packets = original_cm->rx_packet_count64;
2888 src_rx_bytes = original_cm->rx_byte_count64;
2889 dest_dev = c->reply_dev;
2890 dest_ip = c->dest_ip;
2891 dest_ip_xlate = c->dest_ip_xlate;
2892 dest_port = c->dest_port;
2893 dest_port_xlate = c->dest_port_xlate;
2894 dest_rx_packets = reply_cm->rx_packet_count64;
2895 dest_rx_bytes = reply_cm->rx_byte_count64;
2896 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002897 mark = c->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002898 spin_unlock_bh(&si->lock);
2899
2900 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
2901 "protocol=\"%u\" "
2902 "src_dev=\"%s\" "
2903 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
2904 "src_port=\"%u\" src_port_xlate=\"%u\" "
2905 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
2906 "dest_dev=\"%s\" "
2907 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
2908 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
2909 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002910 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06002911 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002912 protocol,
2913 src_dev->name,
2914 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002915 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002916 src_rx_packets, src_rx_bytes,
2917 dest_dev->name,
2918 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002919 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002920 dest_rx_packets, dest_rx_bytes,
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002921 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002922
2923 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2924 return false;
2925 }
2926
2927 *length -= bytes_read;
2928 *total_read += bytes_read;
2929
2930 /*
2931 * If we have no more connections then move to the next state.
2932 */
2933 if (!c_next) {
2934 ws->state++;
2935 }
2936
2937 return true;
2938}
2939
2940/*
2941 * sfe_ipv4_debug_dev_read_connections_end()
2942 * Generate part of the XML output.
2943 */
2944static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2945 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2946{
2947 int bytes_read;
2948
2949 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
2950 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2951 return false;
2952 }
2953
2954 *length -= bytes_read;
2955 *total_read += bytes_read;
2956
2957 ws->state++;
2958 return true;
2959}
2960
2961/*
2962 * sfe_ipv4_debug_dev_read_exceptions_start()
2963 * Generate part of the XML output.
2964 */
2965static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2966 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2967{
2968 int bytes_read;
2969
2970 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
2971 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2972 return false;
2973 }
2974
2975 *length -= bytes_read;
2976 *total_read += bytes_read;
2977
2978 ws->state++;
2979 return true;
2980}
2981
2982/*
2983 * sfe_ipv4_debug_dev_read_exceptions_exception()
2984 * Generate part of the XML output.
2985 */
2986static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2987 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2988{
2989 uint64_t ct;
2990
2991 spin_lock_bh(&si->lock);
2992 ct = si->exception_events64[ws->iter_exception];
2993 spin_unlock_bh(&si->lock);
2994
2995 if (ct) {
2996 int bytes_read;
2997
2998 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
2999 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3000 sfe_ipv4_exception_events_string[ws->iter_exception],
3001 ct);
3002 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3003 return false;
3004 }
3005
3006 *length -= bytes_read;
3007 *total_read += bytes_read;
3008 }
3009
3010 ws->iter_exception++;
3011 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3012 ws->iter_exception = 0;
3013 ws->state++;
3014 }
3015
3016 return true;
3017}
3018
3019/*
3020 * sfe_ipv4_debug_dev_read_exceptions_end()
3021 * Generate part of the XML output.
3022 */
3023static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3024 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3025{
3026 int bytes_read;
3027
3028 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3029 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3030 return false;
3031 }
3032
3033 *length -= bytes_read;
3034 *total_read += bytes_read;
3035
3036 ws->state++;
3037 return true;
3038}
3039
3040/*
3041 * sfe_ipv4_debug_dev_read_stats()
3042 * Generate part of the XML output.
3043 */
3044static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3045 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3046{
3047 int bytes_read;
3048 unsigned int num_connections;
3049 uint64_t packets_forwarded;
3050 uint64_t packets_not_forwarded;
3051 uint64_t connection_create_requests;
3052 uint64_t connection_create_collisions;
3053 uint64_t connection_destroy_requests;
3054 uint64_t connection_destroy_misses;
3055 uint64_t connection_flushes;
3056 uint64_t connection_match_hash_hits;
3057 uint64_t connection_match_hash_reorders;
3058
3059 spin_lock_bh(&si->lock);
3060 sfe_ipv4_update_summary_stats(si);
3061
3062 num_connections = si->num_connections;
3063 packets_forwarded = si->packets_forwarded64;
3064 packets_not_forwarded = si->packets_not_forwarded64;
3065 connection_create_requests = si->connection_create_requests64;
3066 connection_create_collisions = si->connection_create_collisions64;
3067 connection_destroy_requests = si->connection_destroy_requests64;
3068 connection_destroy_misses = si->connection_destroy_misses64;
3069 connection_flushes = si->connection_flushes64;
3070 connection_match_hash_hits = si->connection_match_hash_hits64;
3071 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3072 spin_unlock_bh(&si->lock);
3073
3074 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3075 "num_connections=\"%u\" "
3076 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3077 "create_requests=\"%llu\" create_collisions=\"%llu\" "
3078 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3079 "flushes=\"%llu\" "
3080 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3081 num_connections,
3082 packets_forwarded,
3083 packets_not_forwarded,
3084 connection_create_requests,
3085 connection_create_collisions,
3086 connection_destroy_requests,
3087 connection_destroy_misses,
3088 connection_flushes,
3089 connection_match_hash_hits,
3090 connection_match_hash_reorders);
3091 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3092 return false;
3093 }
3094
3095 *length -= bytes_read;
3096 *total_read += bytes_read;
3097
3098 ws->state++;
3099 return true;
3100}
3101
3102/*
3103 * sfe_ipv4_debug_dev_read_end()
3104 * Generate part of the XML output.
3105 */
3106static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3107 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3108{
3109 int bytes_read;
3110
3111 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3112 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3113 return false;
3114 }
3115
3116 *length -= bytes_read;
3117 *total_read += bytes_read;
3118
3119 ws->state++;
3120 return true;
3121}
3122
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * NOTE(review): the entry order must match the SFE_IPV4_DEBUG_XML_STATE_*
 * enumeration, since ws->state indexes directly into this table in
 * sfe_ipv4_debug_dev_read().
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3138
3139/*
3140 * sfe_ipv4_debug_dev_read()
3141 * Send info to userspace upon read request from user
3142 */
3143static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3144{
3145 char msg[CHAR_DEV_MSG_SIZE];
3146 int total_read = 0;
3147 struct sfe_ipv4_debug_xml_write_state *ws;
3148 struct sfe_ipv4 *si = &__si;
3149
3150 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3151 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3152 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3153 continue;
3154 }
3155 }
3156
3157 return total_read;
3158}
3159
/*
 * sfe_ipv4_debug_dev_write()
 *	Write to char device resets some stats
 *
 * Any write, regardless of its content, first folds outstanding deltas into
 * the 64-bit totals via sfe_ipv4_update_summary_stats() and then zeroes
 * those totals.  The full length is always reported as consumed.
 */
static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);

	/*
	 * Fold pending counter updates in before clearing, so nothing
	 * accumulated since the last sync is silently lost.
	 */
	sfe_ipv4_update_summary_stats(si);

	si->packets_forwarded64 = 0;
	si->packets_not_forwarded64 = 0;
	si->connection_create_requests64 = 0;
	si->connection_create_collisions64 = 0;
	si->connection_destroy_requests64 = 0;
	si->connection_destroy_misses64 = 0;
	si->connection_flushes64 = 0;
	si->connection_match_hash_hits64 = 0;
	si->connection_match_hash_reorders64 = 0;
	spin_unlock_bh(&si->lock);

	return length;
}
3184
3185/*
3186 * sfe_ipv4_debug_dev_open()
3187 */
3188static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3189{
3190 struct sfe_ipv4_debug_xml_write_state *ws;
3191
3192 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3193 if (!ws) {
3194 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3195 if (!ws) {
3196 return -ENOMEM;
3197 }
3198
3199 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3200 file->private_data = ws;
3201 }
3202
3203 return 0;
3204}
3205
/*
 * sfe_ipv4_debug_dev_release()
 *	Release the per-file XML write state, dropping any iterator reference
 *	still held on a connection from an interrupted read.
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		struct sfe_ipv4_connection *c;

		/*
		 * Are we currently iterating a connection? If we are then
		 * make sure that we reduce its iterator count and if necessary
		 * free it.
		 */
		c = ws->iter_conn;
		if (c) {
			struct sfe_ipv4 *si = &__si;
			bool free_connection;

			spin_lock_bh(&si->lock);
			free_connection = sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c);
			spin_unlock_bh(&si->lock);

			/*
			 * The decrement helper returns true when we were the
			 * last holder of a pending-free connection; the free
			 * itself happens outside the lock.
			 */
			if (free_connection) {
				/*
				 * This entry is dead so release our hold of the source and
				 * dest devices and free the memory for our connection objects.
				 */
				dev_put(c->original_dev);
				dev_put(c->reply_dev);
				kfree(c->original_match);
				kfree(c->reply_match);
				kfree(c);
			}
		}

		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
	}

	return 0;
}
3252
3253/*
3254 * File operations used in the debug char device
3255 */
3256static struct file_operations sfe_ipv4_debug_dev_fops = {
3257 .read = sfe_ipv4_debug_dev_read,
3258 .write = sfe_ipv4_debug_dev_write,
3259 .open = sfe_ipv4_debug_dev_open,
3260 .release = sfe_ipv4_debug_dev_release
3261};
3262
3263/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003264 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003265 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003266static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003267{
3268 struct sfe_ipv4 *si = &__si;
3269 int result = -1;
3270
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003271 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003272
3273 /*
3274 * Create sys/sfe_ipv4
3275 */
3276 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3277 if (!si->sys_sfe_ipv4) {
3278 DEBUG_ERROR("failed to register sfe_ipv4\n");
3279 goto exit1;
3280 }
3281
3282 /*
3283 * Create files, one for each parameter supported by this module.
3284 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003285 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3286 if (result) {
3287 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3288 goto exit4;
3289 }
3290
3291 /*
3292 * Register our debug char device.
3293 */
3294 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3295 if (result < 0) {
3296 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3297 goto exit5;
3298 }
3299
3300 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003301
3302 /*
3303 * Create a timer to handle periodic statistics.
3304 */
3305 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003306 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003307
Dave Hudson87973cd2013-10-22 16:00:04 +01003308 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003309
Dave Hudson87973cd2013-10-22 16:00:04 +01003310 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003311
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003312exit5:
3313 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3314
3315exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003316 kobject_put(si->sys_sfe_ipv4);
3317
3318exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003319 return result;
3320}
3321
/*
 * sfe_ipv4_exit()
 *	Module unload: destroy all connections, stop the timer and tear down
 *	the char device and sysfs node (reverse order of creation).
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

// XXX - this is where we need to unregister with any lower level offload services.

	/*
	 * del_timer_sync() also handles the timer re-arming itself from its
	 * own handler.
	 */
	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);
}
3347
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Symbols exported for use by other kernel modules.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003362