/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
 *
 * All Rights Reserved.
 * Qualcomm Atheros Confidential and Proprietary.
 */

#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>

#include "sfe.h"
#include "sfe_cm.h"

/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses.  For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif

/*
 * An Ethernet header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];
	__be16 h_source[ETH_ALEN / 2];
	__be16 h_proto;
} SFE_IPV4_UNALIGNED_STRUCT;
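/*
 * Example (illustrative, not part of the original build): storing the MAC
 * addresses as three __be16 words rather than six bytes lets an Ethernet
 * header be built with three 16-bit stores per address instead of a
 * byte-wise copy:
 *
 *	eth->h_dest[0] = xmit_dest_mac[0];
 *	eth->h_dest[1] = xmit_dest_mac[1];
 *	eth->h_dest[2] = xmit_dest_mac[2];
 *
 * which is what the "fast Ethernet header write" case in the receive paths
 * below does.
 */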

/*
 * An IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,
	     version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;
	__be16 tot_len;
	__be16 id;
	__be16 frag_off;
	__u8 ttl;
	__u8 protocol;
	__sum16 check;
	__be32 saddr;
	__be32 daddr;

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
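/*
 * Example (illustrative, assuming the standard 20-byte header): packing
 * does not change the size of the structure above, only its alignment
 * requirement, so a sanity check such as
 *
 *	BUILD_BUG_ON(sizeof(struct sfe_ipv4_ip_hdr) != 20);
 *
 * holds whether or not SFE_IPV4_UNALIGNED_IP_HEADER is set.
 */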

/*
 * A UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_udp_hdr {
	__be16 source;
	__be16 dest;
	__be16 len;
	__sum16 check;
} SFE_IPV4_UNALIGNED_STRUCT;

/*
 * A TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;
	__be16 dest;
	__be32 seq;
	__be32 ack_seq;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;
	__sum16 check;
	__be16 urg_ptr;
} SFE_IPV4_UNALIGNED_STRUCT;

/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};

/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write a layer 2 header */

/*
 * IPv4 connection matching structure.
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags */
#ifdef CONFIG_NF_FLOW_COOKIE
	uint32_t flow_cookie;		/* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	uint32_t flow_accel;		/* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	uint16_t xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	uint16_t xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};

/*
 * Per-connection data structure.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	int iterators;			/* Number of iterators currently using this connection */
	bool pending_free;		/* Flag that indicates that this connection should be freed after iteration */
	uint32_t mark;			/* mark for outgoing packet */
};

/*
 * IPv4 connections and hash table size information.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
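/*
 * Example (illustrative): with SFE_IPV4_CONNECTION_HASH_SHIFT set to 12
 * the defines above give a 4096-bucket table and a mask of 0xfff, so a
 * 32-bit hash value is reduced to a bucket index with a single AND:
 *
 *	idx = hash & SFE_IPV4_CONNECTION_HASH_MASK;	(0 .. 4095)
 */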

#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff

struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
	unsigned long last_clean_time;
};
#endif

enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};

static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};

/*
 * Per-module structure.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
#endif

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
};

/*
 * Enumeration of the XML output.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};

/*
 * XML write state.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	struct sfe_ipv4_connection *iter_conn;
					/* Next connection iterator */
	int iter_exception;		/* Next exception iterator */
};

typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

struct sfe_ipv4 __si;

/*
 * sfe_ipv4_gen_ip_csum()
 *	Generate the IP checksum for an IPv4 header.
 *
 * Note that this function assumes that we have only 20 bytes of IP header.
 */
static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
{
	uint32_t sum;
	uint16_t *i = (uint16_t *)iph;

	iph->check = 0;

	/*
	 * Generate the sum.
	 */
	sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];

	/*
	 * Fold it to ones-complement form.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)sum ^ 0xffff;
}
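/*
 * Example (illustrative): after any rewrite of IPv4 header fields the
 * checksum is regenerated in place, e.g.
 *
 *	iph->ttl = ttl - 1;
 *	iph->check = sfe_ipv4_gen_ip_csum(iph);
 *
 * which is the pattern the UDP and TCP forwarding paths below follow.
 */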

/*
 * sfe_ipv4_get_connection_match_hash()
 *	Generate the hash used in connection match lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
							      __be32 src_ip, __be16 src_port,
							      __be32 dest_ip, __be16 dest_port)
{
	size_t dev_addr = (size_t)dev;
	uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}
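/*
 * Example (illustrative, arbitrary value): the tuple words are XORed
 * together and the top bits of the result are folded onto the bottom
 * SFE_IPV4_CONNECTION_HASH_SHIFT bits, so for a raw hash of 0x12345678:
 *
 *	0x12345678 >> 12 = 0x00012345
 *	0x12345678 ^ 0x00012345 = 0x1235753d
 *	0x1235753d & 0xfff = 0x53d	(bucket 1341)
 */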

/*
 * sfe_ipv4_find_sfe_ipv4_connection_match()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it.  We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
	     || cm->match_dest_port != dest_port
	     || cm->match_src_ip != src_ip
	     || cm->match_dest_ip != dest_ip
	     || cm->match_protocol != protocol
	     || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}

/*
 * sfe_ipv4_connection_match_update_summary_stats()
 *	Update the summary stats for a connection match entry.
 */
static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
{
	cm->rx_packet_count64 += cm->rx_packet_count;
	cm->rx_packet_count = 0;
	cm->rx_byte_count64 += cm->rx_byte_count;
	cm->rx_byte_count = 0;
}

/*
 * sfe_ipv4_connection_match_compute_translations()
 *	Compute port and address translations for a connection match entry.
 */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations.  If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		uint16_t src_ip_hi = cm->match_src_ip >> 16;
		uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
		uint32_t xlate_src_ip = ~cm->xlate_src_ip;
		uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
		uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
		uint16_t xlate_src_port = ~cm->xlate_src_port;
		uint32_t adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (uint16_t)adj;

	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
		uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
		uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
		uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
		uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		uint16_t xlate_dest_port = ~cm->xlate_dest_port;
		uint32_t adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (uint16_t)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
		if (adj < cm->xlate_src_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
		if (adj < cm->xlate_dest_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
	}

}
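/*
 * Example (illustrative): applying a precomputed adjustment to a transport
 * checksum then costs one add and one fold, which is what the UDP and TCP
 * receive paths below do:
 *
 *	sum = udph->check + cm->xlate_src_csum_adjustment;
 *	sum = (sum & 0xffff) + (sum >> 16);
 *	udph->check = (uint16_t)sum;
 */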

/*
 * sfe_ipv4_update_summary_stats()
 *	Update the summary stats.
 */
static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
{
	int i;

	si->connection_create_requests64 += si->connection_create_requests;
	si->connection_create_requests = 0;
	si->connection_create_collisions64 += si->connection_create_collisions;
	si->connection_create_collisions = 0;
	si->connection_destroy_requests64 += si->connection_destroy_requests;
	si->connection_destroy_requests = 0;
	si->connection_destroy_misses64 += si->connection_destroy_misses;
	si->connection_destroy_misses = 0;
	si->connection_match_hash_hits64 += si->connection_match_hash_hits;
	si->connection_match_hash_hits = 0;
	si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
	si->connection_match_hash_reorders = 0;
	si->connection_flushes64 += si->connection_flushes;
	si->connection_flushes = 0;
	si->packets_forwarded64 += si->packets_forwarded;
	si->packets_forwarded = 0;
	si->packets_not_forwarded64 += si->packets_not_forwarded;
	si->packets_not_forwarded = 0;

	for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
		si->exception_events64[i] += si->exception_events[i];
		si->exception_events[i] = 0;
	}
}

/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Configure the hardware to put a flow cookie in packets of this flow,
	 * so that we can accelerate the lookup process when we receive them.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif

}

/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Tell the hardware that we no longer need a flow cookie in packets of this flow.
	 */
	unsigned int conn_match_idx;

	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if (cm == entry->match) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
				     cm->match_dest_ip, cm->match_dest_port, 0);
			}
			rcu_read_unlock();

			cm->flow_cookie = 0;
			entry->match = NULL;
			entry->last_clean_time = jiffies;
			break;
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}

/*
 * sfe_ipv4_get_connection_hash()
 *	Generate the hash used in connection lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
							__be32 dest_ip, __be16 dest_port)
{
	uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}

/*
 * sfe_ipv4_find_sfe_ipv4_connection()
 *	Get the IPv4 connection info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
									    __be32 src_ip, __be16 src_port,
									    __be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection *c;
	unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
	c = si->conn_hash[conn_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!c)) {
		return c;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(c->src_port == src_port)
	    && likely(c->dest_port == dest_port)
	    && likely(c->src_ip == src_ip)
	    && likely(c->dest_ip == dest_ip)
	    && likely(c->protocol == protocol)) {
		return c;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it.  We
	 * presume that this will be reused again very quickly.
	 */
	do {
		c = c->next;
	} while (c && (c->src_port != src_port
	     || c->dest_port != dest_port
	     || c->src_ip != src_ip
	     || c->dest_ip != dest_ip
	     || c->protocol != protocol));

	/*
	 * The caller will need this connection entry for subsequent create/destroy
	 * metadata, so there is no need to re-order the entry for these requests.
	 */
	return c;
}

/*
 * sfe_ipv4_mark_rule()
 *	Updates the mark for a currently offloaded connection
 *
 * Will take hash lock upon entry
 */
void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;

	spin_lock(&si->lock);
	c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
					      mark->src_ip.ip, mark->src_port,
					      mark->dest_ip.ip, mark->dest_port);
	if (c) {
		DEBUG_TRACE("Matching connection found for mark, "
			    "setting from %08x to %08x\n",
			    c->mark, mark->mark);
		WARN_ON((0 != c->mark) && (0 == mark->mark));
		c->mark = mark->mark;
	}
	spin_unlock(&si->lock);
}

/*
 * sfe_ipv4_insert_sfe_ipv4_connection()
 *	Insert a connection into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_connection **hash_head;
	struct sfe_ipv4_connection *prev_head;
	unsigned int conn_idx;

	/*
	 * Insert entry into the connection hash.
	 */
	conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
						c->dest_ip, c->dest_port);
	hash_head = &si->conn_hash[conn_idx];
	prev_head = *hash_head;
	c->prev = NULL;
	if (prev_head) {
		prev_head->prev = c;
	}

	c->next = prev_head;
	*hash_head = c;

	/*
	 * Insert entry into the "all connections" list.
	 */
	if (si->all_connections_tail) {
		c->all_connections_prev = si->all_connections_tail;
		si->all_connections_tail->all_connections_next = c;
	} else {
		c->all_connections_prev = NULL;
		si->all_connections_head = c;
	}

	si->all_connections_tail = c;
	c->all_connections_next = NULL;
	si->num_connections++;

	/*
	 * Insert the connection match objects too.
	 */
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
}

/*
 * sfe_ipv4_remove_sfe_ipv4_connection()
 *	Remove a sfe_ipv4_connection object from the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Remove the connection match objects.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);

	/*
	 * Unlink the connection.
	 */
	if (c->prev) {
		c->prev->next = c->next;
	} else {
		unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
								     c->dest_ip, c->dest_port);
		si->conn_hash[conn_idx] = c->next;
	}

	if (c->next) {
		c->next->prev = c->prev;
	}
}

/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *	Sync a connection.
 *
 * On entry to this function we expect that the lock for the connection is either
 * already held or isn't required.
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, uint64_t now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->dest_ip.ip = c->dest_ip;
	sis->src_port = c->src_port;
	sis->dest_port = c->dest_port;

	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}

/*
 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
 *	Remove an iterator from a connection - free all resources if necessary.
 *
 * Returns true if the connection should now be free, false if not.
 *
 * We must be locked on entry to this function.
 */
static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Are we the last iterator for this connection?
	 */
	c->iterators--;
	if (c->iterators) {
		return false;
	}

	/*
	 * Is this connection marked for deletion?
	 */
	if (!c->pending_free) {
		return false;
	}

	/*
	 * We're ready to delete this connection so unlink it from the "all
	 * connections" list.
	 */
	si->num_connections--;
	if (c->all_connections_prev) {
		c->all_connections_prev->all_connections_next = c->all_connections_next;
	} else {
		si->all_connections_head = c->all_connections_next;
	}

	if (c->all_connections_next) {
		c->all_connections_next->all_connections_prev = c->all_connections_prev;
	} else {
		si->all_connections_tail = c->all_connections_prev;
	}

	return true;
}

/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again.  In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_connection_sync sis;
	uint64_t now_jiffies;
	bool pending_free = false;
	sfe_sync_rule_callback_t sync_rule_callback;

	rcu_read_lock();
	spin_lock(&si->lock);
	si->connection_flushes++;

	/*
	 * Check that we're not currently being iterated.  If we are then
	 * we can't free this entry yet but must mark it pending a free.  If it's
	 * not being iterated then we can unlink it from the list of all
	 * connections.
	 */
	if (c->iterators) {
		pending_free = true;
		c->pending_free = true;
	} else {
		si->num_connections--;
		if (c->all_connections_prev) {
			c->all_connections_prev->all_connections_next = c->all_connections_next;
		} else {
			si->all_connections_head = c->all_connections_next;
		}

		if (c->all_connections_next) {
			c->all_connections_next->all_connections_prev = c->all_connections_prev;
		} else {
			si->all_connections_tail = c->all_connections_prev;
		}
	}

	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	spin_unlock(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a sync message and then sync.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * If we can't yet free the underlying memory then we're done.
	 */
	if (pending_free) {
		return;
	}

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}

/*
 * sfe_ipv4_recv_udp()
 *	Handle UDP packet receives and forwarding.
 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information.  Read the IP header data first
	 * because we've almost certainly got that in the cache.  We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);
		return 0;
	}
#endif

	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
						     cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}

/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ack accordingly.
 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001481static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001482 uint32_t *ack) __attribute__((always_inline));
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001483static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001484 uint32_t *ack)
1485{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001486 uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001487 uint8_t *ptr = (uint8_t *)th + length;
1488
1489 /*
1490 * If option is TIMESTAMP discard it.
1491 */
1492 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1493 && likely(ptr[0] == TCPOPT_NOP)
1494 && likely(ptr[1] == TCPOPT_NOP)
1495 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1496 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1497 return true;
1498 }
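	/*
	 * The early return above matches the canonical option layout that
	 * Linux emits on established connections, i.e. exactly 12 bytes of
	 * options:
	 *
	 *	01 01 08 0a <4-byte TSval> <4-byte TSecr>
	 *	NOP NOP TIMESTAMP(kind=8) len=10
	 *
	 * so data_offs == 20 + TCPOLEN_TIMESTAMP(10) + two NOP pad bytes.
	 */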
1499
1500 /*
1501 * TCP options. Parse SACK option.
1502 */
1503 while (length < data_offs) {
1504 uint8_t size;
1505 uint8_t kind;
1506
1507 ptr = (uint8_t *)th + length;
1508 kind = *ptr;
1509
1510 /*
1511 * NOP, used for padding.
1512 * Handled ahead of the other options so we can skip it quickly without reading a size byte.
1513 */
1514 if (kind == TCPOPT_NOP) {
1515 length++;
1516 continue;
1517 }
1518
1519 if (kind == TCPOPT_SACK) {
1520 uint32_t sack = 0;
1521 uint8_t re = 1 + 1;
1522
1523 size = *(ptr + 1);
1524 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1525 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1526 || (size > (data_offs - length))) {
1527 return false;
1528 }
1529
1530 re += 4;
1531 while (re < size) {
1532 uint32_t sack_re;
1533 uint8_t *sptr = ptr + re;
1534 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1535 if (sack_re > sack) {
1536 sack = sack_re;
1537 }
1538 re += TCPOLEN_SACK_PERBLOCK;
1539 }
1540 if (sack > *ack) {
1541 *ack = sack;
1542 }
1543 length += size;
1544 continue;
1545 }
1546 if (kind == TCPOPT_EOL) {
1547 return true;
1548 }
1549 size = *(ptr + 1);
1550 if (size < 2) {
1551 return false;
1552 }
1553 length += size;
1554 }
1555
1556 return true;
1557}
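/*
 * A worked example of the SACK parsing above (hypothetical values): for an
 * option block of
 *
 *	kind=5 (TCPOPT_SACK), size=10, one block: left=1000, right=2000
 *
 * the loop steps past each block's left edge and reads only the right edges
 * (at offsets 6, 14, ...), keeping the largest one (2000 here). If that
 * exceeds the ACK carried in the header, *ack is advanced to it so that the
 * window checks in sfe_ipv4_recv_tcp() treat SACKed data as acknowledged.
 */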
1558
1559/*
1560 * sfe_ipv4_recv_tcp()
1561 * Handle TCP packet receives and forwarding.
1562 */
1563static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001564 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001565{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001566 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001567 __be32 src_ip;
1568 __be32 dest_ip;
1569 __be16 src_port;
1570 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001571 struct sfe_ipv4_connection_match *cm;
1572 struct sfe_ipv4_connection_match *counter_cm;
1573 uint8_t ttl;
1574 uint32_t flags;
1575 struct net_device *xmit_dev;
1576
1577 /*
1578 * Is our packet too short to contain a valid TCP header?
1579 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001580 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001581 spin_lock(&si->lock);
1582 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1583 si->packets_not_forwarded++;
1584 spin_unlock(&si->lock);
1585
1586 DEBUG_TRACE("packet too short for TCP header\n");
1587 return 0;
1588 }
1589
1590 /*
1591 * Read the IP address and port information. Read the IP header data first
1592 * because we've almost certainly got that in the cache. We may not yet have
1593 * the TCP header cached though so allow more time for any prefetching.
1594 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001595 src_ip = iph->saddr;
1596 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001597
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001598 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001599 src_port = tcph->source;
1600 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001601 flags = tcp_flag_word(tcph);
1602
1603 spin_lock(&si->lock);
1604
1605 /*
1606 * Look for a connection match.
1607 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001608#ifdef CONFIG_NF_FLOW_COOKIE
1609 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1610 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001611 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001612 }
1613#else
Matthew McClintock37858802015-02-03 12:12:02 -06001614 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001615#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001616 if (unlikely(!cm)) {
1617 /*
1618 * We didn't get a connection but as TCP is connection-oriented that
1619 * may be because this is a non-fast connection (one that is not yet established).
1620 * For diagnostic purposes we differentiate this here.
1621 */
1622 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1623 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1624 si->packets_not_forwarded++;
1625 spin_unlock(&si->lock);
1626
1627 DEBUG_TRACE("no connection found - fast flags\n");
1628 return 0;
1629 }
1630 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1631 si->packets_not_forwarded++;
1632 spin_unlock(&si->lock);
1633
1634 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1635 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1636 return 0;
1637 }
1638
1639 /*
1640 * If our packet has been marked as "flush on find" we can't actually
1641 * forward it in the fast path, but now that we've found an associated
1642 * connection we can flush that out before we process the packet.
1643 */
1644 if (unlikely(flush_on_find)) {
1645 struct sfe_ipv4_connection *c = cm->connection;
1646 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1647 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1648 si->packets_not_forwarded++;
1649 spin_unlock(&si->lock);
1650
1651 DEBUG_TRACE("flush on find\n");
1652 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1653 return 0;
1654 }
1655
Zhi Chen8748eb32015-06-18 12:58:48 -07001656#ifdef CONFIG_XFRM
1657 /*
1658 * We can't accelerate the flow on this direction, just let it go
1659 * through the slow path.
1660 */
1661 if (unlikely(!cm->flow_accel)) {
1662 si->packets_not_forwarded++;
1663 spin_unlock(&si->lock);
1664 return 0;
1665 }
1666#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001667 /*
1668 * Does our TTL allow forwarding?
1669 */
1670 ttl = iph->ttl;
1671 if (unlikely(ttl < 2)) {
1672 struct sfe_ipv4_connection *c = cm->connection;
1673 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1674 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1675 si->packets_not_forwarded++;
1676 spin_unlock(&si->lock);
1677
1678 DEBUG_TRACE("ttl too low\n");
1679 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1680 return 0;
1681 }
1682
1683 /*
1684 * If our packet is larger than the MTU of the transmit interface then
1685 * we can't forward it easily.
1686 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001687 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001688 struct sfe_ipv4_connection *c = cm->connection;
1689 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1690 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1691 si->packets_not_forwarded++;
1692 spin_unlock(&si->lock);
1693
1694 DEBUG_TRACE("larger than mtu\n");
1695 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1696 return 0;
1697 }
1698
1699 /*
1700 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1701 * set is not a fast path packet.
1702 */
1703 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1704 struct sfe_ipv4_connection *c = cm->connection;
1705 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1706 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1707 si->packets_not_forwarded++;
1708 spin_unlock(&si->lock);
1709
1710 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1711 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1712 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1713 return 0;
1714 }
1715
1716 counter_cm = cm->counter_match;
1717
1718 /*
1719 * Are we doing sequence number checking?
1720 */
1721 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1722 uint32_t seq;
1723 uint32_t ack;
1724 uint32_t sack;
1725 uint32_t data_offs;
1726 uint32_t end;
1727 uint32_t left_edge;
1728 uint32_t scaled_win;
1729 uint32_t max_end;
1730
1731 /*
1732 * Is our sequence fully past the right hand edge of the window?
1733 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001734 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001735 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1736 struct sfe_ipv4_connection *c = cm->connection;
1737 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1738 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1739 si->packets_not_forwarded++;
1740 spin_unlock(&si->lock);
1741
1742 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1743 seq, cm->protocol_state.tcp.max_end + 1);
1744 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1745 return 0;
1746 }
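		/*
		 * Note that the window checks in this block use serial number
		 * arithmetic (in the style of RFC 1982): casting the difference
		 * of two u32 sequence values to int32_t yields their signed
		 * distance even across 2^32 wrap. For example, with
		 * seq = 0x00000010 and max_end = 0xfffffff0:
		 *
		 *	(int32_t)(0x00000010 - 0xfffffff1) = 31 > 0
		 *
		 * so seq is treated as 31 bytes past the right edge rather than
		 * as nearly 4G behind it.
		 */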
1747
1748 /*
1749 * Check that our TCP data offset isn't too short.
1750 */
1751 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001752 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001753 struct sfe_ipv4_connection *c = cm->connection;
1754 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1755 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1756 si->packets_not_forwarded++;
1757 spin_unlock(&si->lock);
1758
1759 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1760 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1761 return 0;
1762 }
1763
1764 /*
1765 * Update ACK according to any SACK option.
1766 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001767 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001768 sack = ack;
1769 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1770 struct sfe_ipv4_connection *c = cm->connection;
1771 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1772 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1773 si->packets_not_forwarded++;
1774 spin_unlock(&si->lock);
1775
1776 DEBUG_TRACE("TCP option SACK size is wrong\n");
1777 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1778 return 0;
1779 }
1780
1781 /*
1782 * Check that our TCP data offset isn't past the end of the packet.
1783 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001784 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001785 if (unlikely(len < data_offs)) {
1786 struct sfe_ipv4_connection *c = cm->connection;
1787 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1788 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1789 si->packets_not_forwarded++;
1790 spin_unlock(&si->lock);
1791
1792 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1793 data_offs, len);
1794 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1795 return 0;
1796 }
1797
1798 end = seq + len - data_offs;
1799
1800 /*
1801 * Is our sequence fully before the left hand edge of the window?
1802 */
1803 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1804 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1805 struct sfe_ipv4_connection *c = cm->connection;
1806 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1807 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1808 si->packets_not_forwarded++;
1809 spin_unlock(&si->lock);
1810
1811 DEBUG_TRACE("seq: %u before left edge: %u\n",
1812 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1813 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1814 return 0;
1815 }
1816
1817 /*
1818 * Are we acking data that is to the right of what has been sent?
1819 */
1820 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1821 struct sfe_ipv4_connection *c = cm->connection;
1822 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1823 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1824 si->packets_not_forwarded++;
1825 spin_unlock(&si->lock);
1826
1827 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1828 sack, counter_cm->protocol_state.tcp.end + 1);
1829 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1830 return 0;
1831 }
1832
1833 /*
1834 * Is our ack too far before the left hand edge of the window?
1835 */
1836 left_edge = counter_cm->protocol_state.tcp.end
1837 - cm->protocol_state.tcp.max_win
1838 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1839 - 1;
1840 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1841 struct sfe_ipv4_connection *c = cm->connection;
1842 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1843 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1844 si->packets_not_forwarded++;
1845 spin_unlock(&si->lock);
1846
1847 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1848 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1849 return 0;
1850 }
1851
1852 /*
1853 * Have we just seen the largest window size yet for this connection? If yes
1854 * then we need to record the new value.
1855 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001856 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001857 scaled_win += (sack - ack);
1858 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1859 cm->protocol_state.tcp.max_win = scaled_win;
1860 }
1861
1862 /*
1863 * If our sequence and/or ack numbers have advanced then record the new state.
1864 */
1865 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1866 cm->protocol_state.tcp.end = end;
1867 }
1868
1869 max_end = sack + scaled_win;
1870 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1871 counter_cm->protocol_state.tcp.max_end = max_end;
1872 }
1873 }
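	/*
	 * To summarize the tracking above: for each direction we record the
	 * highest sequence byte seen (end) and the furthest byte that
	 * direction may send (max_end, derived from the peer's ACK/SACK plus
	 * the peer's scaled receive window). A segment is acceptable when it
	 * falls within roughly [end - peer max_win - 1, max_end + 1]; this
	 * mirrors the window tracking that netfilter's TCP conntrack performs,
	 * so packets the fast path accepts should also be acceptable to the
	 * slow path.
	 */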
1874
1875 /*
1876 * From this point on we're good to modify the packet.
1877 */
1878
1879 /*
1880 * Decrement our TTL.
1881 */
1882 iph->ttl = ttl - 1;
1883
1884 /*
1885 * Do we have to perform translations of the source address/port?
1886 */
1887 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1888 uint16_t tcp_csum;
1889 uint32_t sum;
1890
Dave Hudson87973cd2013-10-22 16:00:04 +01001891 iph->saddr = cm->xlate_src_ip;
1892 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001893
1894 /*
1895 * Do we have a non-zero TCP checksum? If we do then we need
1896 * to update it.
1897 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001898 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001899 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1900 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1901 } else {
1902 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1903 }
1904
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001905 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001906 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001907 }
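	/*
	 * The adjustment added above is a precomputed RFC 1624 incremental
	 * checksum update for the rewritten address and port, conceptually:
	 *
	 *	HC' = ~(~HC + ~m + m')
	 *
	 * where m/m' are the old/new 16-bit header words. Precomputing this
	 * per connection reduces the per-packet cost to one add and one fold.
	 * The CHECKSUM_PARTIAL variant covers only the pseudo-header fields,
	 * since in that mode the hardware finishes the checksum on transmit.
	 */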
1908
1909 /*
1910 * Do we have to perform translations of the destination address/port?
1911 */
1912 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1913 uint16_t tcp_csum;
1914 uint32_t sum;
1915
Dave Hudson87973cd2013-10-22 16:00:04 +01001916 iph->daddr = cm->xlate_dest_ip;
1917 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001918
1919 /*
1920 * Do we have a non-zero TCP checksum? If we do then we need
1921 * to update it.
1922 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001923 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001924 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1925 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1926 } else {
1927 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1928 }
1929
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001930 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001931 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001932 }
1933
1934 /*
1935 * Replace the IP checksum.
1936 */
1937 iph->check = sfe_ipv4_gen_ip_csum(iph);
1938
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001939 /*
1940 * Update traffic stats.
1941 */
1942 cm->rx_packet_count++;
1943 cm->rx_byte_count += len;
1944
1945 /*
1946 * If we're not already on the active list then insert ourselves at the tail
1947 * of the current list.
1948 */
1949 if (unlikely(!cm->active)) {
1950 cm->active = true;
1951 cm->active_prev = si->active_tail;
1952 if (likely(si->active_tail)) {
1953 si->active_tail->active_next = cm;
1954 } else {
1955 si->active_head = cm;
1956 }
1957 si->active_tail = cm;
1958 }
1959
1960 xmit_dev = cm->xmit_dev;
1961 skb->dev = xmit_dev;
1962
1963 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001964 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001965 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001966 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1967 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001968 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001969 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001970 } else {
1971 /*
1972 * For the simple case we write this really fast.
1973 */
1974 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1975 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001976 eth->h_dest[0] = cm->xmit_dest_mac[0];
1977 eth->h_dest[1] = cm->xmit_dest_mac[1];
1978 eth->h_dest[2] = cm->xmit_dest_mac[2];
1979 eth->h_source[0] = cm->xmit_src_mac[0];
1980 eth->h_source[1] = cm->xmit_src_mac[1];
1981 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001982 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001983 }
1984
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001985 /*
1986 * Mark outgoing packet
1987 */
1988 skb->mark = cm->connection->mark;
1989 if (skb->mark) {
1990 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1991 }
1992
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001993 si->packets_forwarded++;
1994 spin_unlock(&si->lock);
1995
1996 /*
1997 * We're going to check for GSO flags when we transmit the packet so
1998 * start fetching the necessary cache line now.
1999 */
2000 prefetch(skb_shinfo(skb));
2001
2002 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06002003 * Mark that this packet has been fast forwarded.
2004 */
2005 skb->fast_forwarded = 1;
2006
2007 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002008 * Send the packet on its way.
2009 */
2010 dev_queue_xmit(skb);
2011
2012 return 1;
2013}
2014
2015/*
2016 * sfe_ipv4_recv_icmp()
2017 * Handle ICMP packet receives.
2018 *
2019 * ICMP packets aren't handled as a "fast path" - we always process them
2020 * through the default Linux stack. What we do need to do is look for any errors
2021 * about connections we are handling in the fast path. If we find any such
2022 * connections then we want to flush their state so that the ICMP error path
2023 * within Linux has all of the correct state should it need it.
2024 */
2025static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002026 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002027{
2028 struct icmphdr *icmph;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002029 struct sfe_ipv4_ip_hdr *icmp_iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002030 unsigned int icmp_ihl_words;
2031 unsigned int icmp_ihl;
2032 uint32_t *icmp_trans_h;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002033 struct sfe_ipv4_udp_hdr *icmp_udph;
2034 struct sfe_ipv4_tcp_hdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01002035 __be32 src_ip;
2036 __be32 dest_ip;
2037 __be16 src_port;
2038 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002039 struct sfe_ipv4_connection_match *cm;
2040 struct sfe_ipv4_connection *c;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002041 uint32_t pull_len = sizeof(struct icmphdr) + ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002042
2043 /*
2044 * Is our packet too short to contain a valid ICMP header?
2045 */
2046 len -= ihl;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002047 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002048 spin_lock(&si->lock);
2049 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2050 si->packets_not_forwarded++;
2051 spin_unlock(&si->lock);
2052
2053 DEBUG_TRACE("packet too short for ICMP header\n");
2054 return 0;
2055 }
2056
2057 /*
2058 * We only handle "destination unreachable" and "time exceeded" messages.
2059 */
2060 icmph = (struct icmphdr *)(skb->data + ihl);
2061 if ((icmph->type != ICMP_DEST_UNREACH)
2062 && (icmph->type != ICMP_TIME_EXCEEDED)) {
2063 spin_lock(&si->lock);
2064 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2065 si->packets_not_forwarded++;
2066 spin_unlock(&si->lock);
2067
2068 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2069 return 0;
2070 }
2071
2072 /*
2073 * Do we have the full embedded IP header?
2074 */
2075 len -= sizeof(struct icmphdr);
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002076 pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2077 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002078 spin_lock(&si->lock);
2079 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2080 si->packets_not_forwarded++;
2081 spin_unlock(&si->lock);
2082
2083 DEBUG_TRACE("Embedded IP header not complete\n");
2084 return 0;
2085 }
2086
2087 /*
2088 * Is our embedded IP version wrong?
2089 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002090 icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002091 if (unlikely(icmp_iph->version != 4)) {
2092 spin_lock(&si->lock);
2093 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2094 si->packets_not_forwarded++;
2095 spin_unlock(&si->lock);
2096
2097 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2098 return 0;
2099 }
2100
2101 /*
2102 * Do we have the full embedded IP header, including any options?
2103 */
2104 icmp_ihl_words = icmp_iph->ihl;
2105 icmp_ihl = icmp_ihl_words << 2;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002106 pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2107 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002108 spin_lock(&si->lock);
2109 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2110 si->packets_not_forwarded++;
2111 spin_unlock(&si->lock);
2112
2113 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2114 return 0;
2115 }
2116
Nicolas Costaac2979c2014-01-14 10:35:24 -06002117 len -= icmp_ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002118 icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;
2119
2120 /*
2121 * Handle the embedded transport layer header.
2122 */
2123 switch (icmp_iph->protocol) {
2124 case IPPROTO_UDP:
2125 /*
2126 * We should have 8 bytes of UDP header - that's enough to identify
2127 * the connection.
2128 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002129 pull_len += 8;
2130 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002131 spin_lock(&si->lock);
2132 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2133 si->packets_not_forwarded++;
2134 spin_unlock(&si->lock);
2135
2136 DEBUG_TRACE("Incomplete embedded UDP header\n");
2137 return 0;
2138 }
2139
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002140 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002141 src_port = icmp_udph->source;
2142 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002143 break;
2144
2145 case IPPROTO_TCP:
2146 /*
2147 * We should have 8 bytes of TCP header - that's enough to identify
2148 * the connection.
2149 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002150 pull_len += 8;
2151 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002152 spin_lock(&si->lock);
2153 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2154 si->packets_not_forwarded++;
2155 spin_unlock(&si->lock);
2156
2157 DEBUG_TRACE("Incomplete embedded TCP header\n");
2158 return 0;
2159 }
2160
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002161 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002162 src_port = icmp_tcph->source;
2163 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002164 break;
2165
2166 default:
2167 spin_lock(&si->lock);
2168 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2169 si->packets_not_forwarded++;
2170 spin_unlock(&si->lock);
2171
2172 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2173 return 0;
2174 }
2175
Dave Hudson87973cd2013-10-22 16:00:04 +01002176 src_ip = icmp_iph->saddr;
2177 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002178
2179 spin_lock(&si->lock);
2180
2181 /*
2182 * Look for a connection match. Note that we reverse the source and destination
2183 * here because our embedded message contains a packet that was sent in the
2184 * opposite direction to the one in which we just received it. It will have
2185 * been sent on the interface from which we received it though so that's still
2186 * ok to use.
2187 */
2188 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2189 if (unlikely(!cm)) {
2190 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2191 si->packets_not_forwarded++;
2192 spin_unlock(&si->lock);
2193
2194 DEBUG_TRACE("no connection found\n");
2195 return 0;
2196 }
2197
2198 /*
2199 * We found a connection so now remove it from the connection list and flush
2200 * its state.
2201 */
2202 c = cm->connection;
2203 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2204 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2205 si->packets_not_forwarded++;
2206 spin_unlock(&si->lock);
2207
2208 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2209 return 0;
2210}
2211
2212/*
2213 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002214 * Handle packet receives and forwarding.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002215 *
2216 * Returns 1 if the packet is forwarded or 0 if it isn't.
2217 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002218int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002219{
2220 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002221 unsigned int len;
2222 unsigned int tot_len;
2223 unsigned int frag_off;
2224 unsigned int ihl;
2225 bool flush_on_find;
2226 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002227 struct sfe_ipv4_ip_hdr *iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002228 uint32_t protocol;
2229
2230 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002231 * Check that we have space for an IP header here.
2232 */
2233 len = skb->len;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002234 if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002235 spin_lock(&si->lock);
2236 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2237 si->packets_not_forwarded++;
2238 spin_unlock(&si->lock);
2239
2240 DEBUG_TRACE("len: %u is too short\n", len);
2241 return 0;
2242 }
2243
2244 /*
2245 * Check that our "total length" is large enough for an IP header.
2246 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002247 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002248 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002249 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002250 spin_lock(&si->lock);
2251 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2252 si->packets_not_forwarded++;
2253 spin_unlock(&si->lock);
2254
2255 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2256 return 0;
2257 }
2258
2259 /*
2260 * Is our IP version wrong?
2261 */
2262 if (unlikely(iph->version != 4)) {
2263 spin_lock(&si->lock);
2264 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2265 si->packets_not_forwarded++;
2266 spin_unlock(&si->lock);
2267
2268 DEBUG_TRACE("IP version: %u\n", iph->version);
2269 return 0;
2270 }
2271
2272 /*
2273 * Does our datagram fit inside the skb?
2274 */
2275 if (unlikely(tot_len > len)) {
2276 spin_lock(&si->lock);
2277 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2278 si->packets_not_forwarded++;
2279 spin_unlock(&si->lock);
2280
2281 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2282 return 0;
2283 }
2284
2285 /*
2286 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002287 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002288 frag_off = ntohs(iph->frag_off);
2289 if (unlikely(frag_off & IP_OFFSET)) {
2290 spin_lock(&si->lock);
2291 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2292 si->packets_not_forwarded++;
2293 spin_unlock(&si->lock);
2294
2295 DEBUG_TRACE("non-initial fragment\n");
2296 return 0;
2297 }
2298
2299 /*
2300 * If we have a (first) fragment then mark it to cause any connection to flush.
2301 */
2302 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
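	/*
	 * For reference, iph->frag_off packs three flag bits and a 13-bit
	 * fragment offset into one 16-bit field, so after ntohs():
	 *
	 *	frag_off & IP_OFFSET (0x1fff) - non-zero on 2nd and later fragments
	 *	frag_off & IP_MF (0x2000) - set on every fragment except the last
	 *
	 * A first fragment therefore has IP_MF set with a zero offset, which is
	 * why it is pushed to the slow path via flush_on_find rather than being
	 * rejected outright above.
	 */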
2303
2304 /*
2305 * Do we have any IP options? That's definitely a slow path! If we do have IP
2306 * options we need to recheck our header size.
2307 */
2308 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002309 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002310 if (unlikely(ip_options)) {
2311 if (unlikely(len < ihl)) {
2312 spin_lock(&si->lock);
2313 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2314 si->packets_not_forwarded++;
2315 spin_unlock(&si->lock);
2316
2317 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2318 return 0;
2319 }
2320
2321 flush_on_find = true;
2322 }
2323
2324 protocol = iph->protocol;
2325 if (IPPROTO_UDP == protocol) {
2326 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2327 }
2328
2329 if (IPPROTO_TCP == protocol) {
2330 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2331 }
2332
2333 if (IPPROTO_ICMP == protocol) {
2334 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2335 }
2336
2337 spin_lock(&si->lock);
2338 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2339 si->packets_not_forwarded++;
2340 spin_unlock(&si->lock);
2341
2342 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2343 return 0;
2344}
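/*
 * A minimal sketch of how a caller might hook sfe_ipv4_recv() - purely
 * illustrative; the real hook lives in the connection manager and the
 * function and device names here are assumptions:
 *
 *	static rx_handler_result_t hypothetical_rx_handler(struct sk_buff **pskb)
 *	{
 *		struct sk_buff *skb = *pskb;
 *
 *		if (likely(skb->protocol == htons(ETH_P_IP))
 *		    && sfe_ipv4_recv(skb->dev, skb)) {
 *			return RX_HANDLER_CONSUMED;	// forwarded in the fast path
 *		}
 *
 *		return RX_HANDLER_PASS;			// fall back to the Linux stack
 *	}
 */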
2345
Nicolas Costa436926b2014-01-14 10:36:22 -06002346static void
2347sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002348 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002349{
2350 struct sfe_ipv4_connection_match *orig_cm;
2351 struct sfe_ipv4_connection_match *repl_cm;
2352 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2353 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2354
2355 orig_cm = c->original_match;
2356 repl_cm = c->reply_match;
2357 orig_tcp = &orig_cm->protocol_state.tcp;
2358 repl_tcp = &repl_cm->protocol_state.tcp;
2359
2360 /* update orig */
2361 if (orig_tcp->max_win < sic->src_td_max_window) {
2362 orig_tcp->max_win = sic->src_td_max_window;
2363 }
2364 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2365 orig_tcp->end = sic->src_td_end;
2366 }
2367 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2368 orig_tcp->max_end = sic->src_td_max_end;
2369 }
2370
2371 /* update reply */
2372 if (repl_tcp->max_win < sic->dest_td_max_window) {
2373 repl_tcp->max_win = sic->dest_td_max_window;
2374 }
2375 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2376 repl_tcp->end = sic->dest_td_end;
2377 }
2378 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2379 repl_tcp->max_end = sic->dest_td_max_end;
2380 }
2381
2382 /* update match flags */
2383 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2384 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002385 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002386 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2387 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2388 }
2389}
2390
2391static void
2392sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002393 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002394{
2395 switch (sic->protocol) {
2396 case IPPROTO_TCP:
2397 sfe_ipv4_update_tcp_state(c, sic);
2398 break;
2399 }
2400}
2401
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002402void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002403{
2404 struct sfe_ipv4_connection *c;
2405 struct sfe_ipv4 *si = &__si;
2406
2407 spin_lock_bh(&si->lock);
2408
2409 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2410 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002411 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002412 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002413 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002414 sic->dest_port);
2415 if (c != NULL) {
2416 sfe_ipv4_update_protocol_state(c, sic);
2417 }
2418
2419 spin_unlock_bh(&si->lock);
2420}
2421
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002422/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002423 * sfe_ipv4_create_rule()
2424 * Create a forwarding rule.
2425 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002426int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002427{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002428 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002429 struct sfe_ipv4_connection *c;
2430 struct sfe_ipv4_connection_match *original_cm;
2431 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002432 struct net_device *dest_dev;
2433 struct net_device *src_dev;
2434
2435 dest_dev = sic->dest_dev;
2436 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002437
Matthew McClintock389b42a2014-09-24 14:05:51 -05002438 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2439 (src_dev->reg_state != NETREG_REGISTERED))) {
2440 return -EINVAL;
2441 }
2442
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002443 spin_lock_bh(&si->lock);
2444 si->connection_create_requests++;
2445
2446 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002447 * Check to see if there is already a flow that matches the rule we're
2448 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002449 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002450 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2451 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002452 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002453 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002454 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002455 sic->dest_port);
2456 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002457 si->connection_create_collisions++;
2458
2459 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002460 * If we already have the flow then it's likely that this
2461 * request to create the connection rule contains more
2462 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002463 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002464 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002465 spin_unlock_bh(&si->lock);
2466
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002467 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002468 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002469 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002470 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2471 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002472 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002473 }
2474
2475 /*
2476 * Allocate the various connection tracking objects.
2477 */
2478 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2479 if (unlikely(!c)) {
2480 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002481 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002482 }
2483
2484 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2485 if (unlikely(!original_cm)) {
2486 spin_unlock_bh(&si->lock);
2487 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002488 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002489 }
2490
2491 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2492 if (unlikely(!reply_cm)) {
2493 spin_unlock_bh(&si->lock);
2494 kfree(original_cm);
2495 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002496 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002497 }
2498
2499 /*
2500 * Fill in the "original" direction connection matching object.
2501 * Note that the transmit MAC address is "dest_mac_xlate" because
2502 * we always know both ends of a connection by their translated
2503 * addresses and not their public addresses.
2504 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002505 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002506 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002507 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002508 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002509 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002510 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002511 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002512 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002513 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002514 original_cm->xlate_dest_port = sic->dest_port_xlate;
2515 original_cm->rx_packet_count = 0;
2516 original_cm->rx_packet_count64 = 0;
2517 original_cm->rx_byte_count = 0;
2518 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002519 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002520 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002521 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002522 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2523 original_cm->connection = c;
2524 original_cm->counter_match = reply_cm;
2525 original_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002526#ifdef CONFIG_NF_FLOW_COOKIE
2527 original_cm->flow_cookie = 0;
2528#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002529#ifdef CONFIG_XFRM
2530 original_cm->flow_accel = sic->original_accel;
2531#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002532 original_cm->active_next = NULL;
2533 original_cm->active_prev = NULL;
2534 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002535
2536 /*
2537 * For PPP links we don't write an L2 header. For everything else we do.
2538 */
2539 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2540 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2541
2542 /*
2543 * If our dev writes Ethernet headers then we can write a really fast
2544 * version.
2545 */
2546 if (dest_dev->header_ops) {
2547 if (dest_dev->header_ops->create == eth_header) {
2548 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2549 }
2550 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002551 }
2552
2553 /*
2554 * Fill in the "reply" direction connection matching object.
2555 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002556 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002557 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002558 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002559 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002560 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002561 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002562 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002563 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002564 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002565 reply_cm->xlate_dest_port = sic->src_port;
2566 reply_cm->rx_packet_count = 0;
2567 reply_cm->rx_packet_count64 = 0;
2568 reply_cm->rx_byte_count = 0;
2569 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002570 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002571 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002572 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002573 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2574 reply_cm->connection = c;
2575 reply_cm->counter_match = original_cm;
2576 reply_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002577#ifdef CONFIG_NF_FLOW_COOKIE
2578 reply_cm->flow_cookie = 0;
2579#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002580#ifdef CONFIG_XFRM
2581 reply_cm->flow_accel = sic->reply_accel;
2582#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002583 reply_cm->active_next = NULL;
2584 reply_cm->active_prev = NULL;
2585 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002586
2587 /*
2588 * For PPP links we don't write an L2 header. For everything else we do.
2589 */
2590 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2591 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2592
2593 /*
2594 * If our dev writes Ethernet headers then we can write a really fast
2595 * version.
2596 */
2597 if (src_dev->header_ops) {
2598 if (src_dev->header_ops->create == eth_header) {
2599 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2600 }
2601 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002602 }
2603
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002604
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002605 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002606 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2607 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2608 }
2609
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002610 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002611 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2612 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2613 }
2614
2615 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002616 c->src_ip = sic->src_ip.ip;
2617 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002618 c->src_port = sic->src_port;
2619 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002620 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002621 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002622 c->dest_ip = sic->dest_ip.ip;
2623 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002624 c->dest_port = sic->dest_port;
2625 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002626 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002627 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002628 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002629
2630 c->last_sync_jiffies = get_jiffies_64();
2631 c->iterators = 0;
2632 c->pending_free = false;
2633
2634 /*
2635 * Take hold of our source and dest devices for the duration of the connection.
2636 */
2637 dev_hold(c->original_dev);
2638 dev_hold(c->reply_dev);
2639
2640 /*
2641 * Initialize the protocol-specific information that we track.
2642 */
2643 switch (sic->protocol) {
2644 case IPPROTO_TCP:
2645 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2646 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2647 original_cm->protocol_state.tcp.end = sic->src_td_end;
2648 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2649 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2650 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2651 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2652 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002653 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002654 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2655 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2656 }
2657 break;
2658 }
2659
2660 sfe_ipv4_connection_match_compute_translations(original_cm);
2661 sfe_ipv4_connection_match_compute_translations(reply_cm);
2662 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2663
2664 spin_unlock_bh(&si->lock);
2665
2666 /*
2667 * We have everything we need!
2668 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002669 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002670 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2671 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002672 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002673 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002674 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002675 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002676 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002677
2678 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002679}
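/*
 * A hedged example of driving sfe_ipv4_create_rule(). The field names come
 * from struct sfe_connection_create as used above; the values, devices and
 * calling context are purely illustrative:
 *
 *	struct sfe_connection_create sic = { 0 };
 *
 *	sic.protocol = IPPROTO_TCP;
 *	sic.src_dev = lan_dev;
 *	sic.dest_dev = wan_dev;
 *	sic.src_ip.ip = htonl(0xc0a80164);		// 192.168.1.100
 *	sic.src_ip_xlate.ip = htonl(0xcb007101);	// 203.0.113.1 (SNAT)
 *	sic.dest_ip.ip = sic.dest_ip_xlate.ip = htonl(0xc6336401);
 *	sic.src_port = htons(49152);
 *	sic.src_port_xlate = htons(1024);
 *	sic.dest_port = sic.dest_port_xlate = htons(80);
 *	sic.src_mtu = sic.dest_mtu = 1500;
 *	// ... MAC addresses and TCP window state filled in from conntrack ...
 *
 *	if (sfe_ipv4_create_rule(&sic) == 0) {
 *		// both directions of the flow now match the fast path
 *	}
 */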
2680
2681/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002682 * sfe_ipv4_destroy_rule()
2683 * Destroy a forwarding rule.
2684 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002685void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002686{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002687 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002688 struct sfe_ipv4_connection *c;
2689
2690 spin_lock_bh(&si->lock);
2691 si->connection_destroy_requests++;
2692
2693 /*
2694 * Check to see if we have a flow that matches the rule we're trying
2695 * to destroy. If there isn't then we can't destroy it.
2696 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002697 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2698 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002699 if (!c) {
2700 si->connection_destroy_misses++;
2701 spin_unlock_bh(&si->lock);
2702
2703 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002704 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2705 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002706 return;
2707 }
2708
2709 /*
2710 * Remove our connection details from the hash tables.
2711 */
2712 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2713 spin_unlock_bh(&si->lock);
2714
2715 /*
2716 * Finally synchronize state and free resources. We need to protect against
2717 * pre-emption by our bottom half while we do this though.
2718 */
2719 local_bh_disable();
2720 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2721 local_bh_enable();
2722
2723 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002724 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2725 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002726}
2727
2728/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002729 * sfe_ipv4_register_sync_rule_callback()
2730 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002731 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002732void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002733{
2734 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002735
2736 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002737 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002738 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002739}
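/*
 * Note the RCU pattern here: the callback is published with
 * rcu_assign_pointer() and read in sfe_ipv4_periodic_sync() under
 * rcu_read_lock()/rcu_dereference(), so the sync path can sample it without
 * holding si->lock. Passing NULL effectively disables synchronization,
 * since the periodic timer checks for a NULL callback and bails out.
 */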
2740
2741/*
2742 * sfe_ipv4_get_debug_dev()
2743 */
2744static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2745 struct device_attribute *attr,
2746 char *buf)
2747{
2748 struct sfe_ipv4 *si = &__si;
2749 ssize_t count;
2750 int num;
2751
2752 spin_lock_bh(&si->lock);
2753 num = si->debug_dev;
2754 spin_unlock_bh(&si->lock);
2755
2756 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2757 return count;
2758}
2759
2760/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002761 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002762 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002763static const struct device_attribute sfe_ipv4_debug_dev_attr =
2764 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
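/*
 * Note that no store handler is supplied above, so although the mode
 * includes write bits, writes to this attribute will be rejected by sysfs;
 * only the read side (sfe_ipv4_get_debug_dev) is functional.
 */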
2765
2766/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002767 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002768 * Destroy all connections that match a particular device.
2769 *
2770 * If we pass dev as NULL then this destroys all connections.
2771 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002772void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002773{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002774 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002775 struct sfe_ipv4_connection *c;
2776 struct sfe_ipv4_connection *c_next;
2777
2778 spin_lock_bh(&si->lock);
2779 c = si->all_connections_head;
2780 if (!c) {
2781 spin_unlock_bh(&si->lock);
2782 return;
2783 }
2784
2785 c->iterators++;
2786
2787 /*
2788 * Iterate over all connections
2789 */
2790 while (c) {
2791 c_next = c->all_connections_next;
2792
2793 /*
2794 * Before we do anything else, take an iterator reference for the
2795 * connection we'll iterate next.
2796 */
2797 if (c_next) {
2798 c_next->iterators++;
2799 }
2800
2801 /*
2802 * Does this connection relate to the device we are destroying? If
2803 * it does then ensure it is marked for being freed as soon as it
2804 * is no longer being iterated.
2805 */
2806 if (!dev
2807 || (dev == c->original_dev)
2808 || (dev == c->reply_dev)) {
2809 c->pending_free = true;
2810 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2811 }
2812
2813 /*
2814 * Remove the iterator reference that we acquired and see if we
2815 * should free any resources.
2816 */
2817 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
2818 spin_unlock_bh(&si->lock);
Nicolas Costabafb3af2014-01-29 16:39:39 -06002819
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002820 /*
2821 * This entry is dead so release our hold of the source and
2822 * dest devices and free the memory for our connection objects.
2823 */
2824 dev_put(c->original_dev);
2825 dev_put(c->reply_dev);
2826 kfree(c->original_match);
2827 kfree(c->reply_match);
2828 kfree(c);
2829
2830 spin_lock_bh(&si->lock);
2831 }
2832
2833 c = c_next;
2834 }
2835
2836 spin_unlock_bh(&si->lock);
2837}
2838
2839/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002840 * sfe_ipv4_periodic_sync()
2841 */
2842static void sfe_ipv4_periodic_sync(unsigned long arg)
2843{
2844 struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
2845 uint64_t now_jiffies;
2846 int quota;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002847 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002848
2849 now_jiffies = get_jiffies_64();
2850
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002851 rcu_read_lock();
2852 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2853 if (!sync_rule_callback) {
2854 rcu_read_unlock();
2855 goto done;
2856 }
2857
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002858 spin_lock_bh(&si->lock);
2859 sfe_ipv4_update_summary_stats(si);
2860
2861 /*
2862 * Get an estimate of the number of connections to parse in this sync.
2863 */
2864 quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
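	/*
	 * Re-arm the timer: (HZ + 99) / 100 is ceil(HZ / 100) jiffies, so the
	 * next sync fires roughly 10ms from now regardless of HZ.
	 */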
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}

#define CHAR_DEV_MSG_SIZE 768
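/*
 * Each XML write method below formats at most one CHAR_DEV_MSG_SIZE chunk
 * into a caller-supplied scratch buffer, and sfe_ipv4_debug_dev_read()
 * only runs a method while more than this much space remains in the
 * user's buffer.
 */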

/*
 * sfe_ipv4_debug_dev_read_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");

	/*
	 * Copy only the bytes actually formatted; scnprintf() returns the
	 * number of characters written, excluding the trailing NUL.
	 */
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						      int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_connection()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection *c_next;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	int bytes_read;
	int protocol;
	struct net_device *src_dev;
	__be32 src_ip;
	__be32 src_ip_xlate;
	__be16 src_port;
	__be16 src_port_xlate;
	uint64_t src_rx_packets;
	uint64_t src_rx_bytes;
	struct net_device *dest_dev;
	__be32 dest_ip;
	__be32 dest_ip_xlate;
	__be16 dest_port;
	__be16 dest_port_xlate;
	uint64_t dest_rx_packets;
	uint64_t dest_rx_bytes;
	uint64_t last_sync_jiffies;
	uint32_t mark;
#ifdef CONFIG_NF_FLOW_COOKIE
	int src_flow_cookie, dst_flow_cookie;
#endif

	spin_lock_bh(&si->lock);
	c = ws->iter_conn;

	/*
	 * Is this the first connection we need to scan?
	 */
	if (!c) {
		c = si->all_connections_head;

		/*
		 * If there were no connections then move to the next state.
		 */
		if (!c) {
			spin_unlock_bh(&si->lock);

			ws->state++;
			return true;
		}

		c->iterators++;
	}

	c_next = c->all_connections_next;
	ws->iter_conn = c_next;

	/*
	 * Before we do anything else, take an iterator reference for the
	 * connection we'll iterate next.
	 */
	if (c_next) {
		c_next->iterators++;
	}

	/*
	 * Remove the iterator reference that we acquired and see if we
	 * should free any resources.
	 */
	if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
		spin_unlock_bh(&si->lock);

		/*
		 * This entry is dead so release our hold of the source and
		 * dest devices and free the memory for our connection objects.
		 */
		dev_put(c->original_dev);
		dev_put(c->reply_dev);
		kfree(c->original_match);
		kfree(c->reply_match);
		kfree(c);

		/*
		 * If we have no more connections then move to the next state.
		 */
		if (!c_next) {
			ws->state++;
		}

		return true;
	}

	original_cm = c->original_match;
	reply_cm = c->reply_match;

	protocol = c->protocol;
	src_dev = c->original_dev;
	src_ip = c->src_ip;
	src_ip_xlate = c->src_ip_xlate;
	src_port = c->src_port;
	src_port_xlate = c->src_port_xlate;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	src_rx_packets = original_cm->rx_packet_count64;
	src_rx_bytes = original_cm->rx_byte_count64;
	dest_dev = c->reply_dev;
	dest_ip = c->dest_ip;
	dest_ip_xlate = c->dest_ip_xlate;
	dest_port = c->dest_port;
	dest_port_xlate = c->dest_port_xlate;
	dest_rx_packets = reply_cm->rx_packet_count64;
	dest_rx_bytes = reply_cm->rx_byte_count64;
	last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
	mark = c->mark;
#ifdef CONFIG_NF_FLOW_COOKIE
	src_flow_cookie = original_cm->flow_cookie;
	dst_flow_cookie = reply_cm->flow_cookie;
#endif
	spin_unlock_bh(&si->lock);

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
			       "protocol=\"%u\" "
			       "src_dev=\"%s\" "
			       "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
			       "src_port=\"%u\" src_port_xlate=\"%u\" "
			       "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
			       "dest_dev=\"%s\" "
			       "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
			       "dest_port=\"%u\" dest_port_xlate=\"%u\" "
			       "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
#ifdef CONFIG_NF_FLOW_COOKIE
			       "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
#endif
			       "last_sync=\"%llu\" "
			       "mark=\"%08x\" />\n",
			       protocol,
			       src_dev->name,
			       &src_ip, &src_ip_xlate,
			       ntohs(src_port), ntohs(src_port_xlate),
			       src_rx_packets, src_rx_bytes,
			       dest_dev->name,
			       &dest_ip, &dest_ip_xlate,
			       ntohs(dest_port), ntohs(dest_port_xlate),
			       dest_rx_packets, dest_rx_bytes,
#ifdef CONFIG_NF_FLOW_COOKIE
			       src_flow_cookie, dst_flow_cookie,
#endif
			       last_sync_jiffies, mark);

	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	/*
	 * If we have no more connections then move to the next state.
	 */
	if (!c_next) {
		ws->state++;
	}

	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						     int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_exception()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	uint64_t ct;

	spin_lock_bh(&si->lock);
	ct = si->exception_events64[ws->iter_exception];
	spin_unlock_bh(&si->lock);

	if (ct) {
		int bytes_read;

		bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE,
				       "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
				       sfe_ipv4_exception_events_string[ws->iter_exception],
				       ct);
		if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
			return false;
		}

		*length -= bytes_read;
		*total_read += bytes_read;
	}

	ws->iter_exception++;
	if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
		ws->iter_exception = 0;
		ws->state++;
	}

	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_stats()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;
	unsigned int num_connections;
	uint64_t packets_forwarded;
	uint64_t packets_not_forwarded;
	uint64_t connection_create_requests;
	uint64_t connection_create_collisions;
	uint64_t connection_destroy_requests;
	uint64_t connection_destroy_misses;
	uint64_t connection_flushes;
	uint64_t connection_match_hash_hits;
	uint64_t connection_match_hash_reorders;

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	num_connections = si->num_connections;
	packets_forwarded = si->packets_forwarded64;
	packets_not_forwarded = si->packets_not_forwarded64;
	connection_create_requests = si->connection_create_requests64;
	connection_create_collisions = si->connection_create_collisions64;
	connection_destroy_requests = si->connection_destroy_requests64;
	connection_destroy_misses = si->connection_destroy_misses64;
	connection_flushes = si->connection_flushes64;
	connection_match_hash_hits = si->connection_match_hash_hits64;
	connection_match_hash_reorders = si->connection_match_hash_reorders64;
	spin_unlock_bh(&si->lock);

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
			       "num_connections=\"%u\" "
			       "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
			       "create_requests=\"%llu\" create_collisions=\"%llu\" "
			       "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
			       "flushes=\"%llu\" "
			       "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
			       num_connections,
			       packets_forwarded,
			       packets_not_forwarded,
			       connection_create_requests,
			       connection_create_collisions,
			       connection_destroy_requests,
			       connection_destroy_misses,
			       connection_flushes,
			       connection_match_hash_hits,
			       connection_match_hash_reorders);
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = scnprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
	if (copy_to_user(buffer + *total_read, msg, bytes_read)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
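
/*
 * Stepping through every state above yields a document of this shape
 * (attributes elided):
 *
 *	<sfe_ipv4>
 *		<connections>
 *			<connection ... />
 *		</connections>
 *		<exceptions>
 *			<exception ... />
 *		</exceptions>
 *		<stats ... />
 *	</sfe_ipv4>
 */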

/*
 * sfe_ipv4_debug_dev_read()
 *	Send info to userspace upon read request from user.
 */
static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
	char msg[CHAR_DEV_MSG_SIZE];
	int total_read = 0;
	struct sfe_ipv4_debug_xml_write_state *ws;
	struct sfe_ipv4 *si = &__si;

	ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
	while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
		if (!(sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
			/*
			 * A failed copy_to_user() won't succeed on retry, so
			 * stop rather than spinning on the same state.
			 */
			break;
		}
	}

	return total_read;
}
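
/*
 * A typical way to read the XML from userspace (the major number is
 * dynamically assigned, so look up "sfe_ipv4" in /proc/devices first):
 *
 *	mknod /dev/sfe_ipv4 c <major> 0
 *	cat /dev/sfe_ipv4
 */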

/*
 * sfe_ipv4_debug_dev_write()
 *	Write to char device resets some stats
 */
static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	si->packets_forwarded64 = 0;
	si->packets_not_forwarded64 = 0;
	si->connection_create_requests64 = 0;
	si->connection_create_collisions64 = 0;
	si->connection_destroy_requests64 = 0;
	si->connection_destroy_misses64 = 0;
	si->connection_flushes64 = 0;
	si->connection_match_hash_hits64 = 0;
	si->connection_match_hash_reorders64 = 0;
	spin_unlock_bh(&si->lock);

	return length;
}
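
/*
 * Any write resets the counters regardless of the content written, e.g.:
 *
 *	echo clear > /dev/sfe_ipv4
 */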

/*
 * sfe_ipv4_debug_dev_open()
 */
static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (!ws) {
		ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
		if (!ws) {
			return -ENOMEM;
		}

		ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
		file->private_data = ws;
	}

	return 0;
}
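
/*
 * The write state allocated above lives in file->private_data, so a
 * document larger than a single read() can be consumed incrementally
 * across several calls on the same open file.
 */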

/*
 * sfe_ipv4_debug_dev_release()
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		struct sfe_ipv4_connection *c;

		/*
		 * Are we currently iterating a connection? If we are then
		 * make sure that we reduce its iterator count and if necessary
		 * free it.
		 */
		c = ws->iter_conn;
		if (c) {
			struct sfe_ipv4 *si = &__si;
			bool free_connection;

			spin_lock_bh(&si->lock);
			free_connection = sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c);
			spin_unlock_bh(&si->lock);

			if (free_connection) {
				/*
				 * This entry is dead so release our hold of the source and
				 * dest devices and free the memory for our connection objects.
				 */
				dev_put(c->original_dev);
				dev_put(c->reply_dev);
				kfree(c->original_match);
				kfree(c->reply_match);
				kfree(c);
			}
		}

		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
	}

	return 0;
}

/*
 * File operations used in the debug char device
 */
static struct file_operations sfe_ipv4_debug_dev_fops = {
	.owner = THIS_MODULE,
	.read = sfe_ipv4_debug_dev_read,
	.write = sfe_ipv4_debug_dev_write,
	.open = sfe_ipv4_debug_dev_open,
	.release = sfe_ipv4_debug_dev_release
};

#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb
 *	register a callback that SFE uses to configure a flow cookie for a flow
 *
 * A hardware driver that supports flow cookies should register a callback
 * function with SFE; SFE then invokes that function to configure the flow
 * cookie for each flow.
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	BUG_ON(!cb);

	if (si->flow_cookie_set_func) {
		return -1;
	}

	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}

/*
 * sfe_unregister_flow_cookie_cb
 *	unregister the callback that is used to configure flow cookies for flows
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}
#endif /*CONFIG_NF_FLOW_COOKIE*/
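
/*
 * A minimal registration sketch for a hardware driver built with
 * CONFIG_NF_FLOW_COOKIE; the names here are hypothetical, and
 * my_set_flow_cookie() must match the flow_cookie_set_func_t prototype
 * declared in the SFE headers:
 *
 *	static int my_probe(...)
 *	{
 *		if (sfe_register_flow_cookie_cb(my_set_flow_cookie))
 *			return -EBUSY;
 *		return 0;
 *	}
 *
 *	static void my_remove(...)
 *	{
 *		sfe_unregister_flow_cookie_cb(my_set_flow_cookie);
 *	}
 */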

/*
 * sfe_ipv4_init()
 */
static int __init sfe_ipv4_init(void)
{
	struct sfe_ipv4 *si = &__si;
	int result = -1;

	DEBUG_INFO("SFE IPv4 init\n");

	/*
	 * Create sys/sfe_ipv4
	 */
	si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
	if (!si->sys_sfe_ipv4) {
		DEBUG_ERROR("failed to register sfe_ipv4\n");
		goto exit1;
	}

	/*
	 * Create files, one for each parameter supported by this module.
	 */
	result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register debug dev file: %d\n", result);
		goto exit4;
	}

	/*
	 * Register our debug char device.
	 */
	result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
	if (result < 0) {
		DEBUG_ERROR("Failed to register chrdev: %d\n", result);
		goto exit5;
	}

	si->debug_dev = result;

	/*
	 * Initialize our lock before the periodic timer can fire, then
	 * create the timer that handles periodic statistics.
	 */
	spin_lock_init(&si->lock);

	setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));

	return 0;

exit5:
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

exit4:
	kobject_put(si->sys_sfe_ipv4);

exit1:
	return result;
}

/*
 * sfe_ipv4_exit()
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);
}

module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");