blob: 6a123170663f23ecbeaa08c2b13cc70d37d86494 [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Xiaoping Fana42c68b2015-08-07 18:00:39 -07005 * Copyright (c) 2013-2015 The Linux Foundation. All rights reserved.
6 * Permission to use, copy, modify, and/or distribute this software for
7 * any purpose with or without fee is hereby granted, provided that the
8 * above copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010016 */
Matthew McClintocka3221942014-01-16 11:44:26 -060017
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010018#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060019#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020#include <linux/skbuff.h>
21#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010022#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060023#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010024
Dave Hudsondcd08fb2013-11-22 09:25:16 -060025#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070026#include "sfe_cm.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010027
/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses. For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif
44
/*
 * An Ethernet header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT)
 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC, stored as three 16-bit words */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC, stored as three 16-bit words */
	__be16 h_proto;			/* EtherType / protocol ID */
} SFE_IPV4_UNALIGNED_STRUCT;
55
/*
 * An IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field names and layout mirror the
 * kernel's struct iphdr.
 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,
	     version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;
	__be16 tot_len;
	__be16 id;
	__be16 frag_off;
	__u8 ttl;
	__u8 protocol;
	__sum16 check;
	__be32 saddr;
	__be32 daddr;

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010085
/*
 * A UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field names and layout mirror the
 * kernel's struct udphdr.
 */
struct sfe_ipv4_udp_hdr {
	__be16 source;
	__be16 dest;
	__be16 len;
	__sum16 check;
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010097
/*
 * A TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field names and layout mirror the
 * kernel's struct tcphdr.
 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;
	__be16 dest;
	__be32 seq;
	__be32 ack_seq;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;
	__sum16 check;
	__be16 urg_ptr;
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100137
/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 * Per-direction TCP sequence-space tracking used to validate seq/ack
 * numbers against the window.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};
152
/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write L2 header on transmit (comment was a copy-paste duplicate of the flag above) */
166
/*
 * IPv4 connection matching structure.
 *
 * One of these exists for each direction of a connection (see the
 * original_match/reply_match pointers in struct sfe_ipv4_connection);
 * it carries everything the per-packet fast path needs to match,
 * translate and transmit a packet.
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	uint32_t flow_cookie;		/* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	uint32_t flow_accel;		/* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	uint16_t xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	uint16_t xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};
252
/*
 * Per-connection data structure.
 * Holds the 5-tuple (and its NAT translation) plus the two per-direction
 * match entries and list/hash linkage.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	uint32_t mark;			/* mark for outgoing packet */
	uint32_t debug_read_seq;	/* sequence number for debug dump */
};
285
/*
 * IPv4 connections and hash table size information.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)

#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff

/*
 * One hardware flow-cookie slot: the connection match (if any) that owns
 * this cookie index, and when the slot was last released (used to delay
 * reuse of recently freed slots).
 */
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
	unsigned long last_clean_time;
};
#endif
302
/*
 * Reasons why a packet was handed back to the slow path instead of being
 * forwarded.  Order must match sfe_ipv4_exception_events_string[].
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
342
/*
 * Human-readable names for the exception events above.  Indexed by
 * enum sfe_ipv4_exception_events, so the order here must match the
 * enum exactly.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
381
/*
 * Per-module structure.
 * All state for the IPv4 engine: connection tables, periodic sync timer,
 * statistics and debug/sysfs control state.  The lock protects the hash
 * tables and lists.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
#endif

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics (64-bit totals; the 32-bit counters above are
	 * periodically folded into these).
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
	uint32_t debug_read_seq;	/* sequence number for debug dump */
};
458
/*
 * Enumeration of the XML output.
 * States of the debug-dump state machine; each read of the debug device
 * advances through these in order.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
474
/*
 * XML write state.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;		/* Next exception iterator */
};

/*
 * Signature of a per-state writer for the debug XML state machine.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/*
 * The single module-wide engine instance.
 */
struct sfe_ipv4 __si;
488
489/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100490 * sfe_ipv4_gen_ip_csum()
491 * Generate the IP checksum for an IPv4 header.
492 *
493 * Note that this function assumes that we have only 20 bytes of IP header.
494 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600495static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100496{
497 uint32_t sum;
498 uint16_t *i = (uint16_t *)iph;
499
500 iph->check = 0;
501
502 /*
503 * Generate the sum.
504 */
505 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
506
507 /*
508 * Fold it to ones-complement form.
509 */
510 sum = (sum & 0xffff) + (sum >> 16);
511 sum = (sum & 0xffff) + (sum >> 16);
512
513 return (uint16_t)sum ^ 0xffff;
514}
515
516/*
517 * sfe_ipv4_get_connection_match_hash()
518 * Generate the hash used in connection match lookups.
519 */
520static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100521 __be32 src_ip, __be16 src_port,
522 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100523{
524 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100525 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100526 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
527}
528
529/*
530 * sfe_ipv4_find_sfe_ipv4_connection_match()
531 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
532 *
533 * On entry we must be holding the lock that protects the hash table.
534 */
535static struct sfe_ipv4_connection_match *
536sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100537 __be32 src_ip, __be16 src_port,
538 __be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100539static struct sfe_ipv4_connection_match *
540sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100541 __be32 src_ip, __be16 src_port,
542 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100543{
544 struct sfe_ipv4_connection_match *cm;
545 struct sfe_ipv4_connection_match *head;
546 unsigned int conn_match_idx;
547
548 conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
549 cm = si->conn_match_hash[conn_match_idx];
550
551 /*
552 * If we don't have anything in this chain then bale.
553 */
554 if (unlikely(!cm)) {
555 return cm;
556 }
557
558 /*
559 * Hopefully the first entry is the one we want.
560 */
561 if (likely(cm->match_src_port == src_port)
562 && likely(cm->match_dest_port == dest_port)
563 && likely(cm->match_src_ip == src_ip)
564 && likely(cm->match_dest_ip == dest_ip)
565 && likely(cm->match_protocol == protocol)
566 && likely(cm->match_dev == dev)) {
567 si->connection_match_hash_hits++;
568 return cm;
569 }
570
571 /*
572 * We may or may not have a matching entry but if we do then we want to
573 * move that entry to the top of the hash chain when we get to it. We
574 * presume that this will be reused again very quickly.
575 */
576 head = cm;
577 do {
578 cm = cm->next;
579 } while (cm && (cm->match_src_port != src_port
580 || cm->match_dest_port != dest_port
581 || cm->match_src_ip != src_ip
582 || cm->match_dest_ip != dest_ip
583 || cm->match_protocol != protocol
584 || cm->match_dev != dev));
585
586 /*
587 * Not found then we're done.
588 */
589 if (unlikely(!cm)) {
590 return cm;
591 }
592
593 /*
594 * We found a match so move it.
595 */
596 if (cm->next) {
597 cm->next->prev = cm->prev;
598 }
599 cm->prev->next = cm->next;
600 cm->prev = NULL;
601 cm->next = head;
602 head->prev = cm;
603 si->conn_match_hash[conn_match_idx] = cm;
604 si->connection_match_hash_reorders++;
605
606 return cm;
607}
608
609/*
610 * sfe_ipv4_connection_match_update_summary_stats()
611 * Update the summary stats for a connection match entry.
612 */
613static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
614{
615 cm->rx_packet_count64 += cm->rx_packet_count;
616 cm->rx_packet_count = 0;
617 cm->rx_byte_count64 += cm->rx_byte_count;
618 cm->rx_byte_count = 0;
619}
620
621/*
622 * sfe_ipv4_connection_match_compute_translations()
623 * Compute port and address translations for a connection match entry.
624 */
625static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
626{
627 /*
628 * Before we insert the entry look to see if this is tagged as doing address
629 * translations. If it is then work out the adjustment that we need to apply
630 * to the transport checksum.
631 */
632 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
633 /*
634 * Precompute an incremental checksum adjustment so we can
635 * edit packets in this stream very quickly. The algorithm is from RFC1624.
636 */
637 uint16_t src_ip_hi = cm->match_src_ip >> 16;
638 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
639 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
640 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
641 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100642 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100643 uint32_t adj;
644
645 /*
646 * When we compute this fold it down to a 16-bit offset
647 * as that way we can avoid having to do a double
648 * folding of the twos-complement result because the
649 * addition of 2 16-bit values cannot cause a double
650 * wrap-around!
651 */
652 adj = src_ip_hi + src_ip_lo + cm->match_src_port
653 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
654 adj = (adj & 0xffff) + (adj >> 16);
655 adj = (adj & 0xffff) + (adj >> 16);
656 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600657
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100658 }
659
660 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
661 /*
662 * Precompute an incremental checksum adjustment so we can
663 * edit packets in this stream very quickly. The algorithm is from RFC1624.
664 */
665 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
666 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
667 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
668 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
669 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100670 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100671 uint32_t adj;
672
673 /*
674 * When we compute this fold it down to a 16-bit offset
675 * as that way we can avoid having to do a double
676 * folding of the twos-complement result because the
677 * addition of 2 16-bit values cannot cause a double
678 * wrap-around!
679 */
680 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
681 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
682 adj = (adj & 0xffff) + (adj >> 16);
683 adj = (adj & 0xffff) + (adj >> 16);
684 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
685 }
Xiaoping Fanad755af2015-04-01 16:58:46 -0700686
687 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
688 uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
689 if (adj < cm->xlate_src_ip) {
690 adj++;
691 }
692
693 adj = (adj & 0xffff) + (adj >> 16);
694 adj = (adj & 0xffff) + (adj >> 16);
695 cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
696 }
697
698 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
699 uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
700 if (adj < cm->xlate_dest_ip) {
701 adj++;
702 }
703
704 adj = (adj & 0xffff) + (adj >> 16);
705 adj = (adj & 0xffff) + (adj >> 16);
706 cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
707 }
708
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100709}
710
711/*
712 * sfe_ipv4_update_summary_stats()
713 * Update the summary stats.
714 */
715static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
716{
717 int i;
718
719 si->connection_create_requests64 += si->connection_create_requests;
720 si->connection_create_requests = 0;
721 si->connection_create_collisions64 += si->connection_create_collisions;
722 si->connection_create_collisions = 0;
723 si->connection_destroy_requests64 += si->connection_destroy_requests;
724 si->connection_destroy_requests = 0;
725 si->connection_destroy_misses64 += si->connection_destroy_misses;
726 si->connection_destroy_misses = 0;
727 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
728 si->connection_match_hash_hits = 0;
729 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
730 si->connection_match_hash_reorders = 0;
731 si->connection_flushes64 += si->connection_flushes;
732 si->connection_flushes = 0;
733 si->packets_forwarded64 += si->packets_forwarded;
734 si->packets_forwarded = 0;
735 si->packets_not_forwarded64 += si->packets_not_forwarded;
736 si->packets_not_forwarded = 0;
737
738 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
739 si->exception_events64[i] += si->exception_events[i];
740 si->exception_events[i] = 0;
741 }
742}
743
744/*
745 * sfe_ipv4_insert_sfe_ipv4_connection_match()
746 * Insert a connection match into the hash.
747 *
748 * On entry we must be holding the lock that protects the hash table.
749 */
750static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
751{
752 struct sfe_ipv4_connection_match **hash_head;
753 struct sfe_ipv4_connection_match *prev_head;
754 unsigned int conn_match_idx
755 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
756 cm->match_src_ip, cm->match_src_port,
757 cm->match_dest_ip, cm->match_dest_port);
758 hash_head = &si->conn_match_hash[conn_match_idx];
759 prev_head = *hash_head;
760 cm->prev = NULL;
761 if (prev_head) {
762 prev_head->prev = cm;
763 }
764
765 cm->next = prev_head;
766 *hash_head = cm;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800767
768#ifdef CONFIG_NF_FLOW_COOKIE
769 /*
770 * Configure hardware to put a flow cookie in packet of this flow,
771 * then we can accelerate the lookup process when we received this packet.
772 */
773 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
774 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
775
776 if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
777 flow_cookie_set_func_t func;
778
779 rcu_read_lock();
780 func = rcu_dereference(si->flow_cookie_set_func);
781 if (func) {
Xiaoping Fan59176422015-05-22 15:58:10 -0700782 if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800783 cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
784 entry->match = cm;
785 cm->flow_cookie = conn_match_idx;
786 }
787 }
788 rcu_read_unlock();
789
790 break;
791 }
792 }
793#endif
794
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100795}
796
797/*
798 * sfe_ipv4_remove_sfe_ipv4_connection_match()
799 * Remove a connection match object from the hash.
800 *
801 * On entry we must be holding the lock that protects the hash table.
802 */
803static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
804{
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800805#ifdef CONFIG_NF_FLOW_COOKIE
806 /*
807 * Tell hardware that we no longer need a flow cookie in packet of this flow
808 */
809 unsigned int conn_match_idx;
810
811 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
812 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
813
814 if (cm == entry->match) {
815 flow_cookie_set_func_t func;
816
817 rcu_read_lock();
818 func = rcu_dereference(si->flow_cookie_set_func);
819 if (func) {
820 func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
821 cm->match_dest_ip, cm->match_dest_port, 0);
822 }
823 rcu_read_unlock();
824
825 cm->flow_cookie = 0;
826 entry->match = NULL;
827 entry->last_clean_time = jiffies;
828 break;
829 }
830 }
831#endif
832
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100833 /*
834 * Unlink the connection match entry from the hash.
835 */
836 if (cm->prev) {
837 cm->prev->next = cm->next;
838 } else {
839 unsigned int conn_match_idx
840 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
841 cm->match_src_ip, cm->match_src_port,
842 cm->match_dest_ip, cm->match_dest_port);
843 si->conn_match_hash[conn_match_idx] = cm->next;
844 }
845
846 if (cm->next) {
847 cm->next->prev = cm->prev;
848 }
849
850 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600851 * If the connection match entry is in the active list remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100852 */
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600853 if (cm->active) {
854 if (likely(cm->active_prev)) {
855 cm->active_prev->active_next = cm->active_next;
856 } else {
857 si->active_head = cm->active_next;
858 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100859
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600860 if (likely(cm->active_next)) {
861 cm->active_next->active_prev = cm->active_prev;
862 } else {
863 si->active_tail = cm->active_prev;
864 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100865 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100866}
867
868/*
869 * sfe_ipv4_get_connection_hash()
870 * Generate the hash used in connection lookups.
871 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100872static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
873 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100874{
Dave Hudson87973cd2013-10-22 16:00:04 +0100875 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100876 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
877}
878
879/*
880 * sfe_ipv4_find_sfe_ipv4_connection()
881 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
882 *
883 * On entry we must be holding the lock that protects the hash table.
884 */
885static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100886 __be32 src_ip, __be16 src_port,
887 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100888{
889 struct sfe_ipv4_connection *c;
890 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
891 c = si->conn_hash[conn_idx];
892
893 /*
894 * If we don't have anything in this chain then bale.
895 */
896 if (unlikely(!c)) {
897 return c;
898 }
899
900 /*
901 * Hopefully the first entry is the one we want.
902 */
903 if (likely(c->src_port == src_port)
904 && likely(c->dest_port == dest_port)
905 && likely(c->src_ip == src_ip)
906 && likely(c->dest_ip == dest_ip)
907 && likely(c->protocol == protocol)) {
908 return c;
909 }
910
911 /*
912 * We may or may not have a matching entry but if we do then we want to
913 * move that entry to the top of the hash chain when we get to it. We
914 * presume that this will be reused again very quickly.
915 */
916 do {
917 c = c->next;
918 } while (c && (c->src_port != src_port
919 || c->dest_port != dest_port
920 || c->src_ip != src_ip
921 || c->dest_ip != dest_ip
922 || c->protocol != protocol));
923
924 /*
925 * Will need connection entry for next create/destroy metadata,
926 * So no need to re-order entry for these requests
927 */
928 return c;
929}
930
931/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600932 * sfe_ipv4_mark_rule()
933 * Updates the mark for a current offloaded connection
934 *
935 * Will take hash lock upon entry
936 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700937void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600938{
939 struct sfe_ipv4 *si = &__si;
940 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600941
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700942 spin_lock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600943 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700944 mark->src_ip.ip, mark->src_port,
945 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600946 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600947 DEBUG_TRACE("Matching connection found for mark, "
948 "setting from %08x to %08x\n",
949 c->mark, mark->mark);
950 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600951 c->mark = mark->mark;
952 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700953 spin_unlock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600954}
955
956/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100957 * sfe_ipv4_insert_sfe_ipv4_connection()
958 * Insert a connection into the hash.
959 *
960 * On entry we must be holding the lock that protects the hash table.
961 */
962static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
963{
964 struct sfe_ipv4_connection **hash_head;
965 struct sfe_ipv4_connection *prev_head;
966 unsigned int conn_idx;
967
968 /*
969 * Insert entry into the connection hash.
970 */
971 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
972 c->dest_ip, c->dest_port);
973 hash_head = &si->conn_hash[conn_idx];
974 prev_head = *hash_head;
975 c->prev = NULL;
976 if (prev_head) {
977 prev_head->prev = c;
978 }
979
980 c->next = prev_head;
981 *hash_head = c;
982
983 /*
984 * Insert entry into the "all connections" list.
985 */
986 if (si->all_connections_tail) {
987 c->all_connections_prev = si->all_connections_tail;
988 si->all_connections_tail->all_connections_next = c;
989 } else {
990 c->all_connections_prev = NULL;
991 si->all_connections_head = c;
992 }
993
994 si->all_connections_tail = c;
995 c->all_connections_next = NULL;
996 si->num_connections++;
997
998 /*
999 * Insert the connection match objects too.
1000 */
1001 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
1002 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
1003}
1004
1005/*
1006 * sfe_ipv4_remove_sfe_ipv4_connection()
1007 * Remove a sfe_ipv4_connection object from the hash.
1008 *
1009 * On entry we must be holding the lock that protects the hash table.
1010 */
1011static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1012{
1013 /*
1014 * Remove the connection match objects.
1015 */
1016 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1017 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1018
1019 /*
1020 * Unlink the connection.
1021 */
1022 if (c->prev) {
1023 c->prev->next = c->next;
1024 } else {
1025 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1026 c->dest_ip, c->dest_port);
1027 si->conn_hash[conn_idx] = c->next;
1028 }
1029
1030 if (c->next) {
1031 c->next->prev = c->prev;
1032 }
Xiaoping Fan34586472015-07-03 02:20:35 -07001033
1034 /*
1035 * Unlink connection from all_connections list
1036 */
1037 if (c->all_connections_prev) {
1038 c->all_connections_prev->all_connections_next = c->all_connections_next;
1039 } else {
1040 si->all_connections_head = c->all_connections_next;
1041 }
1042
1043 if (c->all_connections_next) {
1044 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1045 } else {
1046 si->all_connections_tail = c->all_connections_prev;
1047 }
1048
1049 si->num_connections--;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001050}
1051
1052/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001053 * sfe_ipv4_sync_sfe_ipv4_connection()
1054 * Sync a connection.
1055 *
1056 * On entry to this function we expect that the lock for the connection is either
1057 * already held or isn't required.
1058 */
1059static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001060 struct sfe_connection_sync *sis, uint64_t now_jiffies)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001061{
1062 struct sfe_ipv4_connection_match *original_cm;
1063 struct sfe_ipv4_connection_match *reply_cm;
1064
1065 /*
1066 * Fill in the update message.
1067 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001068 sis->is_v6 = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001069 sis->protocol = c->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001070 sis->src_ip.ip = c->src_ip;
1071 sis->dest_ip.ip = c->dest_ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001072 sis->src_port = c->src_port;
1073 sis->dest_port = c->dest_port;
1074
1075 original_cm = c->original_match;
1076 reply_cm = c->reply_match;
1077 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
1078 sis->src_td_end = original_cm->protocol_state.tcp.end;
1079 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
1080 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
1081 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
1082 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
1083
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001084 sis->src_new_packet_count = original_cm->rx_packet_count;
1085 sis->src_new_byte_count = original_cm->rx_byte_count;
1086 sis->dest_new_packet_count = reply_cm->rx_packet_count;
1087 sis->dest_new_byte_count = reply_cm->rx_byte_count;
1088
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001089 sfe_ipv4_connection_match_update_summary_stats(original_cm);
1090 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
1091
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001092 sis->src_dev = original_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001093 sis->src_packet_count = original_cm->rx_packet_count64;
1094 sis->src_byte_count = original_cm->rx_byte_count64;
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001095
1096 sis->dest_dev = reply_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001097 sis->dest_packet_count = reply_cm->rx_packet_count64;
1098 sis->dest_byte_count = reply_cm->rx_byte_count64;
1099
1100 /*
1101 * Get the time increment since our last sync.
1102 */
1103 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
1104 c->last_sync_jiffies = now_jiffies;
1105}
1106
1107/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001108 * sfe_ipv4_flush_sfe_ipv4_connection()
1109 * Flush a connection and free all associated resources.
1110 *
1111 * We need to be called with bottom halves disabled locally as we need to acquire
1112 * the connection hash lock and release it again. In general we're actually called
1113 * from within a BH and so we're fine, but we're also called when connections are
1114 * torn down.
1115 */
1116static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1117{
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001118 struct sfe_connection_sync sis;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001119 uint64_t now_jiffies;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001120 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001121
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001122 rcu_read_lock();
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001123 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001124 si->connection_flushes++;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001125 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001126 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001127
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001128 if (sync_rule_callback) {
1129 /*
1130 * Generate a sync message and then sync.
1131 */
1132 now_jiffies = get_jiffies_64();
1133 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
1134 sync_rule_callback(&sis);
1135 }
1136
1137 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001138
1139 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001140 * Release our hold of the source and dest devices and free the memory
1141 * for our connection objects.
1142 */
1143 dev_put(c->original_dev);
1144 dev_put(c->reply_dev);
1145 kfree(c->original_match);
1146 kfree(c->reply_match);
1147 kfree(c);
1148}
1149
1150/*
1151 * sfe_ipv4_recv_udp()
1152 * Handle UDP packet receives and forwarding.
1153 */
1154static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001155 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001156{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001157 struct sfe_ipv4_udp_hdr *udph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001158 __be32 src_ip;
1159 __be32 dest_ip;
1160 __be16 src_port;
1161 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001162 struct sfe_ipv4_connection_match *cm;
1163 uint8_t ttl;
1164 struct net_device *xmit_dev;
1165
1166 /*
1167 * Is our packet too short to contain a valid UDP header?
1168 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001169 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001170 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001171 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1172 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001173 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001174
1175 DEBUG_TRACE("packet too short for UDP header\n");
1176 return 0;
1177 }
1178
1179 /*
1180 * Read the IP address and port information. Read the IP header data first
1181 * because we've almost certainly got that in the cache. We may not yet have
1182 * the UDP header cached though so allow more time for any prefetching.
1183 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001184 src_ip = iph->saddr;
1185 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001186
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001187 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001188 src_port = udph->source;
1189 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001190
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001191 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001192
1193 /*
1194 * Look for a connection match.
1195 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001196#ifdef CONFIG_NF_FLOW_COOKIE
1197 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1198 if (unlikely(!cm)) {
1199 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1200 }
1201#else
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001202 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001203#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001204 if (unlikely(!cm)) {
1205 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1206 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001207 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001208
1209 DEBUG_TRACE("no connection found\n");
1210 return 0;
1211 }
1212
1213 /*
1214 * If our packet has beern marked as "flush on find" we can't actually
1215 * forward it in the fast path, but now that we've found an associated
1216 * connection we can flush that out before we process the packet.
1217 */
1218 if (unlikely(flush_on_find)) {
1219 struct sfe_ipv4_connection *c = cm->connection;
1220 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1221 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1222 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001223 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001224
1225 DEBUG_TRACE("flush on find\n");
1226 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1227 return 0;
1228 }
1229
Zhi Chen8748eb32015-06-18 12:58:48 -07001230#ifdef CONFIG_XFRM
1231 /*
1232 * We can't accelerate the flow on this direction, just let it go
1233 * through the slow path.
1234 */
1235 if (unlikely(!cm->flow_accel)) {
1236 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001237 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001238 return 0;
1239 }
1240#endif
1241
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001242 /*
1243 * Does our TTL allow forwarding?
1244 */
1245 ttl = iph->ttl;
1246 if (unlikely(ttl < 2)) {
1247 struct sfe_ipv4_connection *c = cm->connection;
1248 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1249 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1250 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001251 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001252
1253 DEBUG_TRACE("ttl too low\n");
1254 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1255 return 0;
1256 }
1257
1258 /*
1259 * If our packet is larger than the MTU of the transmit interface then
1260 * we can't forward it easily.
1261 */
1262 if (unlikely(len > cm->xmit_dev_mtu)) {
1263 struct sfe_ipv4_connection *c = cm->connection;
1264 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1265 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1266 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001267 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001268
1269 DEBUG_TRACE("larger than mtu\n");
1270 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1271 return 0;
1272 }
1273
1274 /*
1275 * From this point on we're good to modify the packet.
1276 */
1277
1278 /*
1279 * Decrement our TTL.
1280 */
1281 iph->ttl = ttl - 1;
1282
1283 /*
1284 * Do we have to perform translations of the source address/port?
1285 */
1286 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1287 uint16_t udp_csum;
1288
Dave Hudson87973cd2013-10-22 16:00:04 +01001289 iph->saddr = cm->xlate_src_ip;
1290 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001291
1292 /*
1293 * Do we have a non-zero UDP checksum? If we do then we need
1294 * to update it.
1295 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001296 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001297 if (likely(udp_csum)) {
Xiaoping Fanad755af2015-04-01 16:58:46 -07001298 uint32_t sum;
1299
1300 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1301 sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
1302 } else {
1303 sum = udp_csum + cm->xlate_src_csum_adjustment;
1304 }
1305
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001306 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001307 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001308 }
1309 }
1310
1311 /*
1312 * Do we have to perform translations of the destination address/port?
1313 */
1314 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1315 uint16_t udp_csum;
1316
Dave Hudson87973cd2013-10-22 16:00:04 +01001317 iph->daddr = cm->xlate_dest_ip;
1318 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001319
1320 /*
1321 * Do we have a non-zero UDP checksum? If we do then we need
1322 * to update it.
1323 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001324 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001325 if (likely(udp_csum)) {
Xiaoping Fanad755af2015-04-01 16:58:46 -07001326 uint32_t sum;
1327
1328 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1329 sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
1330 } else {
1331 sum = udp_csum + cm->xlate_dest_csum_adjustment;
1332 }
1333
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001334 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001335 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001336 }
1337 }
1338
1339 /*
1340 * Replace the IP checksum.
1341 */
1342 iph->check = sfe_ipv4_gen_ip_csum(iph);
1343
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001344 /*
1345 * Update traffic stats.
1346 */
1347 cm->rx_packet_count++;
1348 cm->rx_byte_count += len;
1349
1350 /*
1351 * If we're not already on the active list then insert ourselves at the tail
1352 * of the current list.
1353 */
1354 if (unlikely(!cm->active)) {
1355 cm->active = true;
1356 cm->active_prev = si->active_tail;
1357 if (likely(si->active_tail)) {
1358 si->active_tail->active_next = cm;
1359 } else {
1360 si->active_head = cm;
1361 }
1362 si->active_tail = cm;
1363 }
1364
1365 xmit_dev = cm->xmit_dev;
1366 skb->dev = xmit_dev;
1367
1368 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001369 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001370 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001371 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1372 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001373 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1374 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001375 } else {
1376 /*
1377 * For the simple case we write this really fast.
1378 */
1379 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1380 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001381 eth->h_dest[0] = cm->xmit_dest_mac[0];
1382 eth->h_dest[1] = cm->xmit_dest_mac[1];
1383 eth->h_dest[2] = cm->xmit_dest_mac[2];
1384 eth->h_source[0] = cm->xmit_src_mac[0];
1385 eth->h_source[1] = cm->xmit_src_mac[1];
1386 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001387 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001388 }
1389
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001390 /*
1391 * Mark outgoing packet.
1392 */
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001393 skb->mark = cm->connection->mark;
1394 if (skb->mark) {
1395 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1396 }
1397
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001398 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001399 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001400
1401 /*
1402 * We're going to check for GSO flags when we transmit the packet so
1403 * start fetching the necessary cache line now.
1404 */
1405 prefetch(skb_shinfo(skb));
1406
1407 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001408 * Mark that this packet has been fast forwarded.
1409 */
1410 skb->fast_forwarded = 1;
1411
1412 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001413 * Send the packet on its way.
1414 */
1415 dev_queue_xmit(skb);
1416
1417 return 1;
1418}
1419
1420/*
1421 * sfe_ipv4_process_tcp_option_sack()
1422 * Parse TCP SACK option and update ack according
1423 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001424static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001425 uint32_t *ack) __attribute__((always_inline));
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001426static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001427 uint32_t *ack)
1428{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001429 uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001430 uint8_t *ptr = (uint8_t *)th + length;
1431
1432 /*
1433 * If option is TIMESTAMP discard it.
1434 */
1435 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1436 && likely(ptr[0] == TCPOPT_NOP)
1437 && likely(ptr[1] == TCPOPT_NOP)
1438 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1439 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1440 return true;
1441 }
1442
1443 /*
1444 * TCP options. Parse SACK option.
1445 */
1446 while (length < data_offs) {
1447 uint8_t size;
1448 uint8_t kind;
1449
1450 ptr = (uint8_t *)th + length;
1451 kind = *ptr;
1452
1453 /*
1454 * NOP, for padding
1455 * Not in the switch because to fast escape and to not calculate size
1456 */
1457 if (kind == TCPOPT_NOP) {
1458 length++;
1459 continue;
1460 }
1461
1462 if (kind == TCPOPT_SACK) {
1463 uint32_t sack = 0;
1464 uint8_t re = 1 + 1;
1465
1466 size = *(ptr + 1);
1467 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1468 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1469 || (size > (data_offs - length))) {
1470 return false;
1471 }
1472
1473 re += 4;
1474 while (re < size) {
1475 uint32_t sack_re;
1476 uint8_t *sptr = ptr + re;
1477 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1478 if (sack_re > sack) {
1479 sack = sack_re;
1480 }
1481 re += TCPOLEN_SACK_PERBLOCK;
1482 }
1483 if (sack > *ack) {
1484 *ack = sack;
1485 }
1486 length += size;
1487 continue;
1488 }
1489 if (kind == TCPOPT_EOL) {
1490 return true;
1491 }
1492 size = *(ptr + 1);
1493 if (size < 2) {
1494 return false;
1495 }
1496 length += size;
1497 }
1498
1499 return true;
1500}
1501
1502/*
1503 * sfe_ipv4_recv_tcp()
1504 * Handle TCP packet receives and forwarding.
1505 */
1506static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001507 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001508{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001509 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001510 __be32 src_ip;
1511 __be32 dest_ip;
1512 __be16 src_port;
1513 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001514 struct sfe_ipv4_connection_match *cm;
1515 struct sfe_ipv4_connection_match *counter_cm;
1516 uint8_t ttl;
1517 uint32_t flags;
1518 struct net_device *xmit_dev;
1519
1520 /*
1521 * Is our packet too short to contain a valid UDP header?
1522 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001523 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001524 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001525 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1526 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001527 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001528
1529 DEBUG_TRACE("packet too short for TCP header\n");
1530 return 0;
1531 }
1532
1533 /*
1534 * Read the IP address and port information. Read the IP header data first
1535 * because we've almost certainly got that in the cache. We may not yet have
1536 * the TCP header cached though so allow more time for any prefetching.
1537 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001538 src_ip = iph->saddr;
1539 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001540
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001541 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001542 src_port = tcph->source;
1543 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001544 flags = tcp_flag_word(tcph);
1545
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001546 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001547
1548 /*
1549 * Look for a connection match.
1550 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001551#ifdef CONFIG_NF_FLOW_COOKIE
1552 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1553 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001554 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001555 }
1556#else
Matthew McClintock37858802015-02-03 12:12:02 -06001557 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001558#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001559 if (unlikely(!cm)) {
1560 /*
1561 * We didn't get a connection but as TCP is connection-oriented that
1562 * may be because this is a non-fast connection (not running established).
1563 * For diagnostic purposes we differentiate this here.
1564 */
1565 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1566 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1567 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001568 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001569
1570 DEBUG_TRACE("no connection found - fast flags\n");
1571 return 0;
1572 }
1573 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1574 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001575 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001576
1577 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1578 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1579 return 0;
1580 }
1581
1582 /*
1583 * If our packet has beern marked as "flush on find" we can't actually
1584 * forward it in the fast path, but now that we've found an associated
1585 * connection we can flush that out before we process the packet.
1586 */
1587 if (unlikely(flush_on_find)) {
1588 struct sfe_ipv4_connection *c = cm->connection;
1589 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1590 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1591 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001592 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001593
1594 DEBUG_TRACE("flush on find\n");
1595 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1596 return 0;
1597 }
1598
Zhi Chen8748eb32015-06-18 12:58:48 -07001599#ifdef CONFIG_XFRM
1600 /*
1601 * We can't accelerate the flow on this direction, just let it go
1602 * through the slow path.
1603 */
1604 if (unlikely(!cm->flow_accel)) {
1605 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001606 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001607 return 0;
1608 }
1609#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001610 /*
1611 * Does our TTL allow forwarding?
1612 */
1613 ttl = iph->ttl;
1614 if (unlikely(ttl < 2)) {
1615 struct sfe_ipv4_connection *c = cm->connection;
1616 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1617 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1618 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001619 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001620
1621 DEBUG_TRACE("ttl too low\n");
1622 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1623 return 0;
1624 }
1625
1626 /*
1627 * If our packet is larger than the MTU of the transmit interface then
1628 * we can't forward it easily.
1629 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001630 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001631 struct sfe_ipv4_connection *c = cm->connection;
1632 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1633 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1634 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001635 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001636
1637 DEBUG_TRACE("larger than mtu\n");
1638 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1639 return 0;
1640 }
1641
1642 /*
1643 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1644 * set is not a fast path packet.
1645 */
1646 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1647 struct sfe_ipv4_connection *c = cm->connection;
1648 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1649 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1650 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001651 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001652
1653 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1654 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1655 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1656 return 0;
1657 }
1658
1659 counter_cm = cm->counter_match;
1660
1661 /*
1662 * Are we doing sequence number checking?
1663 */
1664 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1665 uint32_t seq;
1666 uint32_t ack;
1667 uint32_t sack;
1668 uint32_t data_offs;
1669 uint32_t end;
1670 uint32_t left_edge;
1671 uint32_t scaled_win;
1672 uint32_t max_end;
1673
1674 /*
1675 * Is our sequence fully past the right hand edge of the window?
1676 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001677 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001678 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1679 struct sfe_ipv4_connection *c = cm->connection;
1680 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1681 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1682 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001683 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001684
1685 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1686 seq, cm->protocol_state.tcp.max_end + 1);
1687 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1688 return 0;
1689 }
1690
1691 /*
1692 * Check that our TCP data offset isn't too short.
1693 */
1694 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001695 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001696 struct sfe_ipv4_connection *c = cm->connection;
1697 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1698 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1699 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001700 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001701
1702 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1703 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1704 return 0;
1705 }
1706
1707 /*
1708 * Update ACK according to any SACK option.
1709 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001710 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001711 sack = ack;
1712 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1713 struct sfe_ipv4_connection *c = cm->connection;
1714 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1715 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1716 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001717 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001718
1719 DEBUG_TRACE("TCP option SACK size is wrong\n");
1720 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1721 return 0;
1722 }
1723
1724 /*
1725 * Check that our TCP data offset isn't past the end of the packet.
1726 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001727 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001728 if (unlikely(len < data_offs)) {
1729 struct sfe_ipv4_connection *c = cm->connection;
1730 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1731 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1732 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001733 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001734
1735 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1736 data_offs, len);
1737 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1738 return 0;
1739 }
1740
1741 end = seq + len - data_offs;
1742
1743 /*
1744 * Is our sequence fully before the left hand edge of the window?
1745 */
1746 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1747 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1748 struct sfe_ipv4_connection *c = cm->connection;
1749 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1750 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1751 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001752 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001753
1754 DEBUG_TRACE("seq: %u before left edge: %u\n",
1755 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1756 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1757 return 0;
1758 }
1759
1760 /*
1761 * Are we acking data that is to the right of what has been sent?
1762 */
1763 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1764 struct sfe_ipv4_connection *c = cm->connection;
1765 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1766 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1767 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001768 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001769
1770 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1771 sack, counter_cm->protocol_state.tcp.end + 1);
1772 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1773 return 0;
1774 }
1775
1776 /*
1777 * Is our ack too far before the left hand edge of the window?
1778 */
1779 left_edge = counter_cm->protocol_state.tcp.end
1780 - cm->protocol_state.tcp.max_win
1781 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1782 - 1;
1783 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1784 struct sfe_ipv4_connection *c = cm->connection;
1785 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1786 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1787 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001788 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001789
1790 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1791 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1792 return 0;
1793 }
1794
1795 /*
1796 * Have we just seen the largest window size yet for this connection? If yes
1797 * then we need to record the new value.
1798 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001799 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001800 scaled_win += (sack - ack);
1801 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1802 cm->protocol_state.tcp.max_win = scaled_win;
1803 }
1804
1805 /*
1806 * If our sequence and/or ack numbers have advanced then record the new state.
1807 */
1808 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1809 cm->protocol_state.tcp.end = end;
1810 }
1811
1812 max_end = sack + scaled_win;
1813 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1814 counter_cm->protocol_state.tcp.max_end = max_end;
1815 }
1816 }
1817
1818 /*
1819 * From this point on we're good to modify the packet.
1820 */
1821
1822 /*
1823 * Decrement our TTL.
1824 */
1825 iph->ttl = ttl - 1;
1826
1827 /*
1828 * Do we have to perform translations of the source address/port?
1829 */
1830 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1831 uint16_t tcp_csum;
1832 uint32_t sum;
1833
Dave Hudson87973cd2013-10-22 16:00:04 +01001834 iph->saddr = cm->xlate_src_ip;
1835 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001836
1837 /*
	 * Do we have a non-zero TCP checksum? If we do then we need
1839 * to update it.
1840 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001841 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001842 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1843 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1844 } else {
1845 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1846 }
1847
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001848 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001849 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001850 }
1851
1852 /*
1853 * Do we have to perform translations of the destination address/port?
1854 */
1855 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1856 uint16_t tcp_csum;
1857 uint32_t sum;
1858
Dave Hudson87973cd2013-10-22 16:00:04 +01001859 iph->daddr = cm->xlate_dest_ip;
1860 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001861
1862 /*
	 * Do we have a non-zero TCP checksum? If we do then we need
1864 * to update it.
1865 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001866 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001867 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1868 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1869 } else {
1870 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1871 }
1872
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001873 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001874 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001875 }
1876
1877 /*
1878 * Replace the IP checksum.
1879 */
1880 iph->check = sfe_ipv4_gen_ip_csum(iph);
1881
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001882 /*
1883 * Update traffic stats.
1884 */
1885 cm->rx_packet_count++;
1886 cm->rx_byte_count += len;
1887
1888 /*
1889 * If we're not already on the active list then insert ourselves at the tail
1890 * of the current list.
1891 */
1892 if (unlikely(!cm->active)) {
1893 cm->active = true;
1894 cm->active_prev = si->active_tail;
1895 if (likely(si->active_tail)) {
1896 si->active_tail->active_next = cm;
1897 } else {
1898 si->active_head = cm;
1899 }
1900 si->active_tail = cm;
1901 }
1902
1903 xmit_dev = cm->xmit_dev;
1904 skb->dev = xmit_dev;
1905
1906 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001907 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001908 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001909 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1910 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001911 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1912 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001913 } else {
1914 /*
1915 * For the simple case we write this really fast.
1916 */
1917 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1918 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001919 eth->h_dest[0] = cm->xmit_dest_mac[0];
1920 eth->h_dest[1] = cm->xmit_dest_mac[1];
1921 eth->h_dest[2] = cm->xmit_dest_mac[2];
1922 eth->h_source[0] = cm->xmit_src_mac[0];
1923 eth->h_source[1] = cm->xmit_src_mac[1];
1924 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001925 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001926 }
1927
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001928 /*
1929 * Mark outgoing packet
1930 */
1931 skb->mark = cm->connection->mark;
1932 if (skb->mark) {
1933 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1934 }
1935
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001936 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001937 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001938
1939 /*
1940 * We're going to check for GSO flags when we transmit the packet so
1941 * start fetching the necessary cache line now.
1942 */
1943 prefetch(skb_shinfo(skb));
1944
1945 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001946 * Mark that this packet has been fast forwarded.
1947 */
1948 skb->fast_forwarded = 1;
1949
1950 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001951 * Send the packet on its way.
1952 */
1953 dev_queue_xmit(skb);
1954
1955 return 1;
1956}
1957
/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack. What we do need to do is look for any errors
 * about connections we are handling in the fast path. If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 *
 * si: the SFE instance whose counters and connection tables are used
 * skb: the received packet
 * dev: the net_device on which the packet arrived
 * len: IP datagram length, including the outer IP header
 * iph: the outer IP header (already validated by the caller)
 * ihl: length in bytes of the outer IP header, including any options
 *
 * Always returns 0 so the packet continues through the normal Linux stack.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct sfe_ipv4_ip_hdr *icmp_iph;	/* IP header embedded in the ICMP payload */
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	uint32_t *icmp_trans_h;			/* embedded transport header, as 32-bit words */
	struct sfe_ipv4_udp_hdr *icmp_udph;
	struct sfe_ipv4_tcp_hdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;
	/*
	 * pull_len tracks how many bytes from the start of the packet must be
	 * linear in the skb; it grows as each deeper header is examined.
	 */
	uint32_t pull_len = sizeof(struct icmphdr) + ihl;

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 */
	len -= ihl;
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 */
	len -= sizeof(struct icmphdr);
	pull_len += sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 * The base header size was already added to pull_len above, so only
	 * the options length is added here.
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	len -= icmp_ihl;
	icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	spin_lock_bh(&si->lock);

	/*
	 * Look for a connection match. Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it. It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state.  The flush happens outside the lock.
	 */
	c = cm->connection;
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	return 0;
}
2154
/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Validates the outer IP header (length, version, fragmentation, options)
 * and then dispatches to the per-protocol fast-path handler for UDP, TCP
 * or ICMP.  Anything else is counted as an exception and left to the
 * normal Linux stack.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool flush_on_find;		/* ask the handler to flush any matching connection */
	bool ip_options;
	struct sfe_ipv4_ip_hdr *iph;
	uint32_t protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 */
	iph = (struct sfe_ipv4_ip_hdr *)skb->data;
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?
	 */
	frag_off = ntohs(iph->frag_off);
	if (unlikely(frag_off & IP_OFFSET)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("non-initial fragment\n");
		return 0;
	}

	/*
	 * If we have a (first) fragment then mark it to cause any connection to flush.
	 */
	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;

	/*
	 * Do we have any IP options? That's definitely a slow path! If we do have IP
	 * options we need to recheck our header size.
	 */
	ihl = iph->ihl << 2;
	ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
	if (unlikely(ip_options)) {
		if (unlikely(len < ihl)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
			return 0;
		}

		flush_on_find = true;
	}

	protocol = iph->protocol;
	if (IPPROTO_UDP == protocol) {
		return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == protocol) {
		return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMP == protocol) {
		return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	spin_lock_bh(&si->lock);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
	return 0;
}
2288
Nicolas Costa436926b2014-01-14 10:36:22 -06002289static void
2290sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002291 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002292{
2293 struct sfe_ipv4_connection_match *orig_cm;
2294 struct sfe_ipv4_connection_match *repl_cm;
2295 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2296 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2297
2298 orig_cm = c->original_match;
2299 repl_cm = c->reply_match;
2300 orig_tcp = &orig_cm->protocol_state.tcp;
2301 repl_tcp = &repl_cm->protocol_state.tcp;
2302
2303 /* update orig */
2304 if (orig_tcp->max_win < sic->src_td_max_window) {
2305 orig_tcp->max_win = sic->src_td_max_window;
2306 }
2307 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2308 orig_tcp->end = sic->src_td_end;
2309 }
2310 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2311 orig_tcp->max_end = sic->src_td_max_end;
2312 }
2313
2314 /* update reply */
2315 if (repl_tcp->max_win < sic->dest_td_max_window) {
2316 repl_tcp->max_win = sic->dest_td_max_window;
2317 }
2318 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2319 repl_tcp->end = sic->dest_td_end;
2320 }
2321 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2322 repl_tcp->max_end = sic->dest_td_max_end;
2323 }
2324
2325 /* update match flags */
2326 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2327 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002328 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002329 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2330 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2331 }
2332}
2333
2334static void
2335sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002336 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002337{
2338 switch (sic->protocol) {
2339 case IPPROTO_TCP:
2340 sfe_ipv4_update_tcp_state(c, sic);
2341 break;
2342 }
2343}
2344
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002345void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002346{
2347 struct sfe_ipv4_connection *c;
2348 struct sfe_ipv4 *si = &__si;
2349
2350 spin_lock_bh(&si->lock);
2351
2352 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2353 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002354 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002355 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002356 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002357 sic->dest_port);
2358 if (c != NULL) {
2359 sfe_ipv4_update_protocol_state(c, sic);
2360 }
2361
2362 spin_unlock_bh(&si->lock);
2363}
2364
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002365/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002366 * sfe_ipv4_create_rule()
2367 * Create a forwarding rule.
2368 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002369int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002370{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002371 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002372 struct sfe_ipv4_connection *c;
2373 struct sfe_ipv4_connection_match *original_cm;
2374 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002375 struct net_device *dest_dev;
2376 struct net_device *src_dev;
2377
2378 dest_dev = sic->dest_dev;
2379 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002380
Matthew McClintock389b42a2014-09-24 14:05:51 -05002381 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2382 (src_dev->reg_state != NETREG_REGISTERED))) {
2383 return -EINVAL;
2384 }
2385
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002386 spin_lock_bh(&si->lock);
2387 si->connection_create_requests++;
2388
2389 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002390 * Check to see if there is already a flow that matches the rule we're
2391 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002392 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002393 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2394 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002395 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002396 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002397 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002398 sic->dest_port);
2399 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002400 si->connection_create_collisions++;
2401
2402 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002403 * If we already have the flow then it's likely that this
2404 * request to create the connection rule contains more
2405 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002406 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002407 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002408 spin_unlock_bh(&si->lock);
2409
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002410 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002411 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002412 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002413 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2414 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002415 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002416 }
2417
2418 /*
2419 * Allocate the various connection tracking objects.
2420 */
2421 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2422 if (unlikely(!c)) {
2423 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002424 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002425 }
2426
2427 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2428 if (unlikely(!original_cm)) {
2429 spin_unlock_bh(&si->lock);
2430 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002431 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002432 }
2433
2434 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2435 if (unlikely(!reply_cm)) {
2436 spin_unlock_bh(&si->lock);
2437 kfree(original_cm);
2438 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002439 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002440 }
2441
2442 /*
2443 * Fill in the "original" direction connection matching object.
2444 * Note that the transmit MAC address is "dest_mac_xlate" because
2445 * we always know both ends of a connection by their translated
2446 * addresses and not their public addresses.
2447 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002448 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002449 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002450 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002451 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002452 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002453 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002454 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002455 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002456 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002457 original_cm->xlate_dest_port = sic->dest_port_xlate;
2458 original_cm->rx_packet_count = 0;
2459 original_cm->rx_packet_count64 = 0;
2460 original_cm->rx_byte_count = 0;
2461 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002462 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002463 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002464 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002465 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2466 original_cm->connection = c;
2467 original_cm->counter_match = reply_cm;
2468 original_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002469#ifdef CONFIG_NF_FLOW_COOKIE
2470 original_cm->flow_cookie = 0;
2471#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002472#ifdef CONFIG_XFRM
2473 original_cm->flow_accel = sic->original_accel;
2474#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002475 original_cm->active_next = NULL;
2476 original_cm->active_prev = NULL;
2477 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002478
2479 /*
2480 * For PPP links we don't write an L2 header. For everything else we do.
2481 */
2482 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2483 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2484
2485 /*
2486 * If our dev writes Ethernet headers then we can write a really fast
2487 * version.
2488 */
2489 if (dest_dev->header_ops) {
2490 if (dest_dev->header_ops->create == eth_header) {
2491 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2492 }
2493 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002494 }
2495
2496 /*
2497 * Fill in the "reply" direction connection matching object.
2498 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002499 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002500 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002501 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002502 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002503 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002504 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002505 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002506 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002507 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002508 reply_cm->xlate_dest_port = sic->src_port;
2509 reply_cm->rx_packet_count = 0;
2510 reply_cm->rx_packet_count64 = 0;
2511 reply_cm->rx_byte_count = 0;
2512 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002513 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002514 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002515 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002516 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2517 reply_cm->connection = c;
2518 reply_cm->counter_match = original_cm;
2519 reply_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002520#ifdef CONFIG_NF_FLOW_COOKIE
2521 reply_cm->flow_cookie = 0;
2522#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002523#ifdef CONFIG_XFRM
2524 reply_cm->flow_accel = sic->reply_accel;
2525#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002526 reply_cm->active_next = NULL;
2527 reply_cm->active_prev = NULL;
2528 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002529
2530 /*
2531 * For PPP links we don't write an L2 header. For everything else we do.
2532 */
2533 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2534 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2535
2536 /*
2537 * If our dev writes Ethernet headers then we can write a really fast
2538 * version.
2539 */
2540 if (src_dev->header_ops) {
2541 if (src_dev->header_ops->create == eth_header) {
2542 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2543 }
2544 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002545 }
2546
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002547
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002548 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002549 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2550 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2551 }
2552
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002553 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002554 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2555 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2556 }
2557
2558 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002559 c->src_ip = sic->src_ip.ip;
2560 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002561 c->src_port = sic->src_port;
2562 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002563 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002564 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002565 c->dest_ip = sic->dest_ip.ip;
2566 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002567 c->dest_port = sic->dest_port;
2568 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002569 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002570 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002571 c->mark = sic->mark;
Xiaoping Fan34586472015-07-03 02:20:35 -07002572 c->debug_read_seq = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002573 c->last_sync_jiffies = get_jiffies_64();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002574
2575 /*
2576 * Take hold of our source and dest devices for the duration of the connection.
2577 */
2578 dev_hold(c->original_dev);
2579 dev_hold(c->reply_dev);
2580
2581 /*
2582 * Initialize the protocol-specific information that we track.
2583 */
2584 switch (sic->protocol) {
2585 case IPPROTO_TCP:
2586 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2587 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2588 original_cm->protocol_state.tcp.end = sic->src_td_end;
2589 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2590 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2591 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2592 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2593 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002594 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002595 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2596 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2597 }
2598 break;
2599 }
2600
2601 sfe_ipv4_connection_match_compute_translations(original_cm);
2602 sfe_ipv4_connection_match_compute_translations(reply_cm);
2603 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2604
2605 spin_unlock_bh(&si->lock);
2606
2607 /*
2608 * We have everything we need!
2609 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002610 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002611 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2612 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002613 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002614 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002615 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002616 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002617 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002618
2619 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002620}
2621
2622/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002623 * sfe_ipv4_destroy_rule()
2624 * Destroy a forwarding rule.
2625 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002626void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002627{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002628 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002629 struct sfe_ipv4_connection *c;
2630
2631 spin_lock_bh(&si->lock);
2632 si->connection_destroy_requests++;
2633
2634 /*
2635 * Check to see if we have a flow that matches the rule we're trying
2636 * to destroy. If there isn't then we can't destroy it.
2637 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002638 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2639 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002640 if (!c) {
2641 si->connection_destroy_misses++;
2642 spin_unlock_bh(&si->lock);
2643
2644 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002645 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2646 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002647 return;
2648 }
2649
2650 /*
2651 * Remove our connection details from the hash tables.
2652 */
2653 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2654 spin_unlock_bh(&si->lock);
2655
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002656 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002657
2658 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002659 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2660 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002661}
2662
2663/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002664 * sfe_ipv4_register_sync_rule_callback()
2665 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002666 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002667void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002668{
2669 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002670
2671 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002672 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002673 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002674}
2675
2676/*
2677 * sfe_ipv4_get_debug_dev()
2678 */
2679static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2680 struct device_attribute *attr,
2681 char *buf)
2682{
2683 struct sfe_ipv4 *si = &__si;
2684 ssize_t count;
2685 int num;
2686
2687 spin_lock_bh(&si->lock);
2688 num = si->debug_dev;
2689 spin_unlock_bh(&si->lock);
2690
2691 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2692 return count;
2693}
2694
2695/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002696 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002697 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002698static const struct device_attribute sfe_ipv4_debug_dev_attr =
2699 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2700
/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;

another_round:
	spin_lock_bh(&si->lock);

	/*
	 * Find the first connection that involves the given device on either
	 * side of the flow.  A NULL dev matches the very first connection.
	 */
	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		/*
		 * Does this connection relate to the device we are destroying?
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			break;
		}
	}

	/*
	 * Unlink the matching connection from the tables while the lock is
	 * still held.
	 */
	if (c) {
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	}

	spin_unlock_bh(&si->lock);

	/*
	 * Flush outside the lock, then restart the scan from the head: the
	 * connection list has changed, so any saved iterator would be stale.
	 * The loop terminates once no matching connection remains.
	 */
	if (c) {
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		goto another_round;
	}
}
2737
/*
 * sfe_ipv4_periodic_sync()
 *	Timer handler: push state for a slice of active connections to the
 *	registered sync callback, then re-arm the timer (~every (HZ+99)/100
 *	jiffies, i.e. roughly 10ms).
 *
 * 'arg' is the struct sfe_ipv4 instance pointer cast to unsigned long.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	/*
	 * If no sync callback is registered there is nothing to do this tick.
	 */
	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 * Note: dropping the lock means the active list may change
		 * under us, which is why the head is re-read each iteration.
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2836
2837#define CHAR_DEV_MSG_SIZE 768
2838
2839/*
2840 * sfe_ipv4_debug_dev_read_start()
2841 * Generate part of the XML output.
2842 */
2843static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2844 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2845{
2846 int bytes_read;
2847
Xiaoping Fan34586472015-07-03 02:20:35 -07002848 si->debug_read_seq++;
2849
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002850 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2851 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2852 return false;
2853 }
2854
2855 *length -= bytes_read;
2856 *total_read += bytes_read;
2857
2858 ws->state++;
2859 return true;
2860}
2861
2862/*
2863 * sfe_ipv4_debug_dev_read_connections_start()
2864 * Generate part of the XML output.
2865 */
2866static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2867 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2868{
2869 int bytes_read;
2870
2871 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2872 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2873 return false;
2874 }
2875
2876 *length -= bytes_read;
2877 *total_read += bytes_read;
2878
2879 ws->state++;
2880 return true;
2881}
2882
2883/*
2884 * sfe_ipv4_debug_dev_read_connections_connection()
2885 * Generate part of the XML output.
2886 */
2887static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2888 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2889{
2890 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002891 struct sfe_ipv4_connection_match *original_cm;
2892 struct sfe_ipv4_connection_match *reply_cm;
2893 int bytes_read;
2894 int protocol;
2895 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002896 __be32 src_ip;
2897 __be32 src_ip_xlate;
2898 __be16 src_port;
2899 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002900 uint64_t src_rx_packets;
2901 uint64_t src_rx_bytes;
2902 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002903 __be32 dest_ip;
2904 __be32 dest_ip_xlate;
2905 __be16 dest_port;
2906 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002907 uint64_t dest_rx_packets;
2908 uint64_t dest_rx_bytes;
2909 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002910 uint32_t mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002911#ifdef CONFIG_NF_FLOW_COOKIE
2912 int src_flow_cookie, dst_flow_cookie;
2913#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002914
2915 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002916
2917 for (c = si->all_connections_head; c; c = c->all_connections_next) {
2918 if (c->debug_read_seq < si->debug_read_seq) {
2919 c->debug_read_seq = si->debug_read_seq;
2920 break;
2921 }
2922 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002923
2924 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002925 * If there were no connections then move to the next state.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002926 */
2927 if (!c) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002928 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002929 ws->state++;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002930 return true;
2931 }
2932
2933 original_cm = c->original_match;
2934 reply_cm = c->reply_match;
2935
2936 protocol = c->protocol;
2937 src_dev = c->original_dev;
2938 src_ip = c->src_ip;
2939 src_ip_xlate = c->src_ip_xlate;
2940 src_port = c->src_port;
2941 src_port_xlate = c->src_port_xlate;
2942
2943 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2944 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2945
2946 src_rx_packets = original_cm->rx_packet_count64;
2947 src_rx_bytes = original_cm->rx_byte_count64;
2948 dest_dev = c->reply_dev;
2949 dest_ip = c->dest_ip;
2950 dest_ip_xlate = c->dest_ip_xlate;
2951 dest_port = c->dest_port;
2952 dest_port_xlate = c->dest_port_xlate;
2953 dest_rx_packets = reply_cm->rx_packet_count64;
2954 dest_rx_bytes = reply_cm->rx_byte_count64;
2955 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002956 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002957#ifdef CONFIG_NF_FLOW_COOKIE
2958 src_flow_cookie = original_cm->flow_cookie;
2959 dst_flow_cookie = reply_cm->flow_cookie;
2960#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002961 spin_unlock_bh(&si->lock);
2962
2963 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
2964 "protocol=\"%u\" "
2965 "src_dev=\"%s\" "
2966 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
2967 "src_port=\"%u\" src_port_xlate=\"%u\" "
2968 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
2969 "dest_dev=\"%s\" "
2970 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
2971 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
2972 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002973#ifdef CONFIG_NF_FLOW_COOKIE
2974 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
2975#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002976 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06002977 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002978 protocol,
2979 src_dev->name,
2980 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002981 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002982 src_rx_packets, src_rx_bytes,
2983 dest_dev->name,
2984 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002985 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002986 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002987#ifdef CONFIG_NF_FLOW_COOKIE
2988 src_flow_cookie, dst_flow_cookie,
2989#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002990 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002991
2992 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2993 return false;
2994 }
2995
2996 *length -= bytes_read;
2997 *total_read += bytes_read;
2998
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002999 return true;
3000}
3001
3002/*
3003 * sfe_ipv4_debug_dev_read_connections_end()
3004 * Generate part of the XML output.
3005 */
3006static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3007 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3008{
3009 int bytes_read;
3010
3011 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3012 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3013 return false;
3014 }
3015
3016 *length -= bytes_read;
3017 *total_read += bytes_read;
3018
3019 ws->state++;
3020 return true;
3021}
3022
3023/*
3024 * sfe_ipv4_debug_dev_read_exceptions_start()
3025 * Generate part of the XML output.
3026 */
3027static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3028 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3029{
3030 int bytes_read;
3031
3032 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3033 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3034 return false;
3035 }
3036
3037 *length -= bytes_read;
3038 *total_read += bytes_read;
3039
3040 ws->state++;
3041 return true;
3042}
3043
3044/*
3045 * sfe_ipv4_debug_dev_read_exceptions_exception()
3046 * Generate part of the XML output.
3047 */
3048static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3049 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3050{
3051 uint64_t ct;
3052
3053 spin_lock_bh(&si->lock);
3054 ct = si->exception_events64[ws->iter_exception];
3055 spin_unlock_bh(&si->lock);
3056
3057 if (ct) {
3058 int bytes_read;
3059
3060 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3061 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3062 sfe_ipv4_exception_events_string[ws->iter_exception],
3063 ct);
3064 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3065 return false;
3066 }
3067
3068 *length -= bytes_read;
3069 *total_read += bytes_read;
3070 }
3071
3072 ws->iter_exception++;
3073 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3074 ws->iter_exception = 0;
3075 ws->state++;
3076 }
3077
3078 return true;
3079}
3080
3081/*
3082 * sfe_ipv4_debug_dev_read_exceptions_end()
3083 * Generate part of the XML output.
3084 */
3085static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3086 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3087{
3088 int bytes_read;
3089
3090 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3091 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3092 return false;
3093 }
3094
3095 *length -= bytes_read;
3096 *total_read += bytes_read;
3097
3098 ws->state++;
3099 return true;
3100}
3101
3102/*
3103 * sfe_ipv4_debug_dev_read_stats()
3104 * Generate part of the XML output.
3105 */
3106static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3107 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3108{
3109 int bytes_read;
3110 unsigned int num_connections;
3111 uint64_t packets_forwarded;
3112 uint64_t packets_not_forwarded;
3113 uint64_t connection_create_requests;
3114 uint64_t connection_create_collisions;
3115 uint64_t connection_destroy_requests;
3116 uint64_t connection_destroy_misses;
3117 uint64_t connection_flushes;
3118 uint64_t connection_match_hash_hits;
3119 uint64_t connection_match_hash_reorders;
3120
3121 spin_lock_bh(&si->lock);
3122 sfe_ipv4_update_summary_stats(si);
3123
3124 num_connections = si->num_connections;
3125 packets_forwarded = si->packets_forwarded64;
3126 packets_not_forwarded = si->packets_not_forwarded64;
3127 connection_create_requests = si->connection_create_requests64;
3128 connection_create_collisions = si->connection_create_collisions64;
3129 connection_destroy_requests = si->connection_destroy_requests64;
3130 connection_destroy_misses = si->connection_destroy_misses64;
3131 connection_flushes = si->connection_flushes64;
3132 connection_match_hash_hits = si->connection_match_hash_hits64;
3133 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3134 spin_unlock_bh(&si->lock);
3135
3136 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3137 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003138 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3139 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003140 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3141 "flushes=\"%llu\" "
3142 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3143 num_connections,
3144 packets_forwarded,
3145 packets_not_forwarded,
3146 connection_create_requests,
3147 connection_create_collisions,
3148 connection_destroy_requests,
3149 connection_destroy_misses,
3150 connection_flushes,
3151 connection_match_hash_hits,
3152 connection_match_hash_reorders);
3153 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3154 return false;
3155 }
3156
3157 *length -= bytes_read;
3158 *total_read += bytes_read;
3159
3160 ws->state++;
3161 return true;
3162}
3163
3164/*
3165 * sfe_ipv4_debug_dev_read_end()
3166 * Generate part of the XML output.
3167 */
3168static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3169 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3170{
3171 int bytes_read;
3172
3173 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3174 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3175 return false;
3176 }
3177
3178 *length -= bytes_read;
3179 *total_read += bytes_read;
3180
3181 ws->state++;
3182 return true;
3183}
3184
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * NOTE: the entry order must match the SFE_IPV4_DEBUG_XML_STATE_* enum
 * values exactly - each state indexes directly into this table from
 * sfe_ipv4_debug_dev_read().
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3200
3201/*
3202 * sfe_ipv4_debug_dev_read()
3203 * Send info to userspace upon read request from user
3204 */
3205static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3206{
3207 char msg[CHAR_DEV_MSG_SIZE];
3208 int total_read = 0;
3209 struct sfe_ipv4_debug_xml_write_state *ws;
3210 struct sfe_ipv4 *si = &__si;
3211
3212 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3213 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3214 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3215 continue;
3216 }
3217 }
3218
3219 return total_read;
3220}
3221
3222/*
3223 * sfe_ipv4_debug_dev_write()
Nicolas Costabafb3af2014-01-29 16:39:39 -06003224 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003225 */
3226static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3227{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003228 struct sfe_ipv4 *si = &__si;
3229
3230 spin_lock_bh(&si->lock);
3231 sfe_ipv4_update_summary_stats(si);
3232
Matthew McClintock54167ab2014-01-14 21:06:28 -06003233 si->packets_forwarded64 = 0;
3234 si->packets_not_forwarded64 = 0;
3235 si->connection_create_requests64 = 0;
3236 si->connection_create_collisions64 = 0;
3237 si->connection_destroy_requests64 = 0;
3238 si->connection_destroy_misses64 = 0;
3239 si->connection_flushes64 = 0;
3240 si->connection_match_hash_hits64 = 0;
3241 si->connection_match_hash_reorders64 = 0;
3242 spin_unlock_bh(&si->lock);
3243
3244 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003245}
3246
3247/*
3248 * sfe_ipv4_debug_dev_open()
3249 */
3250static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3251{
3252 struct sfe_ipv4_debug_xml_write_state *ws;
3253
3254 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3255 if (!ws) {
3256 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3257 if (!ws) {
3258 return -ENOMEM;
3259 }
3260
3261 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3262 file->private_data = ws;
3263 }
3264
3265 return 0;
3266}
3267
3268/*
3269 * sfe_ipv4_debug_dev_release()
3270 */
3271static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3272{
3273 struct sfe_ipv4_debug_xml_write_state *ws;
3274
3275 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3276 if (ws) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003277 /*
3278 * We've finished with our output so free the write state.
3279 */
3280 kfree(ws);
3281 }
3282
3283 return 0;
3284}
3285
3286/*
3287 * File operations used in the debug char device
3288 */
3289static struct file_operations sfe_ipv4_debug_dev_fops = {
3290 .read = sfe_ipv4_debug_dev_read,
3291 .write = sfe_ipv4_debug_dev_write,
3292 .open = sfe_ipv4_debug_dev_open,
3293 .release = sfe_ipv4_debug_dev_release
3294};
3295
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003296#ifdef CONFIG_NF_FLOW_COOKIE
3297/*
3298 * sfe_register_flow_cookie_cb
3299 * register a function in SFE to let SFE use this function to configure flow cookie for a flow
3300 *
3301 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
3302 * can use this function to configure flow cookie for a flow.
3303 * return: 0, success; !=0, fail
3304 */
3305int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
3306{
3307 struct sfe_ipv4 *si = &__si;
3308
3309 BUG_ON(!cb);
3310
3311 if (si->flow_cookie_set_func) {
3312 return -1;
3313 }
3314
3315 rcu_assign_pointer(si->flow_cookie_set_func, cb);
3316 return 0;
3317}
3318
/*
 * sfe_unregister_flow_cookie_cb
 *	unregister function which is used to configure flow cookie for a flow
 *
 * The cb argument is accepted for interface symmetry but is not checked
 * against the currently registered callback.
 *
 * NOTE(review): readers access flow_cookie_set_func under RCU, so the
 * caller should presumably wait for an RCU grace period before freeing
 * anything the callback depends on - confirm against the callers.
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}
#endif /*CONFIG_NF_FLOW_COOKIE*/
3333
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003334/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003335 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003336 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003337static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003338{
3339 struct sfe_ipv4 *si = &__si;
3340 int result = -1;
3341
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003342 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003343
3344 /*
3345 * Create sys/sfe_ipv4
3346 */
3347 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3348 if (!si->sys_sfe_ipv4) {
3349 DEBUG_ERROR("failed to register sfe_ipv4\n");
3350 goto exit1;
3351 }
3352
3353 /*
3354 * Create files, one for each parameter supported by this module.
3355 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003356 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3357 if (result) {
3358 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3359 goto exit4;
3360 }
3361
3362 /*
3363 * Register our debug char device.
3364 */
3365 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3366 if (result < 0) {
3367 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3368 goto exit5;
3369 }
3370
3371 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003372
3373 /*
3374 * Create a timer to handle periodic statistics.
3375 */
3376 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003377 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003378
Dave Hudson87973cd2013-10-22 16:00:04 +01003379 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003380
Dave Hudson87973cd2013-10-22 16:00:04 +01003381 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003382
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003383exit5:
3384 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3385
3386exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003387 kobject_put(si->sys_sfe_ipv4);
3388
3389exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003390 return result;
3391}
3392
/*
 * sfe_ipv4_exit()
 *	Module exit: tear down in the reverse order of sfe_ipv4_init().
 *
 * Order matters: flush all connections first, then stop the periodic
 * sync timer, then remove the user-visible interfaces (char device,
 * sysfs file, kobject).
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	/* Synchronously stop the periodic statistics timer. */
	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);

}
3416
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Public entry points consumed by the connection manager (sfe_cm) and
 * by flow-cookie-capable hardware drivers.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003435