/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
 *
 * All Rights Reserved.
 * Qualcomm Atheros Confidential and Proprietary.
 */

#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>

#include "sfe.h"
#include "sfe_cm.h"

/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses.  For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif

38/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060039 * An Ethernet header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000040 * help with performance on some platforms (see the definition of
41 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010042 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060043struct sfe_ipv4_eth_hdr {
44 __be16 h_dest[ETH_ALEN / 2];
45 __be16 h_source[ETH_ALEN / 2];
46 __be16 h_proto;
47} SFE_IPV4_UNALIGNED_STRUCT;
48
49/*
50 * An IPv4 header, but with an optional "packed" attribute to
51 * help with performance on some platforms (see the definition of
52 * SFE_IPV4_UNALIGNED_STRUCT)
53 */
54struct sfe_ipv4_ip_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010055#if defined(__LITTLE_ENDIAN_BITFIELD)
56 __u8 ihl:4,
57 version:4;
58#elif defined (__BIG_ENDIAN_BITFIELD)
59 __u8 version:4,
Xiaoping Fan59176422015-05-22 15:58:10 -070060 ihl:4;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010061#else
62#error "Please fix <asm/byteorder.h>"
63#endif
64 __u8 tos;
65 __be16 tot_len;
66 __be16 id;
67 __be16 frag_off;
68 __u8 ttl;
69 __u8 protocol;
70 __sum16 check;
71 __be32 saddr;
72 __be32 daddr;
Dave Hudsondcd08fb2013-11-22 09:25:16 -060073
74 /*
75 * The options start here.
76 */
Dave Hudsona8197e72013-12-17 23:46:22 +000077} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010078
79/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060080 * A UDP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000081 * help with performance on some platforms (see the definition of
82 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010083 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060084struct sfe_ipv4_udp_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010085 __be16 source;
86 __be16 dest;
87 __be16 len;
88 __sum16 check;
Dave Hudsona8197e72013-12-17 23:46:22 +000089} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010090
91/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060092 * A TCP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000093 * help with performance on some platforms (see the definition of
94 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010095 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060096struct sfe_ipv4_tcp_hdr {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010097 __be16 source;
98 __be16 dest;
99 __be32 seq;
100 __be32 ack_seq;
101#if defined(__LITTLE_ENDIAN_BITFIELD)
102 __u16 res1:4,
103 doff:4,
104 fin:1,
105 syn:1,
106 rst:1,
107 psh:1,
108 ack:1,
109 urg:1,
110 ece:1,
111 cwr:1;
112#elif defined(__BIG_ENDIAN_BITFIELD)
113 __u16 doff:4,
114 res1:4,
115 cwr:1,
116 ece:1,
117 urg:1,
118 ack:1,
119 psh:1,
120 rst:1,
121 syn:1,
122 fin:1;
123#else
124#error "Adjust your <asm/byteorder.h> defines"
Nicolas Costaac2979c2014-01-14 10:35:24 -0600125#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100126 __be16 window;
127 __sum16 check;
128 __be16 urg_ptr;
Dave Hudsona8197e72013-12-17 23:46:22 +0000129} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100130
/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};

/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* L2 header write */

160/*
161 * IPv4 connection matching structure.
162 */
163struct sfe_ipv4_connection_match {
164 /*
165 * References to other objects.
166 */
167 struct sfe_ipv4_connection_match *next;
168 /* Next connection match entry in a list */
169 struct sfe_ipv4_connection_match *prev;
170 /* Previous connection match entry in a list */
171 struct sfe_ipv4_connection *connection;
172 /* Pointer to our connection */
173 struct sfe_ipv4_connection_match *counter_match;
174 /* Pointer to the connection match in the "counter" direction to this one */
175 struct sfe_ipv4_connection_match *active_next;
176 /* Pointer to the next connection in the active list */
177 struct sfe_ipv4_connection_match *active_prev;
178 /* Pointer to the previous connection in the active list */
179 bool active; /* Flag to indicate if we're on the active list */
180
181 /*
182 * Characteristics that identify flows that match this rule.
183 */
184 struct net_device *match_dev; /* Network device */
185 uint8_t match_protocol; /* Protocol */
Dave Hudson87973cd2013-10-22 16:00:04 +0100186 __be32 match_src_ip; /* Source IP address */
187 __be32 match_dest_ip; /* Destination IP address */
188 __be16 match_src_port; /* Source port/connection ident */
189 __be16 match_dest_port; /* Destination port/connection ident */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100190
191 /*
192 * Control the operations of the match.
193 */
194 uint32_t flags; /* Bit flags */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800195#ifdef CONFIG_NF_FLOW_COOKIE
196 uint32_t flow_cookie; /* used flow cookie, for debug */
197#endif
Zhi Chen8748eb32015-06-18 12:58:48 -0700198#ifdef CONFIG_XFRM
199 uint32_t flow_accel; /* The flow accelerated or not */
200#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100201
202 /*
203 * Connection state that we track once we match.
204 */
205 union { /* Protocol-specific state */
206 struct sfe_ipv4_tcp_connection_match tcp;
207 } protocol_state;
208 uint32_t rx_packet_count; /* Number of packets RX'd */
209 uint32_t rx_byte_count; /* Number of bytes RX'd */
210
211 /*
212 * Packet translation information.
213 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100214 __be32 xlate_src_ip; /* Address after source translation */
215 __be16 xlate_src_port; /* Port/connection ident after source translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100216 uint16_t xlate_src_csum_adjustment;
217 /* Transport layer checksum adjustment after source translation */
Xiaoping Fanad755af2015-04-01 16:58:46 -0700218 uint16_t xlate_src_partial_csum_adjustment;
219 /* Transport layer pseudo header checksum adjustment after source translation */
220
Dave Hudson87973cd2013-10-22 16:00:04 +0100221 __be32 xlate_dest_ip; /* Address after destination translation */
222 __be16 xlate_dest_port; /* Port/connection ident after destination translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100223 uint16_t xlate_dest_csum_adjustment;
224 /* Transport layer checksum adjustment after destination translation */
Xiaoping Fanad755af2015-04-01 16:58:46 -0700225 uint16_t xlate_dest_partial_csum_adjustment;
226 /* Transport layer pseudo header checksum adjustment after destination translation */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100227
228 /*
229 * Packet transmit information.
230 */
231 struct net_device *xmit_dev; /* Network device on which to transmit */
232 unsigned short int xmit_dev_mtu;
233 /* Interface MTU */
234 uint16_t xmit_dest_mac[ETH_ALEN / 2];
235 /* Destination MAC address to use when forwarding */
236 uint16_t xmit_src_mac[ETH_ALEN / 2];
237 /* Source MAC address to use when forwarding */
238
239 /*
240 * Summary stats.
241 */
242 uint64_t rx_packet_count64; /* Number of packets RX'd */
243 uint64_t rx_byte_count64; /* Number of bytes RX'd */
244};
245
246/*
247 * Per-connection data structure.
248 */
249struct sfe_ipv4_connection {
250 struct sfe_ipv4_connection *next;
251 /* Pointer to the next entry in a hash chain */
252 struct sfe_ipv4_connection *prev;
253 /* Pointer to the previous entry in a hash chain */
254 int protocol; /* IP protocol number */
Dave Hudson87973cd2013-10-22 16:00:04 +0100255 __be32 src_ip; /* Source IP address */
256 __be32 src_ip_xlate; /* NAT-translated source IP address */
257 __be32 dest_ip; /* Destination IP address */
258 __be32 dest_ip_xlate; /* NAT-translated destination IP address */
259 __be16 src_port; /* Source port */
260 __be16 src_port_xlate; /* NAT-translated source port */
261 __be16 dest_port; /* Destination port */
262 __be16 dest_port_xlate; /* NAT-translated destination port */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100263 struct sfe_ipv4_connection_match *original_match;
264 /* Original direction matching structure */
265 struct net_device *original_dev;
266 /* Original direction source device */
267 struct sfe_ipv4_connection_match *reply_match;
268 /* Reply direction matching structure */
269 struct net_device *reply_dev; /* Reply direction source device */
270 uint64_t last_sync_jiffies; /* Jiffies count for the last sync */
271 struct sfe_ipv4_connection *all_connections_next;
272 /* Pointer to the next entry in the list of all connections */
273 struct sfe_ipv4_connection *all_connections_prev;
274 /* Pointer to the previous entry in the list of all connections */
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600275 uint32_t mark; /* mark for outgoing packet */
Xiaoping Fan34586472015-07-03 02:20:35 -0700276 uint32_t debug_read_seq; /* sequence number for debug dump */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100277};
278
/*
 * IPv4 connections and hash table size information.
 *
 * The size is a power of two so that the mask can be used to reduce a hash
 * value to a table index.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)

#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * Flow cookie table size information.  The mask is derived from the size
 * so the two cannot drift apart.
 */
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK (SFE_FLOW_COOKIE_SIZE - 1)

/*
 * One slot of the flow cookie table.
 */
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
					/* Connection match using this cookie, or NULL if the slot is free */
	unsigned long last_clean_time;	/* Value of jiffies when the slot was last released */
};
#endif

/*
 * Reasons for which a packet is counted as an exception.
 *
 * The values index the exception_events[] counters and the
 * sfe_ipv4_exception_events_string[] name table, so any change here must be
 * mirrored there.  SFE_IPV4_EXCEPTION_EVENT_LAST is the number of events.
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};

336static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
337 "UDP_HEADER_INCOMPLETE",
338 "UDP_NO_CONNECTION",
339 "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
340 "UDP_SMALL_TTL",
341 "UDP_NEEDS_FRAGMENTATION",
342 "TCP_HEADER_INCOMPLETE",
343 "TCP_NO_CONNECTION_SLOW_FLAGS",
344 "TCP_NO_CONNECTION_FAST_FLAGS",
345 "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
346 "TCP_SMALL_TTL",
347 "TCP_NEEDS_FRAGMENTATION",
348 "TCP_FLAGS",
349 "TCP_SEQ_EXCEEDS_RIGHT_EDGE",
350 "TCP_SMALL_DATA_OFFS",
351 "TCP_BAD_SACK",
352 "TCP_BIG_DATA_OFFS",
353 "TCP_SEQ_BEFORE_LEFT_EDGE",
354 "TCP_ACK_EXCEEDS_RIGHT_EDGE",
355 "TCP_ACK_BEFORE_LEFT_EDGE",
356 "ICMP_HEADER_INCOMPLETE",
357 "ICMP_UNHANDLED_TYPE",
358 "ICMP_IPV4_HEADER_INCOMPLETE",
359 "ICMP_IPV4_NON_V4",
360 "ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
361 "ICMP_IPV4_UDP_HEADER_INCOMPLETE",
362 "ICMP_IPV4_TCP_HEADER_INCOMPLETE",
363 "ICMP_IPV4_UNHANDLED_PROTOCOL",
364 "ICMP_NO_CONNECTION",
365 "ICMP_FLUSHED_CONNECTION",
366 "HEADER_INCOMPLETE",
367 "BAD_TOTAL_LENGTH",
368 "NON_V4",
369 "NON_INITIAL_FRAGMENT",
370 "DATAGRAM_INCOMPLETE",
371 "IP_OPTIONS_INCOMPLETE",
372 "UNHANDLED_PROTOCOL"
373};
374
375/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600376 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100377 */
378struct sfe_ipv4 {
379 spinlock_t lock; /* Lock for SMP correctness */
380 struct sfe_ipv4_connection_match *active_head;
381 /* Head of the list of recently active connections */
382 struct sfe_ipv4_connection_match *active_tail;
383 /* Tail of the list of recently active connections */
384 struct sfe_ipv4_connection *all_connections_head;
385 /* Head of the list of all connections */
386 struct sfe_ipv4_connection *all_connections_tail;
387 /* Tail of the list of all connections */
388 unsigned int num_connections; /* Number of connections */
389 struct timer_list timer; /* Timer used for periodic sync ops */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700390 sfe_sync_rule_callback_t __rcu sync_rule_callback;
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600391 /* Callback function registered by a connection manager for stats syncing */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100392 struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
393 /* Connection hash table */
394 struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
395 /* Connection match hash table */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800396#ifdef CONFIG_NF_FLOW_COOKIE
397 struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
398 /* flow cookie table*/
399 flow_cookie_set_func_t flow_cookie_set_func;
400 /* function used to configure flow cookie in hardware*/
401#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100402
403 /*
404 * Statistics.
405 */
406 uint32_t connection_create_requests;
407 /* Number of IPv4 connection create requests */
408 uint32_t connection_create_collisions;
409 /* Number of IPv4 connection create requests that collided with existing hash table entries */
410 uint32_t connection_destroy_requests;
411 /* Number of IPv4 connection destroy requests */
412 uint32_t connection_destroy_misses;
413 /* Number of IPv4 connection destroy requests that missed our hash table */
414 uint32_t connection_match_hash_hits;
415 /* Number of IPv4 connection match hash hits */
416 uint32_t connection_match_hash_reorders;
417 /* Number of IPv4 connection match hash reorders */
418 uint32_t connection_flushes; /* Number of IPv4 connection flushes */
419 uint32_t packets_forwarded; /* Number of IPv4 packets forwarded */
420 uint32_t packets_not_forwarded; /* Number of IPv4 packets not forwarded */
421 uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];
422
423 /*
424 * Summary tatistics.
425 */
426 uint64_t connection_create_requests64;
427 /* Number of IPv4 connection create requests */
428 uint64_t connection_create_collisions64;
429 /* Number of IPv4 connection create requests that collided with existing hash table entries */
430 uint64_t connection_destroy_requests64;
431 /* Number of IPv4 connection destroy requests */
432 uint64_t connection_destroy_misses64;
433 /* Number of IPv4 connection destroy requests that missed our hash table */
434 uint64_t connection_match_hash_hits64;
435 /* Number of IPv4 connection match hash hits */
436 uint64_t connection_match_hash_reorders64;
437 /* Number of IPv4 connection match hash reorders */
438 uint64_t connection_flushes64; /* Number of IPv4 connection flushes */
439 uint64_t packets_forwarded64; /* Number of IPv4 packets forwarded */
440 uint64_t packets_not_forwarded64;
441 /* Number of IPv4 packets not forwarded */
442 uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];
443
444 /*
445 * Control state.
446 */
447 struct kobject *sys_sfe_ipv4; /* sysfs linkage */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100448 int debug_dev; /* Major number of the debug char device */
Xiaoping Fan34586472015-07-03 02:20:35 -0700449 uint32_t debug_read_seq; /* sequence number for debug dump */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100450};
451
/*
 * Enumeration of the XML output.
 *
 * These are the states of the debug XML output state machine, listed in
 * the order in which the output sections are named.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};

468/*
469 * XML write state.
470 */
471struct sfe_ipv4_debug_xml_write_state {
472 enum sfe_ipv4_debug_xml_states state;
473 /* XML output file state machine state */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100474 int iter_exception; /* Next exception iterator */
475};
476
/*
 * Function pointer type for a debug XML write method.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

480struct sfe_ipv4 __si;
481
482/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100483 * sfe_ipv4_gen_ip_csum()
484 * Generate the IP checksum for an IPv4 header.
485 *
486 * Note that this function assumes that we have only 20 bytes of IP header.
487 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600488static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100489{
490 uint32_t sum;
491 uint16_t *i = (uint16_t *)iph;
492
493 iph->check = 0;
494
495 /*
496 * Generate the sum.
497 */
498 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
499
500 /*
501 * Fold it to ones-complement form.
502 */
503 sum = (sum & 0xffff) + (sum >> 16);
504 sum = (sum & 0xffff) + (sum >> 16);
505
506 return (uint16_t)sum ^ 0xffff;
507}
508
509/*
510 * sfe_ipv4_get_connection_match_hash()
511 * Generate the hash used in connection match lookups.
512 */
513static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100514 __be32 src_ip, __be16 src_port,
515 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100516{
517 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100518 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100519 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
520}
521
522/*
523 * sfe_ipv4_find_sfe_ipv4_connection_match()
524 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
525 *
526 * On entry we must be holding the lock that protects the hash table.
527 */
528static struct sfe_ipv4_connection_match *
529sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100530 __be32 src_ip, __be16 src_port,
531 __be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100532static struct sfe_ipv4_connection_match *
533sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100534 __be32 src_ip, __be16 src_port,
535 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100536{
537 struct sfe_ipv4_connection_match *cm;
538 struct sfe_ipv4_connection_match *head;
539 unsigned int conn_match_idx;
540
541 conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
542 cm = si->conn_match_hash[conn_match_idx];
543
544 /*
545 * If we don't have anything in this chain then bale.
546 */
547 if (unlikely(!cm)) {
548 return cm;
549 }
550
551 /*
552 * Hopefully the first entry is the one we want.
553 */
554 if (likely(cm->match_src_port == src_port)
555 && likely(cm->match_dest_port == dest_port)
556 && likely(cm->match_src_ip == src_ip)
557 && likely(cm->match_dest_ip == dest_ip)
558 && likely(cm->match_protocol == protocol)
559 && likely(cm->match_dev == dev)) {
560 si->connection_match_hash_hits++;
561 return cm;
562 }
563
564 /*
565 * We may or may not have a matching entry but if we do then we want to
566 * move that entry to the top of the hash chain when we get to it. We
567 * presume that this will be reused again very quickly.
568 */
569 head = cm;
570 do {
571 cm = cm->next;
572 } while (cm && (cm->match_src_port != src_port
573 || cm->match_dest_port != dest_port
574 || cm->match_src_ip != src_ip
575 || cm->match_dest_ip != dest_ip
576 || cm->match_protocol != protocol
577 || cm->match_dev != dev));
578
579 /*
580 * Not found then we're done.
581 */
582 if (unlikely(!cm)) {
583 return cm;
584 }
585
586 /*
587 * We found a match so move it.
588 */
589 if (cm->next) {
590 cm->next->prev = cm->prev;
591 }
592 cm->prev->next = cm->next;
593 cm->prev = NULL;
594 cm->next = head;
595 head->prev = cm;
596 si->conn_match_hash[conn_match_idx] = cm;
597 si->connection_match_hash_reorders++;
598
599 return cm;
600}
601
602/*
603 * sfe_ipv4_connection_match_update_summary_stats()
604 * Update the summary stats for a connection match entry.
605 */
606static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
607{
608 cm->rx_packet_count64 += cm->rx_packet_count;
609 cm->rx_packet_count = 0;
610 cm->rx_byte_count64 += cm->rx_byte_count;
611 cm->rx_byte_count = 0;
612}
613
614/*
615 * sfe_ipv4_connection_match_compute_translations()
616 * Compute port and address translations for a connection match entry.
617 */
618static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
619{
620 /*
621 * Before we insert the entry look to see if this is tagged as doing address
622 * translations. If it is then work out the adjustment that we need to apply
623 * to the transport checksum.
624 */
625 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
626 /*
627 * Precompute an incremental checksum adjustment so we can
628 * edit packets in this stream very quickly. The algorithm is from RFC1624.
629 */
630 uint16_t src_ip_hi = cm->match_src_ip >> 16;
631 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
632 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
633 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
634 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100635 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100636 uint32_t adj;
637
638 /*
639 * When we compute this fold it down to a 16-bit offset
640 * as that way we can avoid having to do a double
641 * folding of the twos-complement result because the
642 * addition of 2 16-bit values cannot cause a double
643 * wrap-around!
644 */
645 adj = src_ip_hi + src_ip_lo + cm->match_src_port
646 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
647 adj = (adj & 0xffff) + (adj >> 16);
648 adj = (adj & 0xffff) + (adj >> 16);
649 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600650
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100651 }
652
653 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
654 /*
655 * Precompute an incremental checksum adjustment so we can
656 * edit packets in this stream very quickly. The algorithm is from RFC1624.
657 */
658 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
659 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
660 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
661 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
662 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100663 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100664 uint32_t adj;
665
666 /*
667 * When we compute this fold it down to a 16-bit offset
668 * as that way we can avoid having to do a double
669 * folding of the twos-complement result because the
670 * addition of 2 16-bit values cannot cause a double
671 * wrap-around!
672 */
673 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
674 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
675 adj = (adj & 0xffff) + (adj >> 16);
676 adj = (adj & 0xffff) + (adj >> 16);
677 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
678 }
Xiaoping Fanad755af2015-04-01 16:58:46 -0700679
680 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
681 uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
682 if (adj < cm->xlate_src_ip) {
683 adj++;
684 }
685
686 adj = (adj & 0xffff) + (adj >> 16);
687 adj = (adj & 0xffff) + (adj >> 16);
688 cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
689 }
690
691 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
692 uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
693 if (adj < cm->xlate_dest_ip) {
694 adj++;
695 }
696
697 adj = (adj & 0xffff) + (adj >> 16);
698 adj = (adj & 0xffff) + (adj >> 16);
699 cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
700 }
701
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100702}
703
704/*
705 * sfe_ipv4_update_summary_stats()
706 * Update the summary stats.
707 */
708static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
709{
710 int i;
711
712 si->connection_create_requests64 += si->connection_create_requests;
713 si->connection_create_requests = 0;
714 si->connection_create_collisions64 += si->connection_create_collisions;
715 si->connection_create_collisions = 0;
716 si->connection_destroy_requests64 += si->connection_destroy_requests;
717 si->connection_destroy_requests = 0;
718 si->connection_destroy_misses64 += si->connection_destroy_misses;
719 si->connection_destroy_misses = 0;
720 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
721 si->connection_match_hash_hits = 0;
722 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
723 si->connection_match_hash_reorders = 0;
724 si->connection_flushes64 += si->connection_flushes;
725 si->connection_flushes = 0;
726 si->packets_forwarded64 += si->packets_forwarded;
727 si->packets_forwarded = 0;
728 si->packets_not_forwarded64 += si->packets_not_forwarded;
729 si->packets_not_forwarded = 0;
730
731 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
732 si->exception_events64[i] += si->exception_events[i];
733 si->exception_events[i] = 0;
734 }
735}
736
737/*
738 * sfe_ipv4_insert_sfe_ipv4_connection_match()
739 * Insert a connection match into the hash.
740 *
741 * On entry we must be holding the lock that protects the hash table.
742 */
743static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
744{
745 struct sfe_ipv4_connection_match **hash_head;
746 struct sfe_ipv4_connection_match *prev_head;
747 unsigned int conn_match_idx
748 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
749 cm->match_src_ip, cm->match_src_port,
750 cm->match_dest_ip, cm->match_dest_port);
751 hash_head = &si->conn_match_hash[conn_match_idx];
752 prev_head = *hash_head;
753 cm->prev = NULL;
754 if (prev_head) {
755 prev_head->prev = cm;
756 }
757
758 cm->next = prev_head;
759 *hash_head = cm;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800760
761#ifdef CONFIG_NF_FLOW_COOKIE
762 /*
763 * Configure hardware to put a flow cookie in packet of this flow,
764 * then we can accelerate the lookup process when we received this packet.
765 */
766 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
767 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
768
769 if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
770 flow_cookie_set_func_t func;
771
772 rcu_read_lock();
773 func = rcu_dereference(si->flow_cookie_set_func);
774 if (func) {
Xiaoping Fan59176422015-05-22 15:58:10 -0700775 if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800776 cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
777 entry->match = cm;
778 cm->flow_cookie = conn_match_idx;
779 }
780 }
781 rcu_read_unlock();
782
783 break;
784 }
785 }
786#endif
787
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100788}
789
790/*
791 * sfe_ipv4_remove_sfe_ipv4_connection_match()
792 * Remove a connection match object from the hash.
793 *
794 * On entry we must be holding the lock that protects the hash table.
795 */
796static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
797{
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800798#ifdef CONFIG_NF_FLOW_COOKIE
799 /*
800 * Tell hardware that we no longer need a flow cookie in packet of this flow
801 */
802 unsigned int conn_match_idx;
803
804 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
805 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
806
807 if (cm == entry->match) {
808 flow_cookie_set_func_t func;
809
810 rcu_read_lock();
811 func = rcu_dereference(si->flow_cookie_set_func);
812 if (func) {
813 func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
814 cm->match_dest_ip, cm->match_dest_port, 0);
815 }
816 rcu_read_unlock();
817
818 cm->flow_cookie = 0;
819 entry->match = NULL;
820 entry->last_clean_time = jiffies;
821 break;
822 }
823 }
824#endif
825
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100826 /*
827 * Unlink the connection match entry from the hash.
828 */
829 if (cm->prev) {
830 cm->prev->next = cm->next;
831 } else {
832 unsigned int conn_match_idx
833 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
834 cm->match_src_ip, cm->match_src_port,
835 cm->match_dest_ip, cm->match_dest_port);
836 si->conn_match_hash[conn_match_idx] = cm->next;
837 }
838
839 if (cm->next) {
840 cm->next->prev = cm->prev;
841 }
842
843 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600844 * If the connection match entry is in the active list remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100845 */
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600846 if (cm->active) {
847 if (likely(cm->active_prev)) {
848 cm->active_prev->active_next = cm->active_next;
849 } else {
850 si->active_head = cm->active_next;
851 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100852
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600853 if (likely(cm->active_next)) {
854 cm->active_next->active_prev = cm->active_prev;
855 } else {
856 si->active_tail = cm->active_prev;
857 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100858 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100859}
860
861/*
862 * sfe_ipv4_get_connection_hash()
863 * Generate the hash used in connection lookups.
864 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100865static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
866 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100867{
Dave Hudson87973cd2013-10-22 16:00:04 +0100868 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100869 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
870}
871
872/*
873 * sfe_ipv4_find_sfe_ipv4_connection()
874 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
875 *
876 * On entry we must be holding the lock that protects the hash table.
877 */
878static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100879 __be32 src_ip, __be16 src_port,
880 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100881{
882 struct sfe_ipv4_connection *c;
883 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
884 c = si->conn_hash[conn_idx];
885
886 /*
887 * If we don't have anything in this chain then bale.
888 */
889 if (unlikely(!c)) {
890 return c;
891 }
892
893 /*
894 * Hopefully the first entry is the one we want.
895 */
896 if (likely(c->src_port == src_port)
897 && likely(c->dest_port == dest_port)
898 && likely(c->src_ip == src_ip)
899 && likely(c->dest_ip == dest_ip)
900 && likely(c->protocol == protocol)) {
901 return c;
902 }
903
904 /*
905 * We may or may not have a matching entry but if we do then we want to
906 * move that entry to the top of the hash chain when we get to it. We
907 * presume that this will be reused again very quickly.
908 */
909 do {
910 c = c->next;
911 } while (c && (c->src_port != src_port
912 || c->dest_port != dest_port
913 || c->src_ip != src_ip
914 || c->dest_ip != dest_ip
915 || c->protocol != protocol));
916
917 /*
918 * Will need connection entry for next create/destroy metadata,
919 * So no need to re-order entry for these requests
920 */
921 return c;
922}
923
924/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600925 * sfe_ipv4_mark_rule()
926 * Updates the mark for a current offloaded connection
927 *
928 * Will take hash lock upon entry
929 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700930void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600931{
932 struct sfe_ipv4 *si = &__si;
933 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600934
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700935 spin_lock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600936 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700937 mark->src_ip.ip, mark->src_port,
938 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600939 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600940 DEBUG_TRACE("Matching connection found for mark, "
941 "setting from %08x to %08x\n",
942 c->mark, mark->mark);
943 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600944 c->mark = mark->mark;
945 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700946 spin_unlock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600947}
948
949/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100950 * sfe_ipv4_insert_sfe_ipv4_connection()
951 * Insert a connection into the hash.
952 *
953 * On entry we must be holding the lock that protects the hash table.
954 */
955static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
956{
957 struct sfe_ipv4_connection **hash_head;
958 struct sfe_ipv4_connection *prev_head;
959 unsigned int conn_idx;
960
961 /*
962 * Insert entry into the connection hash.
963 */
964 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
965 c->dest_ip, c->dest_port);
966 hash_head = &si->conn_hash[conn_idx];
967 prev_head = *hash_head;
968 c->prev = NULL;
969 if (prev_head) {
970 prev_head->prev = c;
971 }
972
973 c->next = prev_head;
974 *hash_head = c;
975
976 /*
977 * Insert entry into the "all connections" list.
978 */
979 if (si->all_connections_tail) {
980 c->all_connections_prev = si->all_connections_tail;
981 si->all_connections_tail->all_connections_next = c;
982 } else {
983 c->all_connections_prev = NULL;
984 si->all_connections_head = c;
985 }
986
987 si->all_connections_tail = c;
988 c->all_connections_next = NULL;
989 si->num_connections++;
990
991 /*
992 * Insert the connection match objects too.
993 */
994 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
995 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
996}
997
998/*
999 * sfe_ipv4_remove_sfe_ipv4_connection()
1000 * Remove a sfe_ipv4_connection object from the hash.
1001 *
1002 * On entry we must be holding the lock that protects the hash table.
1003 */
1004static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1005{
1006 /*
1007 * Remove the connection match objects.
1008 */
1009 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1010 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1011
1012 /*
1013 * Unlink the connection.
1014 */
1015 if (c->prev) {
1016 c->prev->next = c->next;
1017 } else {
1018 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1019 c->dest_ip, c->dest_port);
1020 si->conn_hash[conn_idx] = c->next;
1021 }
1022
1023 if (c->next) {
1024 c->next->prev = c->prev;
1025 }
Xiaoping Fan34586472015-07-03 02:20:35 -07001026
1027 /*
1028 * Unlink connection from all_connections list
1029 */
1030 if (c->all_connections_prev) {
1031 c->all_connections_prev->all_connections_next = c->all_connections_next;
1032 } else {
1033 si->all_connections_head = c->all_connections_next;
1034 }
1035
1036 if (c->all_connections_next) {
1037 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1038 } else {
1039 si->all_connections_tail = c->all_connections_prev;
1040 }
1041
1042 si->num_connections--;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001043}
1044
1045/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001046 * sfe_ipv4_sync_sfe_ipv4_connection()
1047 * Sync a connection.
1048 *
1049 * On entry to this function we expect that the lock for the connection is either
1050 * already held or isn't required.
1051 */
1052static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001053 struct sfe_connection_sync *sis, uint64_t now_jiffies)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001054{
1055 struct sfe_ipv4_connection_match *original_cm;
1056 struct sfe_ipv4_connection_match *reply_cm;
1057
1058 /*
1059 * Fill in the update message.
1060 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001061 sis->is_v6 = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001062 sis->protocol = c->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001063 sis->src_ip.ip = c->src_ip;
1064 sis->dest_ip.ip = c->dest_ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001065 sis->src_port = c->src_port;
1066 sis->dest_port = c->dest_port;
1067
1068 original_cm = c->original_match;
1069 reply_cm = c->reply_match;
1070 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
1071 sis->src_td_end = original_cm->protocol_state.tcp.end;
1072 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
1073 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
1074 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
1075 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
1076
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001077 sis->src_new_packet_count = original_cm->rx_packet_count;
1078 sis->src_new_byte_count = original_cm->rx_byte_count;
1079 sis->dest_new_packet_count = reply_cm->rx_packet_count;
1080 sis->dest_new_byte_count = reply_cm->rx_byte_count;
1081
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001082 sfe_ipv4_connection_match_update_summary_stats(original_cm);
1083 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
1084
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001085 sis->src_dev = original_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001086 sis->src_packet_count = original_cm->rx_packet_count64;
1087 sis->src_byte_count = original_cm->rx_byte_count64;
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001088
1089 sis->dest_dev = reply_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001090 sis->dest_packet_count = reply_cm->rx_packet_count64;
1091 sis->dest_byte_count = reply_cm->rx_byte_count64;
1092
1093 /*
1094 * Get the time increment since our last sync.
1095 */
1096 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
1097 c->last_sync_jiffies = now_jiffies;
1098}
1099
1100/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001101 * sfe_ipv4_flush_sfe_ipv4_connection()
1102 * Flush a connection and free all associated resources.
1103 *
1104 * We need to be called with bottom halves disabled locally as we need to acquire
1105 * the connection hash lock and release it again. In general we're actually called
1106 * from within a BH and so we're fine, but we're also called when connections are
1107 * torn down.
1108 */
1109static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1110{
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001111 struct sfe_connection_sync sis;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001112 uint64_t now_jiffies;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001113 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001114
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001115 rcu_read_lock();
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001116 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001117 si->connection_flushes++;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001118 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001119 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001120
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001121 if (sync_rule_callback) {
1122 /*
1123 * Generate a sync message and then sync.
1124 */
1125 now_jiffies = get_jiffies_64();
1126 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
1127 sync_rule_callback(&sis);
1128 }
1129
1130 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001131
1132 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001133 * Release our hold of the source and dest devices and free the memory
1134 * for our connection objects.
1135 */
1136 dev_put(c->original_dev);
1137 dev_put(c->reply_dev);
1138 kfree(c->original_match);
1139 kfree(c->reply_match);
1140 kfree(c);
1141}
1142
1143/*
1144 * sfe_ipv4_recv_udp()
1145 * Handle UDP packet receives and forwarding.
1146 */
1147static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001148 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001149{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001150 struct sfe_ipv4_udp_hdr *udph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001151 __be32 src_ip;
1152 __be32 dest_ip;
1153 __be16 src_port;
1154 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001155 struct sfe_ipv4_connection_match *cm;
1156 uint8_t ttl;
1157 struct net_device *xmit_dev;
1158
1159 /*
1160 * Is our packet too short to contain a valid UDP header?
1161 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001162 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001163 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001164 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1165 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001166 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001167
1168 DEBUG_TRACE("packet too short for UDP header\n");
1169 return 0;
1170 }
1171
1172 /*
1173 * Read the IP address and port information. Read the IP header data first
1174 * because we've almost certainly got that in the cache. We may not yet have
1175 * the UDP header cached though so allow more time for any prefetching.
1176 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001177 src_ip = iph->saddr;
1178 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001179
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001180 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001181 src_port = udph->source;
1182 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001183
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001184 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001185
1186 /*
1187 * Look for a connection match.
1188 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001189#ifdef CONFIG_NF_FLOW_COOKIE
1190 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1191 if (unlikely(!cm)) {
1192 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1193 }
1194#else
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001195 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001196#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001197 if (unlikely(!cm)) {
1198 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1199 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001200 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001201
1202 DEBUG_TRACE("no connection found\n");
1203 return 0;
1204 }
1205
1206 /*
1207 * If our packet has beern marked as "flush on find" we can't actually
1208 * forward it in the fast path, but now that we've found an associated
1209 * connection we can flush that out before we process the packet.
1210 */
1211 if (unlikely(flush_on_find)) {
1212 struct sfe_ipv4_connection *c = cm->connection;
1213 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1214 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1215 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001216 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001217
1218 DEBUG_TRACE("flush on find\n");
1219 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1220 return 0;
1221 }
1222
Zhi Chen8748eb32015-06-18 12:58:48 -07001223#ifdef CONFIG_XFRM
1224 /*
1225 * We can't accelerate the flow on this direction, just let it go
1226 * through the slow path.
1227 */
1228 if (unlikely(!cm->flow_accel)) {
1229 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001230 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001231 return 0;
1232 }
1233#endif
1234
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001235 /*
1236 * Does our TTL allow forwarding?
1237 */
1238 ttl = iph->ttl;
1239 if (unlikely(ttl < 2)) {
1240 struct sfe_ipv4_connection *c = cm->connection;
1241 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1242 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1243 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001244 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001245
1246 DEBUG_TRACE("ttl too low\n");
1247 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1248 return 0;
1249 }
1250
1251 /*
1252 * If our packet is larger than the MTU of the transmit interface then
1253 * we can't forward it easily.
1254 */
1255 if (unlikely(len > cm->xmit_dev_mtu)) {
1256 struct sfe_ipv4_connection *c = cm->connection;
1257 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1258 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1259 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001260 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001261
1262 DEBUG_TRACE("larger than mtu\n");
1263 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1264 return 0;
1265 }
1266
1267 /*
1268 * From this point on we're good to modify the packet.
1269 */
1270
1271 /*
1272 * Decrement our TTL.
1273 */
1274 iph->ttl = ttl - 1;
1275
1276 /*
1277 * Do we have to perform translations of the source address/port?
1278 */
1279 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1280 uint16_t udp_csum;
1281
Dave Hudson87973cd2013-10-22 16:00:04 +01001282 iph->saddr = cm->xlate_src_ip;
1283 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001284
1285 /*
1286 * Do we have a non-zero UDP checksum? If we do then we need
1287 * to update it.
1288 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001289 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001290 if (likely(udp_csum)) {
Xiaoping Fanad755af2015-04-01 16:58:46 -07001291 uint32_t sum;
1292
1293 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1294 sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
1295 } else {
1296 sum = udp_csum + cm->xlate_src_csum_adjustment;
1297 }
1298
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001299 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001300 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001301 }
1302 }
1303
1304 /*
1305 * Do we have to perform translations of the destination address/port?
1306 */
1307 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1308 uint16_t udp_csum;
1309
Dave Hudson87973cd2013-10-22 16:00:04 +01001310 iph->daddr = cm->xlate_dest_ip;
1311 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001312
1313 /*
1314 * Do we have a non-zero UDP checksum? If we do then we need
1315 * to update it.
1316 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001317 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001318 if (likely(udp_csum)) {
Xiaoping Fanad755af2015-04-01 16:58:46 -07001319 uint32_t sum;
1320
1321 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1322 sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
1323 } else {
1324 sum = udp_csum + cm->xlate_dest_csum_adjustment;
1325 }
1326
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001327 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001328 udph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001329 }
1330 }
1331
1332 /*
1333 * Replace the IP checksum.
1334 */
1335 iph->check = sfe_ipv4_gen_ip_csum(iph);
1336
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001337 /*
1338 * Update traffic stats.
1339 */
1340 cm->rx_packet_count++;
1341 cm->rx_byte_count += len;
1342
1343 /*
1344 * If we're not already on the active list then insert ourselves at the tail
1345 * of the current list.
1346 */
1347 if (unlikely(!cm->active)) {
1348 cm->active = true;
1349 cm->active_prev = si->active_tail;
1350 if (likely(si->active_tail)) {
1351 si->active_tail->active_next = cm;
1352 } else {
1353 si->active_head = cm;
1354 }
1355 si->active_tail = cm;
1356 }
1357
1358 xmit_dev = cm->xmit_dev;
1359 skb->dev = xmit_dev;
1360
1361 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001362 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001363 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001364 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1365 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001366 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1367 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001368 } else {
1369 /*
1370 * For the simple case we write this really fast.
1371 */
1372 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1373 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001374 eth->h_dest[0] = cm->xmit_dest_mac[0];
1375 eth->h_dest[1] = cm->xmit_dest_mac[1];
1376 eth->h_dest[2] = cm->xmit_dest_mac[2];
1377 eth->h_source[0] = cm->xmit_src_mac[0];
1378 eth->h_source[1] = cm->xmit_src_mac[1];
1379 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001380 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001381 }
1382
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001383 /*
1384 * Mark outgoing packet.
1385 */
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001386 skb->mark = cm->connection->mark;
1387 if (skb->mark) {
1388 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1389 }
1390
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001391 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001392 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001393
1394 /*
1395 * We're going to check for GSO flags when we transmit the packet so
1396 * start fetching the necessary cache line now.
1397 */
1398 prefetch(skb_shinfo(skb));
1399
1400 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001401 * Mark that this packet has been fast forwarded.
1402 */
1403 skb->fast_forwarded = 1;
1404
1405 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001406 * Send the packet on its way.
1407 */
1408 dev_queue_xmit(skb);
1409
1410 return 1;
1411}
1412
1413/*
1414 * sfe_ipv4_process_tcp_option_sack()
1415 * Parse TCP SACK option and update ack according
1416 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001417static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001418 uint32_t *ack) __attribute__((always_inline));
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001419static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001420 uint32_t *ack)
1421{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001422 uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001423 uint8_t *ptr = (uint8_t *)th + length;
1424
1425 /*
1426 * If option is TIMESTAMP discard it.
1427 */
1428 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1429 && likely(ptr[0] == TCPOPT_NOP)
1430 && likely(ptr[1] == TCPOPT_NOP)
1431 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1432 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1433 return true;
1434 }
1435
1436 /*
1437 * TCP options. Parse SACK option.
1438 */
1439 while (length < data_offs) {
1440 uint8_t size;
1441 uint8_t kind;
1442
1443 ptr = (uint8_t *)th + length;
1444 kind = *ptr;
1445
1446 /*
1447 * NOP, for padding
1448 * Not in the switch because to fast escape and to not calculate size
1449 */
1450 if (kind == TCPOPT_NOP) {
1451 length++;
1452 continue;
1453 }
1454
1455 if (kind == TCPOPT_SACK) {
1456 uint32_t sack = 0;
1457 uint8_t re = 1 + 1;
1458
1459 size = *(ptr + 1);
1460 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1461 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1462 || (size > (data_offs - length))) {
1463 return false;
1464 }
1465
1466 re += 4;
1467 while (re < size) {
1468 uint32_t sack_re;
1469 uint8_t *sptr = ptr + re;
1470 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1471 if (sack_re > sack) {
1472 sack = sack_re;
1473 }
1474 re += TCPOLEN_SACK_PERBLOCK;
1475 }
1476 if (sack > *ack) {
1477 *ack = sack;
1478 }
1479 length += size;
1480 continue;
1481 }
1482 if (kind == TCPOPT_EOL) {
1483 return true;
1484 }
1485 size = *(ptr + 1);
1486 if (size < 2) {
1487 return false;
1488 }
1489 length += size;
1490 }
1491
1492 return true;
1493}
1494
1495/*
1496 * sfe_ipv4_recv_tcp()
1497 * Handle TCP packet receives and forwarding.
1498 */
1499static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001500 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001501{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001502 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001503 __be32 src_ip;
1504 __be32 dest_ip;
1505 __be16 src_port;
1506 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001507 struct sfe_ipv4_connection_match *cm;
1508 struct sfe_ipv4_connection_match *counter_cm;
1509 uint8_t ttl;
1510 uint32_t flags;
1511 struct net_device *xmit_dev;
1512
1513 /*
1514 * Is our packet too short to contain a valid TCP header?
1515 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001516 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001517 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001518 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1519 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001520 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001521
1522 DEBUG_TRACE("packet too short for TCP header\n");
1523 return 0;
1524 }
1525
1526 /*
1527 * Read the IP address and port information. Read the IP header data first
1528 * because we've almost certainly got that in the cache. We may not yet have
1529 * the TCP header cached though so allow more time for any prefetching.
1530 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001531 src_ip = iph->saddr;
1532 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001533
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001534 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001535 src_port = tcph->source;
1536 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001537 flags = tcp_flag_word(tcph);
1538
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001539 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001540
1541 /*
1542 * Look for a connection match.
1543 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001544#ifdef CONFIG_NF_FLOW_COOKIE
1545 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1546 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001547 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001548 }
1549#else
Matthew McClintock37858802015-02-03 12:12:02 -06001550 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001551#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001552 if (unlikely(!cm)) {
1553 /*
1554 * We didn't get a connection but as TCP is connection-oriented that
1555 * may be because this is a non-fast connection (not running established).
1556 * For diagnostic purposes we differentiate this here.
1557 */
1558 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1559 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1560 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001561 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001562
1563 DEBUG_TRACE("no connection found - fast flags\n");
1564 return 0;
1565 }
1566 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1567 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001568 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001569
1570 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1571 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1572 return 0;
1573 }
1574
1575 /*
1576 * If our packet has been marked as "flush on find" we can't actually
1577 * forward it in the fast path, but now that we've found an associated
1578 * connection we can flush that out before we process the packet.
1579 */
1580 if (unlikely(flush_on_find)) {
1581 struct sfe_ipv4_connection *c = cm->connection;
1582 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1583 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1584 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001585 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001586
1587 DEBUG_TRACE("flush on find\n");
1588 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1589 return 0;
1590 }
1591
Zhi Chen8748eb32015-06-18 12:58:48 -07001592#ifdef CONFIG_XFRM
1593 /*
1594 * We can't accelerate the flow on this direction, just let it go
1595 * through the slow path.
1596 */
1597 if (unlikely(!cm->flow_accel)) {
1598 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001599 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001600 return 0;
1601 }
1602#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001603 /*
1604 * Does our TTL allow forwarding?
1605 */
1606 ttl = iph->ttl;
1607 if (unlikely(ttl < 2)) {
1608 struct sfe_ipv4_connection *c = cm->connection;
1609 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1610 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1611 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001612 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001613
1614 DEBUG_TRACE("ttl too low\n");
1615 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1616 return 0;
1617 }
1618
1619 /*
1620 * If our packet is larger than the MTU of the transmit interface then
1621 * we can't forward it easily.
1622 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001623 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001624 struct sfe_ipv4_connection *c = cm->connection;
1625 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1626 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1627 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001628 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001629
1630 DEBUG_TRACE("larger than mtu\n");
1631 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1632 return 0;
1633 }
1634
1635 /*
1636 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1637 * set is not a fast path packet.
1638 */
1639 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1640 struct sfe_ipv4_connection *c = cm->connection;
1641 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1642 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1643 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001644 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001645
1646 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1647 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1648 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1649 return 0;
1650 }
1651
1652 counter_cm = cm->counter_match;
1653
1654 /*
1655 * Are we doing sequence number checking?
1656 */
1657 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1658 uint32_t seq;
1659 uint32_t ack;
1660 uint32_t sack;
1661 uint32_t data_offs;
1662 uint32_t end;
1663 uint32_t left_edge;
1664 uint32_t scaled_win;
1665 uint32_t max_end;
1666
1667 /*
1668 * Is our sequence fully past the right hand edge of the window?
1669 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001670 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001671 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1672 struct sfe_ipv4_connection *c = cm->connection;
1673 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1674 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1675 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001676 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001677
1678 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1679 seq, cm->protocol_state.tcp.max_end + 1);
1680 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1681 return 0;
1682 }
1683
1684 /*
1685 * Check that our TCP data offset isn't too short.
1686 */
1687 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001688 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001689 struct sfe_ipv4_connection *c = cm->connection;
1690 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1691 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1692 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001693 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001694
1695 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1696 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1697 return 0;
1698 }
1699
1700 /*
1701 * Update ACK according to any SACK option.
1702 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001703 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001704 sack = ack;
1705 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1706 struct sfe_ipv4_connection *c = cm->connection;
1707 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1708 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1709 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001710 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001711
1712 DEBUG_TRACE("TCP option SACK size is wrong\n");
1713 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1714 return 0;
1715 }
1716
1717 /*
1718 * Check that our TCP data offset isn't past the end of the packet.
1719 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001720 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001721 if (unlikely(len < data_offs)) {
1722 struct sfe_ipv4_connection *c = cm->connection;
1723 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1724 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1725 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001726 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001727
1728 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1729 data_offs, len);
1730 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1731 return 0;
1732 }
1733
1734 end = seq + len - data_offs;
1735
1736 /*
1737 * Is our sequence fully before the left hand edge of the window?
1738 */
1739 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1740 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1741 struct sfe_ipv4_connection *c = cm->connection;
1742 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1743 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1744 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001745 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001746
1747 DEBUG_TRACE("seq: %u before left edge: %u\n",
1748 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1749 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1750 return 0;
1751 }
1752
1753 /*
1754 * Are we acking data that is to the right of what has been sent?
1755 */
1756 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1757 struct sfe_ipv4_connection *c = cm->connection;
1758 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1759 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1760 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001761 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001762
1763 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1764 sack, counter_cm->protocol_state.tcp.end + 1);
1765 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1766 return 0;
1767 }
1768
1769 /*
1770 * Is our ack too far before the left hand edge of the window?
1771 */
1772 left_edge = counter_cm->protocol_state.tcp.end
1773 - cm->protocol_state.tcp.max_win
1774 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1775 - 1;
1776 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1777 struct sfe_ipv4_connection *c = cm->connection;
1778 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1779 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1780 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001781 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001782
1783 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1784 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1785 return 0;
1786 }
1787
1788 /*
1789 * Have we just seen the largest window size yet for this connection? If yes
1790 * then we need to record the new value.
1791 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001792 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001793 scaled_win += (sack - ack);
1794 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1795 cm->protocol_state.tcp.max_win = scaled_win;
1796 }
1797
1798 /*
1799 * If our sequence and/or ack numbers have advanced then record the new state.
1800 */
1801 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1802 cm->protocol_state.tcp.end = end;
1803 }
1804
1805 max_end = sack + scaled_win;
1806 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1807 counter_cm->protocol_state.tcp.max_end = max_end;
1808 }
1809 }
1810
1811 /*
1812 * From this point on we're good to modify the packet.
1813 */
1814
1815 /*
1816 * Decrement our TTL.
1817 */
1818 iph->ttl = ttl - 1;
1819
1820 /*
1821 * Do we have to perform translations of the source address/port?
1822 */
1823 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1824 uint16_t tcp_csum;
1825 uint32_t sum;
1826
Dave Hudson87973cd2013-10-22 16:00:04 +01001827 iph->saddr = cm->xlate_src_ip;
1828 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001829
1830 /*
1831 * Adjust the TCP checksum for the translated source address/port,
1832 * using the adjustment that matches this skb's checksum mode.
1833 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001834 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001835 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1836 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1837 } else {
1838 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1839 }
1840
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001841 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001842 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001843 }
1844
1845 /*
1846 * Do we have to perform translations of the destination address/port?
1847 */
1848 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1849 uint16_t tcp_csum;
1850 uint32_t sum;
1851
Dave Hudson87973cd2013-10-22 16:00:04 +01001852 iph->daddr = cm->xlate_dest_ip;
1853 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001854
1855 /*
1856 * Adjust the TCP checksum for the translated destination address/port,
1857 * using the adjustment that matches this skb's checksum mode.
1858 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001859 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001860 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1861 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1862 } else {
1863 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1864 }
1865
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001866 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001867 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001868 }
1869
1870 /*
1871 * Replace the IP checksum.
1872 */
1873 iph->check = sfe_ipv4_gen_ip_csum(iph);
1874
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001875 /*
1876 * Update traffic stats.
1877 */
1878 cm->rx_packet_count++;
1879 cm->rx_byte_count += len;
1880
1881 /*
1882 * If we're not already on the active list then insert ourselves at the tail
1883 * of the current list.
1884 */
1885 if (unlikely(!cm->active)) {
1886 cm->active = true;
1887 cm->active_prev = si->active_tail;
1888 if (likely(si->active_tail)) {
1889 si->active_tail->active_next = cm;
1890 } else {
1891 si->active_head = cm;
1892 }
1893 si->active_tail = cm;
1894 }
1895
1896 xmit_dev = cm->xmit_dev;
1897 skb->dev = xmit_dev;
1898
1899 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001900 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001901 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001902 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1903 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001904 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1905 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001906 } else {
1907 /*
1908 * For the simple case we write this really fast.
1909 */
1910 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1911 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001912 eth->h_dest[0] = cm->xmit_dest_mac[0];
1913 eth->h_dest[1] = cm->xmit_dest_mac[1];
1914 eth->h_dest[2] = cm->xmit_dest_mac[2];
1915 eth->h_source[0] = cm->xmit_src_mac[0];
1916 eth->h_source[1] = cm->xmit_src_mac[1];
1917 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001918 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001919 }
1920
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001921 /*
1922 * Mark outgoing packet
1923 */
1924 skb->mark = cm->connection->mark;
1925 if (skb->mark) {
1926 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1927 }
1928
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001929 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001930 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001931
1932 /*
1933 * We're going to check for GSO flags when we transmit the packet so
1934 * start fetching the necessary cache line now.
1935 */
1936 prefetch(skb_shinfo(skb));
1937
1938 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001939 * Mark that this packet has been fast forwarded.
1940 */
1941 skb->fast_forwarded = 1;
1942
1943 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001944 * Send the packet on its way.
1945 */
1946 dev_queue_xmit(skb);
1947
1948 return 1;
1949}
1950
1951/*
1952 * sfe_ipv4_recv_icmp()
1953 * Handle ICMP packet receives.
1954 *
1955 * ICMP packets aren't handled as a "fast path" and always have us process them
1956 * through the default Linux stack. What we do need to do is look for any errors
1957 * about connections we are handling in the fast path. If we find any such
1958 * connections then we want to flush their state so that the ICMP error path
1959 * within Linux has all of the correct state should it need it.
1960 */
1961static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001962 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001963{
1964 struct icmphdr *icmph;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001965 struct sfe_ipv4_ip_hdr *icmp_iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001966 unsigned int icmp_ihl_words;
1967 unsigned int icmp_ihl;
1968 uint32_t *icmp_trans_h;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001969 struct sfe_ipv4_udp_hdr *icmp_udph;
1970 struct sfe_ipv4_tcp_hdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001971 __be32 src_ip;
1972 __be32 dest_ip;
1973 __be16 src_port;
1974 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001975 struct sfe_ipv4_connection_match *cm;
1976 struct sfe_ipv4_connection *c;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001977 uint32_t pull_len = sizeof(struct icmphdr) + ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001978
1979 /*
1980 * Is our packet too short to contain a valid UDP header?
1981 */
1982 len -= ihl;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001983 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001984 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001985 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
1986 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001987 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001988
1989 DEBUG_TRACE("packet too short for ICMP header\n");
1990 return 0;
1991 }
1992
1993 /*
1994 * We only handle "destination unreachable" and "time exceeded" messages.
1995 */
1996 icmph = (struct icmphdr *)(skb->data + ihl);
1997 if ((icmph->type != ICMP_DEST_UNREACH)
1998 && (icmph->type != ICMP_TIME_EXCEEDED)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001999 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002000 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2001 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002002 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002003
2004 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2005 return 0;
2006 }
2007
2008 /*
2009 * Do we have the full embedded IP header?
2010 */
2011 len -= sizeof(struct icmphdr);
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002012 pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2013 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002014 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002015 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2016 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002017 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002018
2019 DEBUG_TRACE("Embedded IP header not complete\n");
2020 return 0;
2021 }
2022
2023 /*
2024 * Is our embedded IP version wrong?
2025 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002026 icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002027 if (unlikely(icmp_iph->version != 4)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002028 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002029 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2030 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002031 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002032
2033 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2034 return 0;
2035 }
2036
2037 /*
2038 * Do we have the full embedded IP header, including any options?
2039 */
2040 icmp_ihl_words = icmp_iph->ihl;
2041 icmp_ihl = icmp_ihl_words << 2;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002042 pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2043 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002044 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002045 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2046 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002047 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002048
2049 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2050 return 0;
2051 }
2052
Nicolas Costaac2979c2014-01-14 10:35:24 -06002053 len -= icmp_ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002054 icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;
2055
2056 /*
2057 * Handle the embedded transport layer header.
2058 */
2059 switch (icmp_iph->protocol) {
2060 case IPPROTO_UDP:
2061 /*
2062 * We should have 8 bytes of UDP header - that's enough to identify
2063 * the connection.
2064 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002065 pull_len += 8;
2066 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002067 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002068 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2069 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002070 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002071
2072 DEBUG_TRACE("Incomplete embedded UDP header\n");
2073 return 0;
2074 }
2075
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002076 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002077 src_port = icmp_udph->source;
2078 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002079 break;
2080
2081 case IPPROTO_TCP:
2082 /*
2083 * We should have 8 bytes of TCP header - that's enough to identify
2084 * the connection.
2085 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002086 pull_len += 8;
2087 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002088 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002089 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2090 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002091 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002092
2093 DEBUG_TRACE("Incomplete embedded TCP header\n");
2094 return 0;
2095 }
2096
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002097 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002098 src_port = icmp_tcph->source;
2099 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002100 break;
2101
2102 default:
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002103 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002104 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2105 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002106 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002107
2108 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2109 return 0;
2110 }
2111
Dave Hudson87973cd2013-10-22 16:00:04 +01002112 src_ip = icmp_iph->saddr;
2113 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002114
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002115 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002116
2117 /*
2118 * Look for a connection match. Note that we reverse the source and destination
2119 * here because our embedded message contains a packet that was sent in the
2120 * opposite direction to the one in which we just received it. It will have
2121 * been sent on the interface from which we received it though so that's still
2122 * ok to use.
2123 */
2124 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2125 if (unlikely(!cm)) {
2126 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2127 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002128 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002129
2130 DEBUG_TRACE("no connection found\n");
2131 return 0;
2132 }
2133
2134 /*
2135 * We found a connection so now remove it from the connection list and flush
2136 * its state.
2137 */
2138 c = cm->connection;
2139 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2140 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2141 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002142 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002143
2144 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2145 return 0;
2146}
2147
2148/*
2149 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002150 * Handle packet receives and forwaring.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002151 *
2152 * Returns 1 if the packet is forwarded or 0 if it isn't.
2153 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002154int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002155{
2156 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002157 unsigned int len;
2158 unsigned int tot_len;
2159 unsigned int frag_off;
2160 unsigned int ihl;
2161 bool flush_on_find;
2162 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002163 struct sfe_ipv4_ip_hdr *iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002164 uint32_t protocol;
2165
2166 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002167 * Check that we have space for an IP header here.
2168 */
2169 len = skb->len;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002170 if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002171 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002172 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2173 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002174 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002175
2176 DEBUG_TRACE("len: %u is too short\n", len);
2177 return 0;
2178 }
2179
2180 /*
2181 * Check that our "total length" is large enough for an IP header.
2182 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002183 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002184 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002185 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002186 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002187 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2188 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002189 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002190
2191 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2192 return 0;
2193 }
2194
2195 /*
2196 * Is our IP version wrong?
2197 */
2198 if (unlikely(iph->version != 4)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002199 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002200 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2201 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002202 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002203
2204 DEBUG_TRACE("IP version: %u\n", iph->version);
2205 return 0;
2206 }
2207
2208 /*
2209 * Does our datagram fit inside the skb?
2210 */
2211 if (unlikely(tot_len > len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002212 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002213 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2214 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002215 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002216
2217 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2218 return 0;
2219 }
2220
2221 /*
2222 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002223 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002224 frag_off = ntohs(iph->frag_off);
2225 if (unlikely(frag_off & IP_OFFSET)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002226 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002227 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2228 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002229 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002230
2231 DEBUG_TRACE("non-initial fragment\n");
2232 return 0;
2233 }
2234
2235 /*
2236 * If we have a (first) fragment then mark it to cause any connection to flush.
2237 */
2238 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2239
2240 /*
2241 * Do we have any IP options? That's definite a slow path! If we do have IP
2242 * options we need to recheck our header size.
2243 */
2244 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002245 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002246 if (unlikely(ip_options)) {
2247 if (unlikely(len < ihl)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002248 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002249 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2250 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002251 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002252
2253 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2254 return 0;
2255 }
2256
2257 flush_on_find = true;
2258 }
2259
2260 protocol = iph->protocol;
2261 if (IPPROTO_UDP == protocol) {
2262 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2263 }
2264
2265 if (IPPROTO_TCP == protocol) {
2266 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2267 }
2268
2269 if (IPPROTO_ICMP == protocol) {
2270 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2271 }
2272
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002273 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002274 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2275 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002276 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002277
2278 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2279 return 0;
2280}
2281
Nicolas Costa436926b2014-01-14 10:36:22 -06002282static void
2283sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002284 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002285{
2286 struct sfe_ipv4_connection_match *orig_cm;
2287 struct sfe_ipv4_connection_match *repl_cm;
2288 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2289 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2290
2291 orig_cm = c->original_match;
2292 repl_cm = c->reply_match;
2293 orig_tcp = &orig_cm->protocol_state.tcp;
2294 repl_tcp = &repl_cm->protocol_state.tcp;
2295
2296 /* update orig */
2297 if (orig_tcp->max_win < sic->src_td_max_window) {
2298 orig_tcp->max_win = sic->src_td_max_window;
2299 }
2300 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2301 orig_tcp->end = sic->src_td_end;
2302 }
2303 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2304 orig_tcp->max_end = sic->src_td_max_end;
2305 }
2306
2307 /* update reply */
2308 if (repl_tcp->max_win < sic->dest_td_max_window) {
2309 repl_tcp->max_win = sic->dest_td_max_window;
2310 }
2311 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2312 repl_tcp->end = sic->dest_td_end;
2313 }
2314 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2315 repl_tcp->max_end = sic->dest_td_max_end;
2316 }
2317
2318 /* update match flags */
2319 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2320 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002321 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002322 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2323 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2324 }
2325}
2326
2327static void
2328sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002329 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002330{
2331 switch (sic->protocol) {
2332 case IPPROTO_TCP:
2333 sfe_ipv4_update_tcp_state(c, sic);
2334 break;
2335 }
2336}
2337
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002338void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002339{
2340 struct sfe_ipv4_connection *c;
2341 struct sfe_ipv4 *si = &__si;
2342
2343 spin_lock_bh(&si->lock);
2344
2345 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2346 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002347 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002348 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002349 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002350 sic->dest_port);
2351 if (c != NULL) {
2352 sfe_ipv4_update_protocol_state(c, sic);
2353 }
2354
2355 spin_unlock_bh(&si->lock);
2356}
2357
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002358/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002359 * sfe_ipv4_create_rule()
2360 * Create a forwarding rule.
2361 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002362int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002363{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002364 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002365 struct sfe_ipv4_connection *c;
2366 struct sfe_ipv4_connection_match *original_cm;
2367 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002368 struct net_device *dest_dev;
2369 struct net_device *src_dev;
2370
2371 dest_dev = sic->dest_dev;
2372 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002373
Matthew McClintock389b42a2014-09-24 14:05:51 -05002374 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2375 (src_dev->reg_state != NETREG_REGISTERED))) {
2376 return -EINVAL;
2377 }
2378
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002379 spin_lock_bh(&si->lock);
2380 si->connection_create_requests++;
2381
2382 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002383 * Check to see if there is already a flow that matches the rule we're
2384 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002385 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002386 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2387 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002388 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002389 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002390 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002391 sic->dest_port);
2392 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002393 si->connection_create_collisions++;
2394
2395 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002396 * If we already have the flow then it's likely that this
2397 * request to create the connection rule contains more
2398 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002399 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002400 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002401 spin_unlock_bh(&si->lock);
2402
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002403 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002404 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002405 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002406 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2407 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002408 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002409 }
2410
2411 /*
2412 * Allocate the various connection tracking objects.
2413 */
2414 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2415 if (unlikely(!c)) {
2416 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002417 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002418 }
2419
2420 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2421 if (unlikely(!original_cm)) {
2422 spin_unlock_bh(&si->lock);
2423 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002424 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002425 }
2426
2427 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2428 if (unlikely(!reply_cm)) {
2429 spin_unlock_bh(&si->lock);
2430 kfree(original_cm);
2431 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002432 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002433 }
2434
2435 /*
2436 * Fill in the "original" direction connection matching object.
2437 * Note that the transmit MAC address is "dest_mac_xlate" because
2438 * we always know both ends of a connection by their translated
2439 * addresses and not their public addresses.
2440 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002441 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002442 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002443 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002444 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002445 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002446 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002447 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002448 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002449 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002450 original_cm->xlate_dest_port = sic->dest_port_xlate;
2451 original_cm->rx_packet_count = 0;
2452 original_cm->rx_packet_count64 = 0;
2453 original_cm->rx_byte_count = 0;
2454 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002455 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002456 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002457 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002458 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2459 original_cm->connection = c;
2460 original_cm->counter_match = reply_cm;
2461 original_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002462#ifdef CONFIG_NF_FLOW_COOKIE
2463 original_cm->flow_cookie = 0;
2464#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002465#ifdef CONFIG_XFRM
2466 original_cm->flow_accel = sic->original_accel;
2467#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002468 original_cm->active_next = NULL;
2469 original_cm->active_prev = NULL;
2470 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002471
2472 /*
2473 * For PPP links we don't write an L2 header. For everything else we do.
2474 */
2475 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2476 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2477
2478 /*
2479 * If our dev writes Ethernet headers then we can write a really fast
2480 * version.
2481 */
2482 if (dest_dev->header_ops) {
2483 if (dest_dev->header_ops->create == eth_header) {
2484 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2485 }
2486 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002487 }
2488
2489 /*
2490 * Fill in the "reply" direction connection matching object.
2491 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002492 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002493 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002494 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002495 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002496 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002497 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002498 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002499 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002500 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002501 reply_cm->xlate_dest_port = sic->src_port;
2502 reply_cm->rx_packet_count = 0;
2503 reply_cm->rx_packet_count64 = 0;
2504 reply_cm->rx_byte_count = 0;
2505 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002506 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002507 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002508 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002509 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2510 reply_cm->connection = c;
2511 reply_cm->counter_match = original_cm;
2512 reply_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002513#ifdef CONFIG_NF_FLOW_COOKIE
2514 reply_cm->flow_cookie = 0;
2515#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002516#ifdef CONFIG_XFRM
2517 reply_cm->flow_accel = sic->reply_accel;
2518#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002519 reply_cm->active_next = NULL;
2520 reply_cm->active_prev = NULL;
2521 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002522
2523 /*
2524 * For PPP links we don't write an L2 header. For everything else we do.
2525 */
2526 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2527 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2528
2529 /*
2530 * If our dev writes Ethernet headers then we can write a really fast
2531 * version.
2532 */
2533 if (src_dev->header_ops) {
2534 if (src_dev->header_ops->create == eth_header) {
2535 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2536 }
2537 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002538 }
2539
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002540
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002541 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002542 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2543 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2544 }
2545
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002546 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002547 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2548 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2549 }
2550
2551 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002552 c->src_ip = sic->src_ip.ip;
2553 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002554 c->src_port = sic->src_port;
2555 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002556 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002557 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002558 c->dest_ip = sic->dest_ip.ip;
2559 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002560 c->dest_port = sic->dest_port;
2561 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002562 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002563 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002564 c->mark = sic->mark;
Xiaoping Fan34586472015-07-03 02:20:35 -07002565 c->debug_read_seq = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002566 c->last_sync_jiffies = get_jiffies_64();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002567
2568 /*
2569 * Take hold of our source and dest devices for the duration of the connection.
2570 */
2571 dev_hold(c->original_dev);
2572 dev_hold(c->reply_dev);
2573
2574 /*
2575 * Initialize the protocol-specific information that we track.
2576 */
2577 switch (sic->protocol) {
2578 case IPPROTO_TCP:
2579 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2580 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2581 original_cm->protocol_state.tcp.end = sic->src_td_end;
2582 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2583 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2584 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2585 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2586 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002587 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002588 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2589 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2590 }
2591 break;
2592 }
2593
2594 sfe_ipv4_connection_match_compute_translations(original_cm);
2595 sfe_ipv4_connection_match_compute_translations(reply_cm);
2596 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2597
2598 spin_unlock_bh(&si->lock);
2599
2600 /*
2601 * We have everything we need!
2602 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002603 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002604 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2605 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002606 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002607 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002608 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002609 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002610 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002611
2612 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002613}
2614
2615/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002616 * sfe_ipv4_destroy_rule()
2617 * Destroy a forwarding rule.
2618 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002619void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002620{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002621 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002622 struct sfe_ipv4_connection *c;
2623
2624 spin_lock_bh(&si->lock);
2625 si->connection_destroy_requests++;
2626
2627 /*
2628 * Check to see if we have a flow that matches the rule we're trying
2629 * to destroy. If there isn't then we can't destroy it.
2630 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002631 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2632 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002633 if (!c) {
2634 si->connection_destroy_misses++;
2635 spin_unlock_bh(&si->lock);
2636
2637 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002638 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2639 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002640 return;
2641 }
2642
2643 /*
2644 * Remove our connection details from the hash tables.
2645 */
2646 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2647 spin_unlock_bh(&si->lock);
2648
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002649 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002650
2651 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002652 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2653 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002654}
2655
2656/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002657 * sfe_ipv4_register_sync_rule_callback()
2658 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002659 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002660void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002661{
2662 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002663
2664 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002665 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002666 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002667}
2668
2669/*
2670 * sfe_ipv4_get_debug_dev()
2671 */
2672static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2673 struct device_attribute *attr,
2674 char *buf)
2675{
2676 struct sfe_ipv4 *si = &__si;
2677 ssize_t count;
2678 int num;
2679
2680 spin_lock_bh(&si->lock);
2681 num = si->debug_dev;
2682 spin_unlock_bh(&si->lock);
2683
2684 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2685 return count;
2686}
2687
2688/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002689 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002690 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002691static const struct device_attribute sfe_ipv4_debug_dev_attr =
2692 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2693
2694/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002695 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002696 * Destroy all connections that match a particular device.
2697 *
2698 * If we pass dev as NULL then this destroys all connections.
2699 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002700void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002701{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002702 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002703 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002704
Xiaoping Fan34586472015-07-03 02:20:35 -07002705another_round:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002706 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002707
Xiaoping Fan34586472015-07-03 02:20:35 -07002708 for (c = si->all_connections_head; c; c = c->all_connections_next) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002709 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002710 * Does this connection relate to the device we are destroying?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002711 */
2712 if (!dev
2713 || (dev == c->original_dev)
2714 || (dev == c->reply_dev)) {
Xiaoping Fan34586472015-07-03 02:20:35 -07002715 break;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002716 }
Xiaoping Fan34586472015-07-03 02:20:35 -07002717 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002718
Xiaoping Fan34586472015-07-03 02:20:35 -07002719 if (c) {
2720 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002721 }
2722
2723 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002724
2725 if (c) {
2726 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2727 goto another_round;
2728 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002729}
2730
2731/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002732 * sfe_ipv4_periodic_sync()
2733 */
2734static void sfe_ipv4_periodic_sync(unsigned long arg)
2735{
2736 struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
2737 uint64_t now_jiffies;
2738 int quota;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002739 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002740
2741 now_jiffies = get_jiffies_64();
2742
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002743 rcu_read_lock();
2744 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2745 if (!sync_rule_callback) {
2746 rcu_read_unlock();
2747 goto done;
2748 }
2749
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002750 spin_lock_bh(&si->lock);
2751 sfe_ipv4_update_summary_stats(si);
2752
2753 /*
2754 * Get an estimate of the number of connections to parse in this sync.
2755 */
2756 quota = (si->num_connections + 63) / 64;
2757
2758 /*
2759 * Walk the "active" list and sync the connection state.
2760 */
2761 while (quota--) {
2762 struct sfe_ipv4_connection_match *cm;
2763 struct sfe_ipv4_connection_match *counter_cm;
2764 struct sfe_ipv4_connection *c;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002765 struct sfe_connection_sync sis;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002766
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002767 cm = si->active_head;
2768 if (!cm) {
2769 break;
2770 }
2771
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002772 /*
Nicolas Costabafb3af2014-01-29 16:39:39 -06002773 * There's a possibility that our counter match is in the active list too.
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002774 * If it is then remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002775 */
2776 counter_cm = cm->counter_match;
2777 if (counter_cm->active) {
2778 counter_cm->active = false;
2779
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002780 /*
2781 * We must have a connection preceding this counter match
2782 * because that's the one that got us to this point, so we don't have
2783 * to worry about removing the head of the list.
2784 */
2785 counter_cm->active_prev->active_next = counter_cm->active_next;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002786
2787 if (likely(counter_cm->active_next)) {
2788 counter_cm->active_next->active_prev = counter_cm->active_prev;
2789 } else {
2790 si->active_tail = counter_cm->active_prev;
2791 }
2792
2793 counter_cm->active_next = NULL;
2794 counter_cm->active_prev = NULL;
2795 }
2796
2797 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002798 * Now remove the head of the active scan list.
2799 */
2800 cm->active = false;
2801 si->active_head = cm->active_next;
2802 if (likely(cm->active_next)) {
2803 cm->active_next->active_prev = NULL;
2804 } else {
2805 si->active_tail = NULL;
2806 }
2807 cm->active_next = NULL;
2808
2809 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002810 * Sync the connection state.
2811 */
2812 c = cm->connection;
2813 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
2814
2815 /*
2816 * We don't want to be holding the lock when we sync!
2817 */
2818 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002819 sync_rule_callback(&sis);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002820 spin_lock_bh(&si->lock);
2821 }
2822
2823 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002824 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002825
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002826done:
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002827 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002828}
2829
2830#define CHAR_DEV_MSG_SIZE 768
2831
2832/*
2833 * sfe_ipv4_debug_dev_read_start()
2834 * Generate part of the XML output.
2835 */
2836static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2837 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2838{
2839 int bytes_read;
2840
Xiaoping Fan34586472015-07-03 02:20:35 -07002841 si->debug_read_seq++;
2842
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002843 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2844 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2845 return false;
2846 }
2847
2848 *length -= bytes_read;
2849 *total_read += bytes_read;
2850
2851 ws->state++;
2852 return true;
2853}
2854
2855/*
2856 * sfe_ipv4_debug_dev_read_connections_start()
2857 * Generate part of the XML output.
2858 */
2859static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2860 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2861{
2862 int bytes_read;
2863
2864 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2865 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2866 return false;
2867 }
2868
2869 *length -= bytes_read;
2870 *total_read += bytes_read;
2871
2872 ws->state++;
2873 return true;
2874}
2875
2876/*
2877 * sfe_ipv4_debug_dev_read_connections_connection()
2878 * Generate part of the XML output.
2879 */
2880static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2881 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2882{
2883 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002884 struct sfe_ipv4_connection_match *original_cm;
2885 struct sfe_ipv4_connection_match *reply_cm;
2886 int bytes_read;
2887 int protocol;
2888 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002889 __be32 src_ip;
2890 __be32 src_ip_xlate;
2891 __be16 src_port;
2892 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002893 uint64_t src_rx_packets;
2894 uint64_t src_rx_bytes;
2895 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002896 __be32 dest_ip;
2897 __be32 dest_ip_xlate;
2898 __be16 dest_port;
2899 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002900 uint64_t dest_rx_packets;
2901 uint64_t dest_rx_bytes;
2902 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002903 uint32_t mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002904#ifdef CONFIG_NF_FLOW_COOKIE
2905 int src_flow_cookie, dst_flow_cookie;
2906#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002907
2908 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002909
2910 for (c = si->all_connections_head; c; c = c->all_connections_next) {
2911 if (c->debug_read_seq < si->debug_read_seq) {
2912 c->debug_read_seq = si->debug_read_seq;
2913 break;
2914 }
2915 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002916
2917 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002918 * If there were no connections then move to the next state.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002919 */
2920 if (!c) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002921 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002922 ws->state++;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002923 return true;
2924 }
2925
2926 original_cm = c->original_match;
2927 reply_cm = c->reply_match;
2928
2929 protocol = c->protocol;
2930 src_dev = c->original_dev;
2931 src_ip = c->src_ip;
2932 src_ip_xlate = c->src_ip_xlate;
2933 src_port = c->src_port;
2934 src_port_xlate = c->src_port_xlate;
2935
2936 sfe_ipv4_connection_match_update_summary_stats(original_cm);
2937 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
2938
2939 src_rx_packets = original_cm->rx_packet_count64;
2940 src_rx_bytes = original_cm->rx_byte_count64;
2941 dest_dev = c->reply_dev;
2942 dest_ip = c->dest_ip;
2943 dest_ip_xlate = c->dest_ip_xlate;
2944 dest_port = c->dest_port;
2945 dest_port_xlate = c->dest_port_xlate;
2946 dest_rx_packets = reply_cm->rx_packet_count64;
2947 dest_rx_bytes = reply_cm->rx_byte_count64;
2948 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002949 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002950#ifdef CONFIG_NF_FLOW_COOKIE
2951 src_flow_cookie = original_cm->flow_cookie;
2952 dst_flow_cookie = reply_cm->flow_cookie;
2953#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002954 spin_unlock_bh(&si->lock);
2955
2956 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
2957 "protocol=\"%u\" "
2958 "src_dev=\"%s\" "
2959 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
2960 "src_port=\"%u\" src_port_xlate=\"%u\" "
2961 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
2962 "dest_dev=\"%s\" "
2963 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
2964 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
2965 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002966#ifdef CONFIG_NF_FLOW_COOKIE
2967 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
2968#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002969 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06002970 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002971 protocol,
2972 src_dev->name,
2973 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002974 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002975 src_rx_packets, src_rx_bytes,
2976 dest_dev->name,
2977 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002978 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002979 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002980#ifdef CONFIG_NF_FLOW_COOKIE
2981 src_flow_cookie, dst_flow_cookie,
2982#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002983 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002984
2985 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2986 return false;
2987 }
2988
2989 *length -= bytes_read;
2990 *total_read += bytes_read;
2991
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002992 return true;
2993}
2994
2995/*
2996 * sfe_ipv4_debug_dev_read_connections_end()
2997 * Generate part of the XML output.
2998 */
2999static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3000 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3001{
3002 int bytes_read;
3003
3004 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3005 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3006 return false;
3007 }
3008
3009 *length -= bytes_read;
3010 *total_read += bytes_read;
3011
3012 ws->state++;
3013 return true;
3014}
3015
3016/*
3017 * sfe_ipv4_debug_dev_read_exceptions_start()
3018 * Generate part of the XML output.
3019 */
3020static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3021 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3022{
3023 int bytes_read;
3024
3025 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3026 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3027 return false;
3028 }
3029
3030 *length -= bytes_read;
3031 *total_read += bytes_read;
3032
3033 ws->state++;
3034 return true;
3035}
3036
3037/*
3038 * sfe_ipv4_debug_dev_read_exceptions_exception()
3039 * Generate part of the XML output.
3040 */
3041static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3042 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3043{
3044 uint64_t ct;
3045
3046 spin_lock_bh(&si->lock);
3047 ct = si->exception_events64[ws->iter_exception];
3048 spin_unlock_bh(&si->lock);
3049
3050 if (ct) {
3051 int bytes_read;
3052
3053 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3054 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3055 sfe_ipv4_exception_events_string[ws->iter_exception],
3056 ct);
3057 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3058 return false;
3059 }
3060
3061 *length -= bytes_read;
3062 *total_read += bytes_read;
3063 }
3064
3065 ws->iter_exception++;
3066 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3067 ws->iter_exception = 0;
3068 ws->state++;
3069 }
3070
3071 return true;
3072}
3073
3074/*
3075 * sfe_ipv4_debug_dev_read_exceptions_end()
3076 * Generate part of the XML output.
3077 */
3078static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3079 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3080{
3081 int bytes_read;
3082
3083 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3084 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3085 return false;
3086 }
3087
3088 *length -= bytes_read;
3089 *total_read += bytes_read;
3090
3091 ws->state++;
3092 return true;
3093}
3094
3095/*
3096 * sfe_ipv4_debug_dev_read_stats()
3097 * Generate part of the XML output.
3098 */
3099static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3100 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3101{
3102 int bytes_read;
3103 unsigned int num_connections;
3104 uint64_t packets_forwarded;
3105 uint64_t packets_not_forwarded;
3106 uint64_t connection_create_requests;
3107 uint64_t connection_create_collisions;
3108 uint64_t connection_destroy_requests;
3109 uint64_t connection_destroy_misses;
3110 uint64_t connection_flushes;
3111 uint64_t connection_match_hash_hits;
3112 uint64_t connection_match_hash_reorders;
3113
3114 spin_lock_bh(&si->lock);
3115 sfe_ipv4_update_summary_stats(si);
3116
3117 num_connections = si->num_connections;
3118 packets_forwarded = si->packets_forwarded64;
3119 packets_not_forwarded = si->packets_not_forwarded64;
3120 connection_create_requests = si->connection_create_requests64;
3121 connection_create_collisions = si->connection_create_collisions64;
3122 connection_destroy_requests = si->connection_destroy_requests64;
3123 connection_destroy_misses = si->connection_destroy_misses64;
3124 connection_flushes = si->connection_flushes64;
3125 connection_match_hash_hits = si->connection_match_hash_hits64;
3126 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3127 spin_unlock_bh(&si->lock);
3128
3129 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3130 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003131 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3132 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003133 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3134 "flushes=\"%llu\" "
3135 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3136 num_connections,
3137 packets_forwarded,
3138 packets_not_forwarded,
3139 connection_create_requests,
3140 connection_create_collisions,
3141 connection_destroy_requests,
3142 connection_destroy_misses,
3143 connection_flushes,
3144 connection_match_hash_hits,
3145 connection_match_hash_reorders);
3146 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3147 return false;
3148 }
3149
3150 *length -= bytes_read;
3151 *total_read += bytes_read;
3152
3153 ws->state++;
3154 return true;
3155}
3156
3157/*
3158 * sfe_ipv4_debug_dev_read_end()
3159 * Generate part of the XML output.
3160 */
3161static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3162 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3163{
3164 int bytes_read;
3165
3166 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3167 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3168 return false;
3169 }
3170
3171 *length -= bytes_read;
3172 *total_read += bytes_read;
3173
3174 ws->state++;
3175 return true;
3176}
3177
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * The array is indexed by the current write state (ws->state) and has one
 * entry for every state below SFE_IPV4_DEBUG_XML_STATE_DONE, so the order of
 * the entries must match the order of the state values.  Each writer emits
 * its piece of the document and advances ws->state when it has finished.
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3193
3194/*
3195 * sfe_ipv4_debug_dev_read()
3196 * Send info to userspace upon read request from user
3197 */
3198static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3199{
3200 char msg[CHAR_DEV_MSG_SIZE];
3201 int total_read = 0;
3202 struct sfe_ipv4_debug_xml_write_state *ws;
3203 struct sfe_ipv4 *si = &__si;
3204
3205 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3206 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3207 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3208 continue;
3209 }
3210 }
3211
3212 return total_read;
3213}
3214
3215/*
3216 * sfe_ipv4_debug_dev_write()
Nicolas Costabafb3af2014-01-29 16:39:39 -06003217 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003218 */
3219static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3220{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003221 struct sfe_ipv4 *si = &__si;
3222
3223 spin_lock_bh(&si->lock);
3224 sfe_ipv4_update_summary_stats(si);
3225
Matthew McClintock54167ab2014-01-14 21:06:28 -06003226 si->packets_forwarded64 = 0;
3227 si->packets_not_forwarded64 = 0;
3228 si->connection_create_requests64 = 0;
3229 si->connection_create_collisions64 = 0;
3230 si->connection_destroy_requests64 = 0;
3231 si->connection_destroy_misses64 = 0;
3232 si->connection_flushes64 = 0;
3233 si->connection_match_hash_hits64 = 0;
3234 si->connection_match_hash_reorders64 = 0;
3235 spin_unlock_bh(&si->lock);
3236
3237 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003238}
3239
3240/*
3241 * sfe_ipv4_debug_dev_open()
3242 */
3243static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3244{
3245 struct sfe_ipv4_debug_xml_write_state *ws;
3246
3247 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3248 if (!ws) {
3249 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3250 if (!ws) {
3251 return -ENOMEM;
3252 }
3253
3254 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3255 file->private_data = ws;
3256 }
3257
3258 return 0;
3259}
3260
3261/*
3262 * sfe_ipv4_debug_dev_release()
3263 */
3264static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3265{
3266 struct sfe_ipv4_debug_xml_write_state *ws;
3267
3268 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3269 if (ws) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003270 /*
3271 * We've finished with our output so free the write state.
3272 */
3273 kfree(ws);
3274 }
3275
3276 return 0;
3277}
3278
3279/*
3280 * File operations used in the debug char device
3281 */
3282static struct file_operations sfe_ipv4_debug_dev_fops = {
3283 .read = sfe_ipv4_debug_dev_read,
3284 .write = sfe_ipv4_debug_dev_write,
3285 .open = sfe_ipv4_debug_dev_open,
3286 .release = sfe_ipv4_debug_dev_release
3287};
3288
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003289#ifdef CONFIG_NF_FLOW_COOKIE
3290/*
3291 * sfe_register_flow_cookie_cb
3292 * register a function in SFE to let SFE use this function to configure flow cookie for a flow
3293 *
3294 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
3295 * can use this function to configure flow cookie for a flow.
3296 * return: 0, success; !=0, fail
3297 */
3298int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
3299{
3300 struct sfe_ipv4 *si = &__si;
3301
3302 BUG_ON(!cb);
3303
3304 if (si->flow_cookie_set_func) {
3305 return -1;
3306 }
3307
3308 rcu_assign_pointer(si->flow_cookie_set_func, cb);
3309 return 0;
3310}
3311
3312/*
3313 * sfe_unregister_flow_cookie_cb
3314 * unregister function which is used to configure flow cookie for a flow
3315 *
3316 * return: 0, success; !=0, fail
3317 */
3318int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
3319{
3320 struct sfe_ipv4 *si = &__si;
3321
3322 RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
3323 return 0;
3324}
3325#endif /*CONFIG_NF_FLOW_COOKIE*/
3326
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003327/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003328 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003329 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003330static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003331{
3332 struct sfe_ipv4 *si = &__si;
3333 int result = -1;
3334
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003335 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003336
3337 /*
3338 * Create sys/sfe_ipv4
3339 */
3340 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3341 if (!si->sys_sfe_ipv4) {
3342 DEBUG_ERROR("failed to register sfe_ipv4\n");
3343 goto exit1;
3344 }
3345
3346 /*
3347 * Create files, one for each parameter supported by this module.
3348 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003349 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3350 if (result) {
3351 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3352 goto exit4;
3353 }
3354
3355 /*
3356 * Register our debug char device.
3357 */
3358 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3359 if (result < 0) {
3360 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3361 goto exit5;
3362 }
3363
3364 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003365
3366 /*
3367 * Create a timer to handle periodic statistics.
3368 */
3369 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003370 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003371
Dave Hudson87973cd2013-10-22 16:00:04 +01003372 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003373
Dave Hudson87973cd2013-10-22 16:00:04 +01003374 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003375
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003376exit5:
3377 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3378
3379exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003380 kobject_put(si->sys_sfe_ipv4);
3381
3382exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003383 return result;
3384}
3385
3386/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003387 * sfe_ipv4_exit()
3388 */
3389static void __exit sfe_ipv4_exit(void)
3390{
Dave Hudson87973cd2013-10-22 16:00:04 +01003391 struct sfe_ipv4 *si = &__si;
3392
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003393 DEBUG_INFO("SFE IPv4 exit\n");
Dave Hudson87973cd2013-10-22 16:00:04 +01003394
3395 /*
3396 * Destroy all connections.
3397 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003398 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Dave Hudson87973cd2013-10-22 16:00:04 +01003399
Dave Hudson87973cd2013-10-22 16:00:04 +01003400 del_timer_sync(&si->timer);
3401
Dave Hudson87973cd2013-10-22 16:00:04 +01003402 unregister_chrdev(si->debug_dev, "sfe_ipv4");
3403
3404 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3405
Dave Hudson87973cd2013-10-22 16:00:04 +01003406 kobject_put(si->sys_sfe_ipv4);
3407
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003408}
3409
/*
 * Module entry and exit points.
 */
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Symbols exported for use by other kernel modules.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
/*
 * The flow cookie callbacks only exist when CONFIG_NF_FLOW_COOKIE is enabled.
 */
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

/*
 * Module metadata.
 */
MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003428