blob: 4b8a5da2d45af2e6f5a6009392bc9a6188763b24 [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Xiaoping Fand642a6e2015-04-10 15:19:06 -07005 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
Matthew McClintocka3221942014-01-16 11:44:26 -06006 *
7 * All Rights Reserved.
8 * Qualcomm Atheros Confidential and Proprietary.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01009 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010011#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060012#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010013#include <linux/skbuff.h>
14#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010015#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060016#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010017
Dave Hudsondcd08fb2013-11-22 09:25:16 -060018#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070019#include "sfe_cm.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020
/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses.  For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif
37
/*
 * An Ethernet header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).
 *
 * Addresses are stored as arrays of 16-bit words (ETH_ALEN / 2 == 3)
 * to allow efficient half-word copies when rewriting headers.
 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC address */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC address */
	__be16 h_proto;			/* Ethernet payload protocol (EtherType) */
} SFE_IPV4_UNALIGNED_STRUCT;
48
/*
 * An IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field layout mirrors struct iphdr.
 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,			/* Header length in 32-bit words */
	     version:4;			/* IP version (4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service */
	__be16 tot_len;			/* Total datagram length (header + payload) */
	__be16 id;			/* Identification for fragmentation */
	__be16 frag_off;		/* Fragment offset and flags */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol (TCP/UDP/ICMP/...) */
	__sum16 check;			/* Header checksum */
	__be32 saddr;			/* Source address */
	__be32 daddr;			/* Destination address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010078
/*
 * A UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field layout mirrors struct udphdr.
 */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* Length of UDP header + payload */
	__sum16 check;			/* UDP checksum (0 if unused) */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010090
/*
 * A TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).  Field layout mirrors struct tcphdr.
 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgement number */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,			/* Data offset: header length in 32-bit words */
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100130
/*
 * Specifies the lower bound on ACK numbers carried in the TCP header.
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520

/*
 * IPv4 TCP connection match additional data.
 *
 * Per-direction TCP state used for sequence/ack sanity checking of
 * fast-path packets.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};
145
/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write a full L2 header.  NOTE(review): original comment
					   duplicated "Fast Ethernet header write" from the flag
					   above — confirm intended semantics against usage. */
159
/*
 * IPv4 connection matching structure.
 *
 * One instance exists per flow direction; it carries everything needed to
 * recognize a packet (5-tuple + ingress device) and to forward it
 * (translations, checksum adjustments, egress device and MAC addresses).
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags */
#ifdef CONFIG_NF_FLOW_COOKIE
	uint32_t flow_cookie;		/* used flow cookie, for debug */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	uint16_t xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	uint16_t xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};
242
/*
 * Per-connection data structure.
 *
 * Ties together the two directional match entries (original and reply)
 * plus bookkeeping for the global connection list and iteration safety.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	int iterators;			/* Number of iterators currently using this connection */
	bool pending_free;		/* Flag that indicates that this connection should be freed after iteration */
	uint32_t mark;			/* mark for outgoing packet */
};
276
/*
 * IPv4 connections and hash table size information.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)

#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff

/*
 * Maps a hardware-assigned flow cookie index to its connection match
 * entry; last_clean_time throttles reuse of recently-freed slots.
 */
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
	unsigned long last_clean_time;
};
#endif
293
/*
 * Reasons why a packet was handed back to the slow path instead of being
 * forwarded.  Order must match sfe_ipv4_exception_events_string[].
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
333
/*
 * Human-readable names for the exception events above, indexed by
 * enum sfe_ipv4_exception_events.  Must be kept in the same order as
 * the enum definition.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
372
/*
 * Per-module structure.
 *
 * Holds all global state of the IPv4 forwarding engine: connection
 * tables, active lists, counters and control/sysfs linkage.  All of it
 * is protected by the single spinlock below.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
#endif

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics (64-bit accumulators of the 32-bit counters above).
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
};
448
/*
 * Enumeration of the XML output.
 *
 * State machine for the debug character device: output is generated
 * section by section across successive reads.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
464
/*
 * XML write state.
 *
 * Per-reader cursor for the debug XML output state machine.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	struct sfe_ipv4_connection *iter_conn;
					/* Next connection iterator */
	int iter_exception;		/* Next exception iterator */
};
475
/*
 * Signature of one XML-section writer; returns true when the section is
 * fully emitted and the state machine may advance.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/*
 * Single module-wide engine instance.
 * NOTE(review): the leading double underscore is in the implementation's
 * reserved namespace, and the object is not static — presumably referenced
 * from elsewhere in this file; confirm before renaming.
 */
struct sfe_ipv4 __si;
480
481/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100482 * sfe_ipv4_gen_ip_csum()
483 * Generate the IP checksum for an IPv4 header.
484 *
485 * Note that this function assumes that we have only 20 bytes of IP header.
486 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600487static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100488{
489 uint32_t sum;
490 uint16_t *i = (uint16_t *)iph;
491
492 iph->check = 0;
493
494 /*
495 * Generate the sum.
496 */
497 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
498
499 /*
500 * Fold it to ones-complement form.
501 */
502 sum = (sum & 0xffff) + (sum >> 16);
503 sum = (sum & 0xffff) + (sum >> 16);
504
505 return (uint16_t)sum ^ 0xffff;
506}
507
508/*
509 * sfe_ipv4_get_connection_match_hash()
510 * Generate the hash used in connection match lookups.
511 */
512static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100513 __be32 src_ip, __be16 src_port,
514 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100515{
516 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100517 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100518 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
519}
520
/*
 * sfe_ipv4_find_sfe_ipv4_connection_match()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 *
 * Returns NULL when no entry matches.  On a hit that is not already at
 * the head of its chain, the entry is moved to the head (move-to-front
 * heuristic) on the assumption it will be reused again very soon.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 * Ports are compared first as they are the most discriminating fields.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it.  We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
			|| cm->match_dest_port != dest_port
			|| cm->match_src_ip != src_ip
			|| cm->match_dest_ip != dest_ip
			|| cm->match_protocol != protocol
			|| cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it.
	 * Unlink the entry from its current position, then splice it in at
	 * the head of the chain.  cm->prev is non-NULL here because the
	 * head-of-chain case returned above.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
600
601/*
602 * sfe_ipv4_connection_match_update_summary_stats()
603 * Update the summary stats for a connection match entry.
604 */
605static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
606{
607 cm->rx_packet_count64 += cm->rx_packet_count;
608 cm->rx_packet_count = 0;
609 cm->rx_byte_count64 += cm->rx_byte_count;
610 cm->rx_byte_count = 0;
611}
612
613/*
614 * sfe_ipv4_connection_match_compute_translations()
615 * Compute port and address translations for a connection match entry.
616 */
617static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
618{
619 /*
620 * Before we insert the entry look to see if this is tagged as doing address
621 * translations. If it is then work out the adjustment that we need to apply
622 * to the transport checksum.
623 */
624 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
625 /*
626 * Precompute an incremental checksum adjustment so we can
627 * edit packets in this stream very quickly. The algorithm is from RFC1624.
628 */
629 uint16_t src_ip_hi = cm->match_src_ip >> 16;
630 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
631 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
632 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
633 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100634 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100635 uint32_t adj;
636
637 /*
638 * When we compute this fold it down to a 16-bit offset
639 * as that way we can avoid having to do a double
640 * folding of the twos-complement result because the
641 * addition of 2 16-bit values cannot cause a double
642 * wrap-around!
643 */
644 adj = src_ip_hi + src_ip_lo + cm->match_src_port
645 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
646 adj = (adj & 0xffff) + (adj >> 16);
647 adj = (adj & 0xffff) + (adj >> 16);
648 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600649
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100650 }
651
652 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
653 /*
654 * Precompute an incremental checksum adjustment so we can
655 * edit packets in this stream very quickly. The algorithm is from RFC1624.
656 */
657 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
658 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
659 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
660 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
661 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100662 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100663 uint32_t adj;
664
665 /*
666 * When we compute this fold it down to a 16-bit offset
667 * as that way we can avoid having to do a double
668 * folding of the twos-complement result because the
669 * addition of 2 16-bit values cannot cause a double
670 * wrap-around!
671 */
672 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
673 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
674 adj = (adj & 0xffff) + (adj >> 16);
675 adj = (adj & 0xffff) + (adj >> 16);
676 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
677 }
Xiaoping Fanad755af2015-04-01 16:58:46 -0700678
679 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
680 uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
681 if (adj < cm->xlate_src_ip) {
682 adj++;
683 }
684
685 adj = (adj & 0xffff) + (adj >> 16);
686 adj = (adj & 0xffff) + (adj >> 16);
687 cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
688 }
689
690 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
691 uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
692 if (adj < cm->xlate_dest_ip) {
693 adj++;
694 }
695
696 adj = (adj & 0xffff) + (adj >> 16);
697 adj = (adj & 0xffff) + (adj >> 16);
698 cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
699 }
700
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100701}
702
703/*
704 * sfe_ipv4_update_summary_stats()
705 * Update the summary stats.
706 */
707static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
708{
709 int i;
710
711 si->connection_create_requests64 += si->connection_create_requests;
712 si->connection_create_requests = 0;
713 si->connection_create_collisions64 += si->connection_create_collisions;
714 si->connection_create_collisions = 0;
715 si->connection_destroy_requests64 += si->connection_destroy_requests;
716 si->connection_destroy_requests = 0;
717 si->connection_destroy_misses64 += si->connection_destroy_misses;
718 si->connection_destroy_misses = 0;
719 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
720 si->connection_match_hash_hits = 0;
721 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
722 si->connection_match_hash_reorders = 0;
723 si->connection_flushes64 += si->connection_flushes;
724 si->connection_flushes = 0;
725 si->packets_forwarded64 += si->packets_forwarded;
726 si->packets_forwarded = 0;
727 si->packets_not_forwarded64 += si->packets_not_forwarded;
728 si->packets_not_forwarded = 0;
729
730 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
731 si->exception_events64[i] += si->exception_events[i];
732 si->exception_events[i] = 0;
733 }
734}
735
/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 * The entry is linked at the head of its hash chain.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 * conn_match_idx is reused here as the cookie slot index; slot 0 is
	 * never used, and a slot is only reclaimed after being clean for >1s.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			/*
			 * The set function is registered/unregistered under RCU.
			 */
			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif

}
788
/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 * Also releases any hardware flow cookie slot and unlinks the entry
 * from the active list if present.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Tell hardware that we no longer need a flow cookie in packet of this flow
	 */
	unsigned int conn_match_idx;

	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if (cm == entry->match) {
			flow_cookie_set_func_t func;

			/*
			 * The set function is registered/unregistered under RCU.
			 * A cookie of 0 tells the hardware to stop tagging this flow.
			 */
			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
				     cm->match_dest_ip, cm->match_dest_port, 0);
			}
			rcu_read_unlock();

			cm->flow_cookie = 0;
			entry->match = NULL;
			entry->last_clean_time = jiffies;
			break;
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 * A NULL prev means we are the chain head, so the hash slot itself
	 * must be updated; the hash is recomputed only in that case.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}
859
860/*
861 * sfe_ipv4_get_connection_hash()
862 * Generate the hash used in connection lookups.
863 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100864static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
865 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100866{
Dave Hudson87973cd2013-10-22 16:00:04 +0100867 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100868 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
869}
870
871/*
872 * sfe_ipv4_find_sfe_ipv4_connection()
873 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
874 *
875 * On entry we must be holding the lock that protects the hash table.
876 */
877static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100878 __be32 src_ip, __be16 src_port,
879 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100880{
881 struct sfe_ipv4_connection *c;
882 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
883 c = si->conn_hash[conn_idx];
884
885 /*
886 * If we don't have anything in this chain then bale.
887 */
888 if (unlikely(!c)) {
889 return c;
890 }
891
892 /*
893 * Hopefully the first entry is the one we want.
894 */
895 if (likely(c->src_port == src_port)
896 && likely(c->dest_port == dest_port)
897 && likely(c->src_ip == src_ip)
898 && likely(c->dest_ip == dest_ip)
899 && likely(c->protocol == protocol)) {
900 return c;
901 }
902
903 /*
904 * We may or may not have a matching entry but if we do then we want to
905 * move that entry to the top of the hash chain when we get to it. We
906 * presume that this will be reused again very quickly.
907 */
908 do {
909 c = c->next;
910 } while (c && (c->src_port != src_port
911 || c->dest_port != dest_port
912 || c->src_ip != src_ip
913 || c->dest_ip != dest_ip
914 || c->protocol != protocol));
915
916 /*
917 * Will need connection entry for next create/destroy metadata,
918 * So no need to re-order entry for these requests
919 */
920 return c;
921}
922
923/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600924 * sfe_ipv4_mark_rule()
925 * Updates the mark for a current offloaded connection
926 *
927 * Will take hash lock upon entry
928 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700929void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600930{
931 struct sfe_ipv4 *si = &__si;
932 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600933
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600934 spin_lock(&si->lock);
935 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700936 mark->src_ip.ip, mark->src_port,
937 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600938 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600939 DEBUG_TRACE("Matching connection found for mark, "
940 "setting from %08x to %08x\n",
941 c->mark, mark->mark);
942 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600943 c->mark = mark->mark;
944 }
945 spin_unlock(&si->lock);
946}
947
948/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100949 * sfe_ipv4_insert_sfe_ipv4_connection()
950 * Insert a connection into the hash.
951 *
952 * On entry we must be holding the lock that protects the hash table.
953 */
954static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
955{
956 struct sfe_ipv4_connection **hash_head;
957 struct sfe_ipv4_connection *prev_head;
958 unsigned int conn_idx;
959
960 /*
961 * Insert entry into the connection hash.
962 */
963 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
964 c->dest_ip, c->dest_port);
965 hash_head = &si->conn_hash[conn_idx];
966 prev_head = *hash_head;
967 c->prev = NULL;
968 if (prev_head) {
969 prev_head->prev = c;
970 }
971
972 c->next = prev_head;
973 *hash_head = c;
974
975 /*
976 * Insert entry into the "all connections" list.
977 */
978 if (si->all_connections_tail) {
979 c->all_connections_prev = si->all_connections_tail;
980 si->all_connections_tail->all_connections_next = c;
981 } else {
982 c->all_connections_prev = NULL;
983 si->all_connections_head = c;
984 }
985
986 si->all_connections_tail = c;
987 c->all_connections_next = NULL;
988 si->num_connections++;
989
990 /*
991 * Insert the connection match objects too.
992 */
993 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
994 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
995}
996
997/*
998 * sfe_ipv4_remove_sfe_ipv4_connection()
999 * Remove a sfe_ipv4_connection object from the hash.
1000 *
1001 * On entry we must be holding the lock that protects the hash table.
1002 */
1003static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1004{
1005 /*
1006 * Remove the connection match objects.
1007 */
1008 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1009 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1010
1011 /*
1012 * Unlink the connection.
1013 */
1014 if (c->prev) {
1015 c->prev->next = c->next;
1016 } else {
1017 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1018 c->dest_ip, c->dest_port);
1019 si->conn_hash[conn_idx] = c->next;
1020 }
1021
1022 if (c->next) {
1023 c->next->prev = c->prev;
1024 }
1025}
1026
1027/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001028 * sfe_ipv4_sync_sfe_ipv4_connection()
1029 * Sync a connection.
1030 *
1031 * On entry to this function we expect that the lock for the connection is either
1032 * already held or isn't required.
1033 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, uint64_t now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message: identity of the flow first.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->dest_ip.ip = c->dest_ip;
	sis->src_port = c->src_port;
	sis->dest_port = c->dest_port;

	/*
	 * TCP window-tracking state for both directions (unused garbage for
	 * non-TCP flows, since protocol_state is a union-style field).
	 */
	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	/*
	 * NOTE(review): the "new" (delta) counters must be captured BEFORE the
	 * update_summary_stats() calls below, which presumably fold the 32-bit
	 * per-interval counters into the 64-bit totals and reset them - confirm
	 * against sfe_ipv4_connection_match_update_summary_stats() before
	 * reordering anything here.
	 */
	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	/*
	 * Cumulative 64-bit totals and the devices each direction matched on.
	 */
	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	/*
	 * Get the time increment since our last sync and roll the sync
	 * timestamp forward.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}
1081
1082/*
1083 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
1084 * Remove an iterator from a connection - free all resources if necessary.
1085 *
1086 * Returns true if the connection should now be free, false if not.
1087 *
1088 * We must be locked on entry to this function.
1089 */
1090static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1091{
1092 /*
1093 * Are we the last iterator for this connection?
1094 */
1095 c->iterators--;
1096 if (c->iterators) {
1097 return false;
1098 }
1099
1100 /*
1101 * Is this connection marked for deletion?
1102 */
1103 if (!c->pending_free) {
1104 return false;
1105 }
1106
1107 /*
1108 * We're ready to delete this connection so unlink it from the "all
1109 * connections" list.
1110 */
1111 si->num_connections--;
1112 if (c->all_connections_prev) {
1113 c->all_connections_prev->all_connections_next = c->all_connections_next;
1114 } else {
1115 si->all_connections_head = c->all_connections_next;
1116 }
1117
1118 if (c->all_connections_next) {
1119 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1120 } else {
1121 si->all_connections_tail = c->all_connections_prev;
1122 }
1123
1124 return true;
1125}
1126
1127/*
1128 * sfe_ipv4_flush_sfe_ipv4_connection()
1129 * Flush a connection and free all associated resources.
1130 *
1131 * We need to be called with bottom halves disabled locally as we need to acquire
1132 * the connection hash lock and release it again. In general we're actually called
1133 * from within a BH and so we're fine, but we're also called when connections are
1134 * torn down.
1135 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_connection_sync sis;
	uint64_t now_jiffies;
	bool pending_free = false;
	sfe_sync_rule_callback_t sync_rule_callback;

	/*
	 * The sync callback is published via RCU; hold the read lock across
	 * both the dereference and the call so the callback module cannot be
	 * unloaded underneath us.
	 */
	rcu_read_lock();
	spin_lock(&si->lock);
	si->connection_flushes++;

	/*
	 * Check that we're not currently being iterated.  If we are then
	 * we can't free this entry yet but must mark it pending a free.  If it's
	 * not being iterated then we can unlink it from the list of all
	 * connections.
	 */
	if (c->iterators) {
		pending_free = true;
		c->pending_free = true;
	} else {
		si->num_connections--;
		if (c->all_connections_prev) {
			c->all_connections_prev->all_connections_next = c->all_connections_next;
		} else {
			si->all_connections_head = c->all_connections_next;
		}

		if (c->all_connections_next) {
			c->all_connections_next->all_connections_prev = c->all_connections_prev;
		} else {
			si->all_connections_tail = c->all_connections_prev;
		}
	}

	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	spin_unlock(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a final sync message for this connection and deliver
		 * it (outside the spinlock, but still inside the RCU section).
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * If we can't yet free the underlying memory then we're done.
	 * The last iterator to drop its reference will perform the free
	 * (see sfe_ipv4_decrement_sfe_ipv4_connection_iterator()).
	 */
	if (pending_free) {
		return;
	}

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1203
1204/*
1205 * sfe_ipv4_recv_udp()
1206 * Handle UDP packet receives and forwarding.
1207 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 * Returning 0 here (and everywhere below) hands the packet back to the
	 * normal network-stack slow path.
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.  With flow cookies enabled the hardware
	 * tag gives a direct table hit; fall back to the hash lookup otherwise.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Does our TTL allow forwarding?  (A TTL of 1 would decrement to 0 and
	 * require an ICMP time-exceeded - slow-path work.)
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 * (SNAT direction: rewrite saddr/source and patch the UDP checksum.)
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.  (A zero UDP checksum means "not computed" and
		 * must stay zero.)
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			/*
			 * CHECKSUM_PARTIAL frames carry only a pseudo-header sum,
			 * so they need a different precomputed adjustment.
			 */
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			/*
			 * One end-around-carry fold suffices: a 16-bit value plus
			 * a 16-bit adjustment cannot overflow twice.
			 */
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 * (DNAT direction - mirror of the block above.)
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum (TTL and possibly addresses changed).
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
						     cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * NOTE(review): only 3 elements are copied per MAC, which
			 * assumes the MAC fields are arrays of 16-bit halfwords
			 * (3 * 2 = 6 bytes) - confirm against the declarations of
			 * sfe_ipv4_eth_hdr and xmit_dest_mac/xmit_src_mac.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
1461
1462/*
1463 * sfe_ipv4_process_tcp_option_sack()
1464 * Parse TCP SACK option and update ack according
1465 */
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack) __attribute__((always_inline));
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack)
{
	/* length = parse cursor, measured from the start of the TCP header. */
	uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
	uint8_t *ptr = (uint8_t *)th + length;

	/*
	 * If option is TIMESTAMP discard it.
	 * Fast path for the by-far most common layout: exactly
	 * NOP, NOP, TIMESTAMP and nothing else - no SACK to process.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse SACK option.
	 * Walk the option list; returns false on a malformed option so the
	 * caller can bail out of fast-path processing.
	 */
	while (length < data_offs) {
		uint8_t size;
		uint8_t kind;

		ptr = (uint8_t *)th + length;
		kind = *ptr;

		/*
		 * NOP, for padding
		 * Not in the switch because to fast escape and to not calculate size
		 * (NOP is a single byte with no length field).
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			uint32_t sack = 0;
			/* re = offset of the next field within this option;
			 * starts past the kind and length bytes. */
			uint8_t re = 1 + 1;

			/*
			 * Validate the SACK option length: at least one block,
			 * a whole number of blocks, and it must fit in the
			 * remaining option space.
			 */
			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			/*
			 * Skip each block's left edge (4 bytes) and read its
			 * right edge (big-endian, assembled byte-by-byte so no
			 * alignment is required); keep the largest right edge.
			 */
			re += 4;
			while (re < size) {
				uint32_t sack_re;
				uint8_t *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			/*
			 * Advance the caller's ACK if a SACK edge is beyond it.
			 * NOTE(review): plain > comparison, not the usual
			 * after()-style modular sequence compare - presumably
			 * acceptable for this fast path; confirm wraparound
			 * handling with the callers.
			 */
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		/*
		 * Any other option: skip it by its length byte; a length < 2
		 * is malformed (would loop forever).
		 */
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}
1543
1544/*
1545 * sfe_ipv4_recv_tcp()
1546 * Handle TCP packet receives and forwarding.
1547 */
1548static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001549 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001550{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001551 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001552 __be32 src_ip;
1553 __be32 dest_ip;
1554 __be16 src_port;
1555 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001556 struct sfe_ipv4_connection_match *cm;
1557 struct sfe_ipv4_connection_match *counter_cm;
1558 uint8_t ttl;
1559 uint32_t flags;
1560 struct net_device *xmit_dev;
1561
1562 /*
1563 * Is our packet too short to contain a valid UDP header?
1564 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001565 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001566 spin_lock(&si->lock);
1567 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1568 si->packets_not_forwarded++;
1569 spin_unlock(&si->lock);
1570
1571 DEBUG_TRACE("packet too short for TCP header\n");
1572 return 0;
1573 }
1574
1575 /*
1576 * Read the IP address and port information. Read the IP header data first
1577 * because we've almost certainly got that in the cache. We may not yet have
1578 * the TCP header cached though so allow more time for any prefetching.
1579 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001580 src_ip = iph->saddr;
1581 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001582
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001583 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001584 src_port = tcph->source;
1585 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001586 flags = tcp_flag_word(tcph);
1587
1588 spin_lock(&si->lock);
1589
1590 /*
1591 * Look for a connection match.
1592 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001593#ifdef CONFIG_NF_FLOW_COOKIE
1594 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1595 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001596 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001597 }
1598#else
Matthew McClintock37858802015-02-03 12:12:02 -06001599 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001600#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001601 if (unlikely(!cm)) {
1602 /*
1603 * We didn't get a connection but as TCP is connection-oriented that
1604 * may be because this is a non-fast connection (not running established).
1605 * For diagnostic purposes we differentiate this here.
1606 */
1607 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1608 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1609 si->packets_not_forwarded++;
1610 spin_unlock(&si->lock);
1611
1612 DEBUG_TRACE("no connection found - fast flags\n");
1613 return 0;
1614 }
1615 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1616 si->packets_not_forwarded++;
1617 spin_unlock(&si->lock);
1618
1619 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1620 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1621 return 0;
1622 }
1623
1624 /*
1625 * If our packet has beern marked as "flush on find" we can't actually
1626 * forward it in the fast path, but now that we've found an associated
1627 * connection we can flush that out before we process the packet.
1628 */
1629 if (unlikely(flush_on_find)) {
1630 struct sfe_ipv4_connection *c = cm->connection;
1631 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1632 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1633 si->packets_not_forwarded++;
1634 spin_unlock(&si->lock);
1635
1636 DEBUG_TRACE("flush on find\n");
1637 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1638 return 0;
1639 }
1640
1641 /*
1642 * Does our TTL allow forwarding?
1643 */
1644 ttl = iph->ttl;
1645 if (unlikely(ttl < 2)) {
1646 struct sfe_ipv4_connection *c = cm->connection;
1647 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1648 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1649 si->packets_not_forwarded++;
1650 spin_unlock(&si->lock);
1651
1652 DEBUG_TRACE("ttl too low\n");
1653 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1654 return 0;
1655 }
1656
1657 /*
1658 * If our packet is larger than the MTU of the transmit interface then
1659 * we can't forward it easily.
1660 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001661 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001662 struct sfe_ipv4_connection *c = cm->connection;
1663 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1664 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1665 si->packets_not_forwarded++;
1666 spin_unlock(&si->lock);
1667
1668 DEBUG_TRACE("larger than mtu\n");
1669 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1670 return 0;
1671 }
1672
1673 /*
1674 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1675 * set is not a fast path packet.
1676 */
1677 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1678 struct sfe_ipv4_connection *c = cm->connection;
1679 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1680 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1681 si->packets_not_forwarded++;
1682 spin_unlock(&si->lock);
1683
1684 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1685 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1686 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1687 return 0;
1688 }
1689
1690 counter_cm = cm->counter_match;
1691
1692 /*
1693 * Are we doing sequence number checking?
1694 */
1695 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1696 uint32_t seq;
1697 uint32_t ack;
1698 uint32_t sack;
1699 uint32_t data_offs;
1700 uint32_t end;
1701 uint32_t left_edge;
1702 uint32_t scaled_win;
1703 uint32_t max_end;
1704
1705 /*
1706 * Is our sequence fully past the right hand edge of the window?
1707 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001708 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001709 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1710 struct sfe_ipv4_connection *c = cm->connection;
1711 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1712 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1713 si->packets_not_forwarded++;
1714 spin_unlock(&si->lock);
1715
1716 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1717 seq, cm->protocol_state.tcp.max_end + 1);
1718 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1719 return 0;
1720 }
1721
1722 /*
1723 * Check that our TCP data offset isn't too short.
1724 */
1725 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001726 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001727 struct sfe_ipv4_connection *c = cm->connection;
1728 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1729 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1730 si->packets_not_forwarded++;
1731 spin_unlock(&si->lock);
1732
1733 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1734 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1735 return 0;
1736 }
1737
1738 /*
1739 * Update ACK according to any SACK option.
1740 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001741 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001742 sack = ack;
1743 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1744 struct sfe_ipv4_connection *c = cm->connection;
1745 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1746 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1747 si->packets_not_forwarded++;
1748 spin_unlock(&si->lock);
1749
1750 DEBUG_TRACE("TCP option SACK size is wrong\n");
1751 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1752 return 0;
1753 }
1754
1755 /*
1756 * Check that our TCP data offset isn't past the end of the packet.
1757 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001758 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001759 if (unlikely(len < data_offs)) {
1760 struct sfe_ipv4_connection *c = cm->connection;
1761 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1762 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1763 si->packets_not_forwarded++;
1764 spin_unlock(&si->lock);
1765
1766 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1767 data_offs, len);
1768 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1769 return 0;
1770 }
1771
1772 end = seq + len - data_offs;
1773
1774 /*
1775 * Is our sequence fully before the left hand edge of the window?
1776 */
1777 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1778 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1779 struct sfe_ipv4_connection *c = cm->connection;
1780 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1781 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1782 si->packets_not_forwarded++;
1783 spin_unlock(&si->lock);
1784
1785 DEBUG_TRACE("seq: %u before left edge: %u\n",
1786 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1787 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1788 return 0;
1789 }
1790
1791 /*
1792 * Are we acking data that is to the right of what has been sent?
1793 */
1794 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1795 struct sfe_ipv4_connection *c = cm->connection;
1796 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1797 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1798 si->packets_not_forwarded++;
1799 spin_unlock(&si->lock);
1800
1801 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1802 sack, counter_cm->protocol_state.tcp.end + 1);
1803 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1804 return 0;
1805 }
1806
1807 /*
1808 * Is our ack too far before the left hand edge of the window?
1809 */
1810 left_edge = counter_cm->protocol_state.tcp.end
1811 - cm->protocol_state.tcp.max_win
1812 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1813 - 1;
1814 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1815 struct sfe_ipv4_connection *c = cm->connection;
1816 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1817 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1818 si->packets_not_forwarded++;
1819 spin_unlock(&si->lock);
1820
1821 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1822 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1823 return 0;
1824 }
1825
1826 /*
1827 * Have we just seen the largest window size yet for this connection? If yes
1828 * then we need to record the new value.
1829 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001830 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001831 scaled_win += (sack - ack);
1832 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1833 cm->protocol_state.tcp.max_win = scaled_win;
1834 }
1835
1836 /*
1837 * If our sequence and/or ack numbers have advanced then record the new state.
1838 */
1839 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1840 cm->protocol_state.tcp.end = end;
1841 }
1842
1843 max_end = sack + scaled_win;
1844 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1845 counter_cm->protocol_state.tcp.max_end = max_end;
1846 }
1847 }
1848
1849 /*
1850 * From this point on we're good to modify the packet.
1851 */
1852
1853 /*
1854 * Decrement our TTL.
1855 */
1856 iph->ttl = ttl - 1;
1857
1858 /*
1859 * Do we have to perform translations of the source address/port?
1860 */
1861 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1862 uint16_t tcp_csum;
1863 uint32_t sum;
1864
Dave Hudson87973cd2013-10-22 16:00:04 +01001865 iph->saddr = cm->xlate_src_ip;
1866 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001867
1868 /*
1869 * Do we have a non-zero UDP checksum? If we do then we need
1870 * to update it.
1871 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001872 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001873 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1874 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1875 } else {
1876 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1877 }
1878
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001879 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001880 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001881 }
1882
1883 /*
1884 * Do we have to perform translations of the destination address/port?
1885 */
1886 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1887 uint16_t tcp_csum;
1888 uint32_t sum;
1889
Dave Hudson87973cd2013-10-22 16:00:04 +01001890 iph->daddr = cm->xlate_dest_ip;
1891 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001892
1893 /*
1894 * Do we have a non-zero UDP checksum? If we do then we need
1895 * to update it.
1896 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001897 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001898 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1899 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1900 } else {
1901 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1902 }
1903
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001904 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001905 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001906 }
1907
1908 /*
1909 * Replace the IP checksum.
1910 */
1911 iph->check = sfe_ipv4_gen_ip_csum(iph);
1912
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001913 /*
1914 * Update traffic stats.
1915 */
1916 cm->rx_packet_count++;
1917 cm->rx_byte_count += len;
1918
1919 /*
1920 * If we're not already on the active list then insert ourselves at the tail
1921 * of the current list.
1922 */
1923 if (unlikely(!cm->active)) {
1924 cm->active = true;
1925 cm->active_prev = si->active_tail;
1926 if (likely(si->active_tail)) {
1927 si->active_tail->active_next = cm;
1928 } else {
1929 si->active_head = cm;
1930 }
1931 si->active_tail = cm;
1932 }
1933
1934 xmit_dev = cm->xmit_dev;
1935 skb->dev = xmit_dev;
1936
1937 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001938 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001939 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001940 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1941 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001942 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001943 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001944 } else {
1945 /*
1946 * For the simple case we write this really fast.
1947 */
1948 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1949 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001950 eth->h_dest[0] = cm->xmit_dest_mac[0];
1951 eth->h_dest[1] = cm->xmit_dest_mac[1];
1952 eth->h_dest[2] = cm->xmit_dest_mac[2];
1953 eth->h_source[0] = cm->xmit_src_mac[0];
1954 eth->h_source[1] = cm->xmit_src_mac[1];
1955 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001956 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001957 }
1958
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001959 /*
1960 * Mark outgoing packet
1961 */
1962 skb->mark = cm->connection->mark;
1963 if (skb->mark) {
1964 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1965 }
1966
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001967 si->packets_forwarded++;
1968 spin_unlock(&si->lock);
1969
1970 /*
1971 * We're going to check for GSO flags when we transmit the packet so
1972 * start fetching the necessary cache line now.
1973 */
1974 prefetch(skb_shinfo(skb));
1975
1976 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001977 * Mark that this packet has been fast forwarded.
1978 */
1979 skb->fast_forwarded = 1;
1980
1981 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001982 * Send the packet on its way.
1983 */
1984 dev_queue_xmit(skb);
1985
1986 return 1;
1987}
1988
/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack. What we do need to do is look for any errors
 * about connections we are handling in the fast path. If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 *
 * si:  engine instance; si->lock protects the stats counters and connection lookup
 * len: total length of the IP datagram, including the outer IP header
 * iph: outer IP header (already validated by the caller)
 * ihl: outer IP header length in bytes, including any IP options
 *
 * Always returns 0 - ICMP packets are never forwarded by the fast path.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct sfe_ipv4_ip_hdr *icmp_iph;
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	uint32_t *icmp_trans_h;
	struct sfe_ipv4_udp_hdr *icmp_udph;
	struct sfe_ipv4_tcp_hdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;
	/*
	 * Running count of bytes that must be linear in the skb; grown
	 * incrementally as each deeper header is examined below.
	 */
	uint32_t pull_len = sizeof(struct icmphdr) + ihl;

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 * (From here on "len" counts the bytes remaining after the outer IP header.)
	 */
	len -= ihl;
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 * Only these error types carry an embedded copy of the offending datagram
	 * that could refer to a connection we are accelerating.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 */
	len -= sizeof(struct icmphdr);
	pull_len += sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	len -= icmp_ihl;
	icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock(&si->lock);

			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match. Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it. It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state. The flush must happen after si->lock is dropped.
	 */
	c = cm->connection;
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
	si->packets_not_forwarded++;
	spin_unlock(&si->lock);

	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	return 0;
}
2185
2186/*
2187 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002188 * Handle packet receives and forwaring.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002189 *
2190 * Returns 1 if the packet is forwarded or 0 if it isn't.
2191 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002192int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002193{
2194 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002195 unsigned int len;
2196 unsigned int tot_len;
2197 unsigned int frag_off;
2198 unsigned int ihl;
2199 bool flush_on_find;
2200 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002201 struct sfe_ipv4_ip_hdr *iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002202 uint32_t protocol;
2203
2204 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002205 * Check that we have space for an IP header here.
2206 */
2207 len = skb->len;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002208 if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002209 spin_lock(&si->lock);
2210 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2211 si->packets_not_forwarded++;
2212 spin_unlock(&si->lock);
2213
2214 DEBUG_TRACE("len: %u is too short\n", len);
2215 return 0;
2216 }
2217
2218 /*
2219 * Check that our "total length" is large enough for an IP header.
2220 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002221 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002222 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002223 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002224 spin_lock(&si->lock);
2225 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2226 si->packets_not_forwarded++;
2227 spin_unlock(&si->lock);
2228
2229 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2230 return 0;
2231 }
2232
2233 /*
2234 * Is our IP version wrong?
2235 */
2236 if (unlikely(iph->version != 4)) {
2237 spin_lock(&si->lock);
2238 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2239 si->packets_not_forwarded++;
2240 spin_unlock(&si->lock);
2241
2242 DEBUG_TRACE("IP version: %u\n", iph->version);
2243 return 0;
2244 }
2245
2246 /*
2247 * Does our datagram fit inside the skb?
2248 */
2249 if (unlikely(tot_len > len)) {
2250 spin_lock(&si->lock);
2251 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2252 si->packets_not_forwarded++;
2253 spin_unlock(&si->lock);
2254
2255 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2256 return 0;
2257 }
2258
2259 /*
2260 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002261 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002262 frag_off = ntohs(iph->frag_off);
2263 if (unlikely(frag_off & IP_OFFSET)) {
2264 spin_lock(&si->lock);
2265 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2266 si->packets_not_forwarded++;
2267 spin_unlock(&si->lock);
2268
2269 DEBUG_TRACE("non-initial fragment\n");
2270 return 0;
2271 }
2272
2273 /*
2274 * If we have a (first) fragment then mark it to cause any connection to flush.
2275 */
2276 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2277
2278 /*
2279 * Do we have any IP options? That's definite a slow path! If we do have IP
2280 * options we need to recheck our header size.
2281 */
2282 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002283 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002284 if (unlikely(ip_options)) {
2285 if (unlikely(len < ihl)) {
2286 spin_lock(&si->lock);
2287 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2288 si->packets_not_forwarded++;
2289 spin_unlock(&si->lock);
2290
2291 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2292 return 0;
2293 }
2294
2295 flush_on_find = true;
2296 }
2297
2298 protocol = iph->protocol;
2299 if (IPPROTO_UDP == protocol) {
2300 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2301 }
2302
2303 if (IPPROTO_TCP == protocol) {
2304 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2305 }
2306
2307 if (IPPROTO_ICMP == protocol) {
2308 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2309 }
2310
2311 spin_lock(&si->lock);
2312 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2313 si->packets_not_forwarded++;
2314 spin_unlock(&si->lock);
2315
2316 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2317 return 0;
2318}
2319
Nicolas Costa436926b2014-01-14 10:36:22 -06002320static void
2321sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002322 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002323{
2324 struct sfe_ipv4_connection_match *orig_cm;
2325 struct sfe_ipv4_connection_match *repl_cm;
2326 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2327 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2328
2329 orig_cm = c->original_match;
2330 repl_cm = c->reply_match;
2331 orig_tcp = &orig_cm->protocol_state.tcp;
2332 repl_tcp = &repl_cm->protocol_state.tcp;
2333
2334 /* update orig */
2335 if (orig_tcp->max_win < sic->src_td_max_window) {
2336 orig_tcp->max_win = sic->src_td_max_window;
2337 }
2338 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2339 orig_tcp->end = sic->src_td_end;
2340 }
2341 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2342 orig_tcp->max_end = sic->src_td_max_end;
2343 }
2344
2345 /* update reply */
2346 if (repl_tcp->max_win < sic->dest_td_max_window) {
2347 repl_tcp->max_win = sic->dest_td_max_window;
2348 }
2349 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2350 repl_tcp->end = sic->dest_td_end;
2351 }
2352 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2353 repl_tcp->max_end = sic->dest_td_max_end;
2354 }
2355
2356 /* update match flags */
2357 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2358 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002359 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002360 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2361 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2362 }
2363}
2364
2365static void
2366sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002367 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002368{
2369 switch (sic->protocol) {
2370 case IPPROTO_TCP:
2371 sfe_ipv4_update_tcp_state(c, sic);
2372 break;
2373 }
2374}
2375
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002376void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002377{
2378 struct sfe_ipv4_connection *c;
2379 struct sfe_ipv4 *si = &__si;
2380
2381 spin_lock_bh(&si->lock);
2382
2383 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2384 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002385 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002386 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002387 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002388 sic->dest_port);
2389 if (c != NULL) {
2390 sfe_ipv4_update_protocol_state(c, sic);
2391 }
2392
2393 spin_unlock_bh(&si->lock);
2394}
2395
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002396/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002397 * sfe_ipv4_create_rule()
2398 * Create a forwarding rule.
2399 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002400int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002401{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002402 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002403 struct sfe_ipv4_connection *c;
2404 struct sfe_ipv4_connection_match *original_cm;
2405 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002406 struct net_device *dest_dev;
2407 struct net_device *src_dev;
2408
2409 dest_dev = sic->dest_dev;
2410 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002411
Matthew McClintock389b42a2014-09-24 14:05:51 -05002412 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2413 (src_dev->reg_state != NETREG_REGISTERED))) {
2414 return -EINVAL;
2415 }
2416
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002417 spin_lock_bh(&si->lock);
2418 si->connection_create_requests++;
2419
2420 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002421 * Check to see if there is already a flow that matches the rule we're
2422 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002423 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002424 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2425 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002426 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002427 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002428 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002429 sic->dest_port);
2430 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002431 si->connection_create_collisions++;
2432
2433 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002434 * If we already have the flow then it's likely that this
2435 * request to create the connection rule contains more
2436 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002437 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002438 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002439 spin_unlock_bh(&si->lock);
2440
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002441 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002442 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002443 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002444 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2445 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002446 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002447 }
2448
2449 /*
2450 * Allocate the various connection tracking objects.
2451 */
2452 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2453 if (unlikely(!c)) {
2454 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002455 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002456 }
2457
2458 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2459 if (unlikely(!original_cm)) {
2460 spin_unlock_bh(&si->lock);
2461 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002462 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002463 }
2464
2465 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2466 if (unlikely(!reply_cm)) {
2467 spin_unlock_bh(&si->lock);
2468 kfree(original_cm);
2469 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002470 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002471 }
2472
2473 /*
2474 * Fill in the "original" direction connection matching object.
2475 * Note that the transmit MAC address is "dest_mac_xlate" because
2476 * we always know both ends of a connection by their translated
2477 * addresses and not their public addresses.
2478 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002479 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002480 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002481 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002482 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002483 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002484 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002485 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002486 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002487 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002488 original_cm->xlate_dest_port = sic->dest_port_xlate;
2489 original_cm->rx_packet_count = 0;
2490 original_cm->rx_packet_count64 = 0;
2491 original_cm->rx_byte_count = 0;
2492 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002493 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002494 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002495 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002496 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2497 original_cm->connection = c;
2498 original_cm->counter_match = reply_cm;
2499 original_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002500#ifdef CONFIG_NF_FLOW_COOKIE
2501 original_cm->flow_cookie = 0;
2502#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002503 original_cm->active_next = NULL;
2504 original_cm->active_prev = NULL;
2505 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002506
2507 /*
2508 * For PPP links we don't write an L2 header. For everything else we do.
2509 */
2510 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2511 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2512
2513 /*
2514 * If our dev writes Ethernet headers then we can write a really fast
2515 * version.
2516 */
2517 if (dest_dev->header_ops) {
2518 if (dest_dev->header_ops->create == eth_header) {
2519 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2520 }
2521 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002522 }
2523
2524 /*
2525 * Fill in the "reply" direction connection matching object.
2526 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002527 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002528 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002529 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002530 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002531 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002532 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002533 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002534 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002535 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002536 reply_cm->xlate_dest_port = sic->src_port;
2537 reply_cm->rx_packet_count = 0;
2538 reply_cm->rx_packet_count64 = 0;
2539 reply_cm->rx_byte_count = 0;
2540 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002541 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002542 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002543 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002544 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2545 reply_cm->connection = c;
2546 reply_cm->counter_match = original_cm;
2547 reply_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002548#ifdef CONFIG_NF_FLOW_COOKIE
2549 reply_cm->flow_cookie = 0;
2550#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002551 reply_cm->active_next = NULL;
2552 reply_cm->active_prev = NULL;
2553 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002554
2555 /*
2556 * For PPP links we don't write an L2 header. For everything else we do.
2557 */
2558 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2559 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2560
2561 /*
2562 * If our dev writes Ethernet headers then we can write a really fast
2563 * version.
2564 */
2565 if (src_dev->header_ops) {
2566 if (src_dev->header_ops->create == eth_header) {
2567 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2568 }
2569 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002570 }
2571
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002572
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002573 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002574 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2575 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2576 }
2577
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002578 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002579 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2580 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2581 }
2582
2583 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002584 c->src_ip = sic->src_ip.ip;
2585 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002586 c->src_port = sic->src_port;
2587 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002588 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002589 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002590 c->dest_ip = sic->dest_ip.ip;
2591 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002592 c->dest_port = sic->dest_port;
2593 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002594 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002595 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002596 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002597
2598 c->last_sync_jiffies = get_jiffies_64();
2599 c->iterators = 0;
2600 c->pending_free = false;
2601
2602 /*
2603 * Take hold of our source and dest devices for the duration of the connection.
2604 */
2605 dev_hold(c->original_dev);
2606 dev_hold(c->reply_dev);
2607
2608 /*
2609 * Initialize the protocol-specific information that we track.
2610 */
2611 switch (sic->protocol) {
2612 case IPPROTO_TCP:
2613 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2614 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2615 original_cm->protocol_state.tcp.end = sic->src_td_end;
2616 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2617 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2618 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2619 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2620 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002621 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002622 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2623 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2624 }
2625 break;
2626 }
2627
2628 sfe_ipv4_connection_match_compute_translations(original_cm);
2629 sfe_ipv4_connection_match_compute_translations(reply_cm);
2630 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2631
2632 spin_unlock_bh(&si->lock);
2633
2634 /*
2635 * We have everything we need!
2636 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002637 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002638 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2639 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002640 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002641 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002642 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002643 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002644 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002645
2646 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002647}
2648
2649/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002650 * sfe_ipv4_destroy_rule()
2651 * Destroy a forwarding rule.
2652 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002653void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002654{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002655 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002656 struct sfe_ipv4_connection *c;
2657
2658 spin_lock_bh(&si->lock);
2659 si->connection_destroy_requests++;
2660
2661 /*
2662 * Check to see if we have a flow that matches the rule we're trying
2663 * to destroy. If there isn't then we can't destroy it.
2664 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002665 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2666 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002667 if (!c) {
2668 si->connection_destroy_misses++;
2669 spin_unlock_bh(&si->lock);
2670
2671 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002672 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2673 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002674 return;
2675 }
2676
2677 /*
2678 * Remove our connection details from the hash tables.
2679 */
2680 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2681 spin_unlock_bh(&si->lock);
2682
2683 /*
2684 * Finally synchronize state and free resources. We need to protect against
2685 * pre-emption by our bottom half while we do this though.
2686 */
2687 local_bh_disable();
2688 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2689 local_bh_enable();
2690
2691 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002692 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2693 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002694}
2695
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * The callback is published with RCU semantics; readers such as the periodic
 * sync timer pick it up via rcu_dereference().  Registering NULL disables
 * synchronization (sfe_ipv4_periodic_sync() checks for a NULL callback).
 */
void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	/*
	 * rcu_assign_pointer() orders initialization of the pointed-to state
	 * before publication, so concurrent readers never see a half-set pointer.
	 */
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2708
2709/*
2710 * sfe_ipv4_get_debug_dev()
2711 */
2712static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2713 struct device_attribute *attr,
2714 char *buf)
2715{
2716 struct sfe_ipv4 *si = &__si;
2717 ssize_t count;
2718 int num;
2719
2720 spin_lock_bh(&si->lock);
2721 num = si->debug_dev;
2722 spin_unlock_bh(&si->lock);
2723
2724 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2725 return count;
2726}
2727
2728/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002729 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002730 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002731static const struct device_attribute sfe_ipv4_debug_dev_attr =
2732 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2733
/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection *c_next;

	spin_lock_bh(&si->lock);
	c = si->all_connections_head;
	if (!c) {
		spin_unlock_bh(&si->lock);
		return;
	}

	/*
	 * Take an iterator reference on the first connection so it cannot be
	 * freed while we are positioned on it.
	 */
	c->iterators++;

	/*
	 * Iterate over all connections
	 */
	while (c) {
		c_next = c->all_connections_next;

		/*
		 * Before we do anything else, take an iterator reference for the
		 * connection we'll iterate next.
		 */
		if (c_next) {
			c_next->iterators++;
		}

		/*
		 * Does this connection relate to the device we are destroying? If
		 * it does then ensure it is marked for being freed as soon as it
		 * is no longer being iterated.
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			c->pending_free = true;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		}

		/*
		 * Remove the iterator reference that we acquired and see if we
		 * should free any resources.
		 */
		if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
			/*
			 * Drop the lock around the frees: dev_put()/kfree() must not
			 * be called while holding a bottom-half spinlock longer than
			 * necessary.  c_next is protected by its own iterator ref.
			 */
			spin_unlock_bh(&si->lock);

			/*
			 * This entry is dead so release our hold of the source and
			 * dest devices and free the memory for our connection objects.
			 */
			dev_put(c->original_dev);
			dev_put(c->reply_dev);
			kfree(c->original_match);
			kfree(c->reply_match);
			kfree(c);

			spin_lock_bh(&si->lock);
		}

		c = c_next;
	}

	spin_unlock_bh(&si->lock);
}
2806
/*
 * sfe_ipv4_periodic_sync()
 *	Timer callback: walk a quota of the "active" connection list and push
 *	per-connection stats to the registered sync callback, then re-arm.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		/* No callback registered - nothing to sync this period. */
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm roughly every 10ms ((HZ + 99) / 100 jiffies). */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2905
2906#define CHAR_DEV_MSG_SIZE 768
2907
2908/*
2909 * sfe_ipv4_debug_dev_read_start()
2910 * Generate part of the XML output.
2911 */
2912static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2913 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2914{
2915 int bytes_read;
2916
2917 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2918 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2919 return false;
2920 }
2921
2922 *length -= bytes_read;
2923 *total_read += bytes_read;
2924
2925 ws->state++;
2926 return true;
2927}
2928
2929/*
2930 * sfe_ipv4_debug_dev_read_connections_start()
2931 * Generate part of the XML output.
2932 */
2933static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2934 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2935{
2936 int bytes_read;
2937
2938 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2939 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2940 return false;
2941 }
2942
2943 *length -= bytes_read;
2944 *total_read += bytes_read;
2945
2946 ws->state++;
2947 return true;
2948}
2949
2950/*
2951 * sfe_ipv4_debug_dev_read_connections_connection()
2952 * Generate part of the XML output.
2953 */
2954static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2955 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2956{
2957 struct sfe_ipv4_connection *c;
2958 struct sfe_ipv4_connection *c_next;
2959 struct sfe_ipv4_connection_match *original_cm;
2960 struct sfe_ipv4_connection_match *reply_cm;
2961 int bytes_read;
2962 int protocol;
2963 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002964 __be32 src_ip;
2965 __be32 src_ip_xlate;
2966 __be16 src_port;
2967 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002968 uint64_t src_rx_packets;
2969 uint64_t src_rx_bytes;
2970 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002971 __be32 dest_ip;
2972 __be32 dest_ip_xlate;
2973 __be16 dest_port;
2974 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002975 uint64_t dest_rx_packets;
2976 uint64_t dest_rx_bytes;
2977 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002978 uint32_t mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002979#ifdef CONFIG_NF_FLOW_COOKIE
2980 int src_flow_cookie, dst_flow_cookie;
2981#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002982
2983 spin_lock_bh(&si->lock);
2984 c = ws->iter_conn;
2985
2986 /*
2987 * Is this the first connection we need to scan?
2988 */
2989 if (!c) {
2990 c = si->all_connections_head;
2991
2992 /*
2993 * If there were no connections then move to the next state.
2994 */
2995 if (!c) {
2996 spin_unlock_bh(&si->lock);
2997
2998 ws->state++;
2999 return true;
3000 }
3001
3002 c->iterators++;
3003 }
3004
3005 c_next = c->all_connections_next;
3006 ws->iter_conn = c_next;
3007
3008 /*
3009 * Before we do anything else, take an iterator reference for the
3010 * connection we'll iterate next.
3011 */
3012 if (c_next) {
3013 c_next->iterators++;
3014 }
3015
3016 /*
3017 * Remove the iterator reference that we acquired and see if we
3018 * should free any resources.
3019 */
3020 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
3021 spin_unlock_bh(&si->lock);
3022
3023 /*
3024 * This entry is dead so release our hold of the source and
3025 * dest devices and free the memory for our connection objects.
3026 */
3027 dev_put(c->original_dev);
3028 dev_put(c->reply_dev);
3029 kfree(c->original_match);
3030 kfree(c->reply_match);
3031 kfree(c);
3032
3033 /*
3034 * If we have no more connections then move to the next state.
3035 */
3036 if (!c_next) {
3037 ws->state++;
3038 }
3039
3040 return true;
3041 }
3042
3043 original_cm = c->original_match;
3044 reply_cm = c->reply_match;
3045
3046 protocol = c->protocol;
3047 src_dev = c->original_dev;
3048 src_ip = c->src_ip;
3049 src_ip_xlate = c->src_ip_xlate;
3050 src_port = c->src_port;
3051 src_port_xlate = c->src_port_xlate;
3052
3053 sfe_ipv4_connection_match_update_summary_stats(original_cm);
3054 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
3055
3056 src_rx_packets = original_cm->rx_packet_count64;
3057 src_rx_bytes = original_cm->rx_byte_count64;
3058 dest_dev = c->reply_dev;
3059 dest_ip = c->dest_ip;
3060 dest_ip_xlate = c->dest_ip_xlate;
3061 dest_port = c->dest_port;
3062 dest_port_xlate = c->dest_port_xlate;
3063 dest_rx_packets = reply_cm->rx_packet_count64;
3064 dest_rx_bytes = reply_cm->rx_byte_count64;
3065 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003066 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003067#ifdef CONFIG_NF_FLOW_COOKIE
3068 src_flow_cookie = original_cm->flow_cookie;
3069 dst_flow_cookie = reply_cm->flow_cookie;
3070#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003071 spin_unlock_bh(&si->lock);
3072
3073 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3074 "protocol=\"%u\" "
3075 "src_dev=\"%s\" "
3076 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3077 "src_port=\"%u\" src_port_xlate=\"%u\" "
3078 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3079 "dest_dev=\"%s\" "
3080 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3081 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
3082 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003083#ifdef CONFIG_NF_FLOW_COOKIE
3084 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3085#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003086 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003087 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003088 protocol,
3089 src_dev->name,
3090 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003091 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003092 src_rx_packets, src_rx_bytes,
3093 dest_dev->name,
3094 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003095 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003096 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003097#ifdef CONFIG_NF_FLOW_COOKIE
3098 src_flow_cookie, dst_flow_cookie,
3099#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003100 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003101
3102 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3103 return false;
3104 }
3105
3106 *length -= bytes_read;
3107 *total_read += bytes_read;
3108
3109 /*
3110 * If we have no more connections then move to the next state.
3111 */
3112 if (!c_next) {
3113 ws->state++;
3114 }
3115
3116 return true;
3117}
3118
3119/*
3120 * sfe_ipv4_debug_dev_read_connections_end()
3121 * Generate part of the XML output.
3122 */
3123static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3124 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3125{
3126 int bytes_read;
3127
3128 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3129 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3130 return false;
3131 }
3132
3133 *length -= bytes_read;
3134 *total_read += bytes_read;
3135
3136 ws->state++;
3137 return true;
3138}
3139
3140/*
3141 * sfe_ipv4_debug_dev_read_exceptions_start()
3142 * Generate part of the XML output.
3143 */
3144static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3145 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3146{
3147 int bytes_read;
3148
3149 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3150 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3151 return false;
3152 }
3153
3154 *length -= bytes_read;
3155 *total_read += bytes_read;
3156
3157 ws->state++;
3158 return true;
3159}
3160
3161/*
3162 * sfe_ipv4_debug_dev_read_exceptions_exception()
3163 * Generate part of the XML output.
3164 */
3165static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3166 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3167{
3168 uint64_t ct;
3169
3170 spin_lock_bh(&si->lock);
3171 ct = si->exception_events64[ws->iter_exception];
3172 spin_unlock_bh(&si->lock);
3173
3174 if (ct) {
3175 int bytes_read;
3176
3177 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3178 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3179 sfe_ipv4_exception_events_string[ws->iter_exception],
3180 ct);
3181 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3182 return false;
3183 }
3184
3185 *length -= bytes_read;
3186 *total_read += bytes_read;
3187 }
3188
3189 ws->iter_exception++;
3190 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3191 ws->iter_exception = 0;
3192 ws->state++;
3193 }
3194
3195 return true;
3196}
3197
3198/*
3199 * sfe_ipv4_debug_dev_read_exceptions_end()
3200 * Generate part of the XML output.
3201 */
3202static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3203 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3204{
3205 int bytes_read;
3206
3207 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3208 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3209 return false;
3210 }
3211
3212 *length -= bytes_read;
3213 *total_read += bytes_read;
3214
3215 ws->state++;
3216 return true;
3217}
3218
3219/*
3220 * sfe_ipv4_debug_dev_read_stats()
3221 * Generate part of the XML output.
3222 */
3223static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3224 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3225{
3226 int bytes_read;
3227 unsigned int num_connections;
3228 uint64_t packets_forwarded;
3229 uint64_t packets_not_forwarded;
3230 uint64_t connection_create_requests;
3231 uint64_t connection_create_collisions;
3232 uint64_t connection_destroy_requests;
3233 uint64_t connection_destroy_misses;
3234 uint64_t connection_flushes;
3235 uint64_t connection_match_hash_hits;
3236 uint64_t connection_match_hash_reorders;
3237
3238 spin_lock_bh(&si->lock);
3239 sfe_ipv4_update_summary_stats(si);
3240
3241 num_connections = si->num_connections;
3242 packets_forwarded = si->packets_forwarded64;
3243 packets_not_forwarded = si->packets_not_forwarded64;
3244 connection_create_requests = si->connection_create_requests64;
3245 connection_create_collisions = si->connection_create_collisions64;
3246 connection_destroy_requests = si->connection_destroy_requests64;
3247 connection_destroy_misses = si->connection_destroy_misses64;
3248 connection_flushes = si->connection_flushes64;
3249 connection_match_hash_hits = si->connection_match_hash_hits64;
3250 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3251 spin_unlock_bh(&si->lock);
3252
3253 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3254 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003255 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3256 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003257 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3258 "flushes=\"%llu\" "
3259 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3260 num_connections,
3261 packets_forwarded,
3262 packets_not_forwarded,
3263 connection_create_requests,
3264 connection_create_collisions,
3265 connection_destroy_requests,
3266 connection_destroy_misses,
3267 connection_flushes,
3268 connection_match_hash_hits,
3269 connection_match_hash_reorders);
3270 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3271 return false;
3272 }
3273
3274 *length -= bytes_read;
3275 *total_read += bytes_read;
3276
3277 ws->state++;
3278 return true;
3279}
3280
3281/*
3282 * sfe_ipv4_debug_dev_read_end()
3283 * Generate part of the XML output.
3284 */
3285static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3286 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3287{
3288 int bytes_read;
3289
3290 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3291 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3292 return false;
3293 }
3294
3295 *length -= bytes_read;
3296 *total_read += bytes_read;
3297
3298 ws->state++;
3299 return true;
3300}
3301
3302/*
3303 * Array of write functions that write various XML elements that correspond to
3304 * our XML output state machine.
3305 */
3306sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
3307 sfe_ipv4_debug_dev_read_start,
3308 sfe_ipv4_debug_dev_read_connections_start,
3309 sfe_ipv4_debug_dev_read_connections_connection,
3310 sfe_ipv4_debug_dev_read_connections_end,
3311 sfe_ipv4_debug_dev_read_exceptions_start,
3312 sfe_ipv4_debug_dev_read_exceptions_exception,
3313 sfe_ipv4_debug_dev_read_exceptions_end,
3314 sfe_ipv4_debug_dev_read_stats,
3315 sfe_ipv4_debug_dev_read_end,
3316};
3317
3318/*
3319 * sfe_ipv4_debug_dev_read()
3320 * Send info to userspace upon read request from user
3321 */
3322static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3323{
3324 char msg[CHAR_DEV_MSG_SIZE];
3325 int total_read = 0;
3326 struct sfe_ipv4_debug_xml_write_state *ws;
3327 struct sfe_ipv4 *si = &__si;
3328
3329 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3330 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3331 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3332 continue;
3333 }
3334 }
3335
3336 return total_read;
3337}
3338
/*
 * sfe_ipv4_debug_dev_write()
 *	Write to char device resets some stats
 *
 * The written data itself is ignored: any write zeroes the 64-bit summary
 * counters.  Returns length so the caller sees the whole write consumed.
 */
static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	/*
	 * Fold any pending per-connection deltas into the 64-bit totals
	 * first, so nothing accumulated so far survives the reset.
	 */
	sfe_ipv4_update_summary_stats(si);

	si->packets_forwarded64 = 0;
	si->packets_not_forwarded64 = 0;
	si->connection_create_requests64 = 0;
	si->connection_create_collisions64 = 0;
	si->connection_destroy_requests64 = 0;
	si->connection_destroy_misses64 = 0;
	si->connection_flushes64 = 0;
	si->connection_match_hash_hits64 = 0;
	si->connection_match_hash_reorders64 = 0;
	spin_unlock_bh(&si->lock);

	return length;
}
3363
3364/*
3365 * sfe_ipv4_debug_dev_open()
3366 */
3367static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3368{
3369 struct sfe_ipv4_debug_xml_write_state *ws;
3370
3371 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3372 if (!ws) {
3373 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3374 if (!ws) {
3375 return -ENOMEM;
3376 }
3377
3378 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3379 file->private_data = ws;
3380 }
3381
3382 return 0;
3383}
3384
/*
 * sfe_ipv4_debug_dev_release()
 *	Tear down the per-open XML iteration state.
 *
 * If the reader closed the device mid-iteration we may still hold a
 * reference on a connection; drop it here, and free the connection
 * outright if we were the last thing keeping it alive.
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		struct sfe_ipv4_connection *c;

		/*
		 * Are we currently iterating a connection? If we are then
		 * make sure that we reduce its iterator count and if necessary
		 * free it.
		 */
		c = ws->iter_conn;
		if (c) {
			struct sfe_ipv4 *si = &__si;
			/* true when we held the final reference to a defunct connection */
			bool free_connection;

			spin_lock_bh(&si->lock);
			free_connection = sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c);
			spin_unlock_bh(&si->lock);

			if (free_connection) {
				/*
				 * This entry is dead so release our hold of the source and
				 * dest devices and free the memory for our connection objects.
				 */
				dev_put(c->original_dev);
				dev_put(c->reply_dev);
				kfree(c->original_match);
				kfree(c->reply_match);
				kfree(c);
			}
		}

		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
	}

	return 0;
}
3431
3432/*
3433 * File operations used in the debug char device
3434 */
3435static struct file_operations sfe_ipv4_debug_dev_fops = {
3436 .read = sfe_ipv4_debug_dev_read,
3437 .write = sfe_ipv4_debug_dev_write,
3438 .open = sfe_ipv4_debug_dev_open,
3439 .release = sfe_ipv4_debug_dev_release
3440};
3441
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003442#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb
 *	register a function in SFE to let SFE use this function to configure flow cookie for a flow
 *
 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
 * can use this function to configure flow cookie for a flow.
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	/* A NULL callback is a caller bug, not a runtime condition. */
	BUG_ON(!cb);

	/*
	 * Only one callback may be registered at a time.
	 * NOTE(review): this check-then-assign is not atomic; two concurrent
	 * registrations could both pass the check — confirm callers serialize
	 * registration (e.g. at driver probe time).
	 */
	if (si->flow_cookie_set_func) {
		return -1;
	}

	/*
	 * Publish with RCU semantics so lock-free readers on the fast path
	 * see a fully-initialized pointer.
	 */
	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}
3464
/*
 * sfe_unregister_flow_cookie_cb
 *	unregister function which is used to configure flow cookie for a flow
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	/*
	 * NOTE(review): 'cb' is unused — whatever callback is currently
	 * registered is cleared unconditionally.  Also no RCU grace period is
	 * waited for here, so a reader may still be executing the old
	 * callback when this returns; confirm the caller performs
	 * synchronize_rcu() (or equivalent) before freeing callback state.
	 */
	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}
3478#endif /*CONFIG_NF_FLOW_COOKIE*/
3479
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003480/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003481 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003482 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003483static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003484{
3485 struct sfe_ipv4 *si = &__si;
3486 int result = -1;
3487
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003488 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003489
3490 /*
3491 * Create sys/sfe_ipv4
3492 */
3493 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3494 if (!si->sys_sfe_ipv4) {
3495 DEBUG_ERROR("failed to register sfe_ipv4\n");
3496 goto exit1;
3497 }
3498
3499 /*
3500 * Create files, one for each parameter supported by this module.
3501 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003502 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3503 if (result) {
3504 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3505 goto exit4;
3506 }
3507
3508 /*
3509 * Register our debug char device.
3510 */
3511 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3512 if (result < 0) {
3513 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3514 goto exit5;
3515 }
3516
3517 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003518
3519 /*
3520 * Create a timer to handle periodic statistics.
3521 */
3522 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003523 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003524
Dave Hudson87973cd2013-10-22 16:00:04 +01003525 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003526
Dave Hudson87973cd2013-10-22 16:00:04 +01003527 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003528
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003529exit5:
3530 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3531
3532exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003533 kobject_put(si->sys_sfe_ipv4);
3534
3535exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003536 return result;
3537}
3538
/*
 * sfe_ipv4_exit()
 *	Module exit point: tear everything down in the reverse of init order.
 *
 * Order matters: connections are flushed first, then the periodic timer is
 * stopped (del_timer_sync waits for a running callback to finish), and only
 * then are the char device and sysfs entries removed.
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	/* Stop the periodic stats timer and wait out any in-flight callback. */
	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	/* Drops the last reference to /sys/sfe_ipv4 and frees the kobject. */
	kobject_put(si->sys_sfe_ipv4);

}
3562
/* Module entry/exit registration. */
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Public API exported to other kernel modules (the connection-manager glue
 * uses these to feed packets in and manage rules).
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
/* Flow-cookie hooks are only built when the kernel config enables them. */
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");

Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003581