blob: 64cf8ddd20a3c4d13c89e5be9640916dc134aefd [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Xiaoping Fand642a6e2015-04-10 15:19:06 -07005 * Copyright (c) 2013-2015 Qualcomm Atheros, Inc.
Matthew McClintocka3221942014-01-16 11:44:26 -06006 *
7 * All Rights Reserved.
8 * Qualcomm Atheros Confidential and Proprietary.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01009 */
Matthew McClintocka3221942014-01-16 11:44:26 -060010
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010011#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060012#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010013#include <linux/skbuff.h>
14#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010015#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060016#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010017
Dave Hudsondcd08fb2013-11-22 09:25:16 -060018#include "sfe.h"
19#include "sfe_ipv4.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020
/*
 * By default Linux IP header and transport layer header structures are
 * unpacked, assuming that such headers should be 32-bit aligned.
 * Unfortunately some wireless adaptors can't cope with this requirement and
 * some CPUs can't handle misaligned accesses.  For those platforms we
 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
 * When we do this the compiler will generate slightly worse code than for the
 * aligned case (on most platforms) but will be much quicker than fixing
 * things up in an unaligned trap handler.
 */
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif
37
/*
 * An Ethernet header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).
 *
 * The MAC addresses are declared as arrays of 16-bit words (ETH_ALEN / 2
 * entries) so that they can be copied with 16-bit accesses.
 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC address */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC address */
	__be16 h_proto;			/* Ethernet protocol/type field */
} SFE_IPV4_UNALIGNED_STRUCT;
48
/*
 * An IPv4 header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).
 *
 * The layout mirrors the kernel's struct iphdr, including the
 * endianness-dependent ordering of the version/ihl bitfields.
 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,			/* Header length in 32-bit words */
	version:4;			/* IP version (4 for IPv4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,			/* IP version (4 for IPv4) */
	ihl:4;				/* Header length in 32-bit words */
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service */
	__be16 tot_len;			/* Total length: header plus payload */
	__be16 id;			/* Identification (fragment reassembly) */
	__be16 frag_off;		/* Fragment offset and flags */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol number */
	__sum16 check;			/* IP header checksum */
	__be32 saddr;			/* Source address */
	__be32 daddr;			/* Destination address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010078
/*
 * A UDP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).
 */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* Length of UDP header plus payload */
	__sum16 check;			/* UDP checksum */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010090
/*
 * A TCP header, but with an optional "packed" attribute to
 * help with performance on some platforms (see the definition of
 * SFE_IPV4_UNALIGNED_STRUCT).
 *
 * The flag/data-offset bitfields follow the kernel's struct tcphdr,
 * with endianness-dependent ordering.
 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgment number */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,			/* Reserved bits */
	doff:4,				/* Data offset (header length in 32-bit words) */
	fin:1,
	syn:1,
	rst:1,
	psh:1,
	ack:1,
	urg:1,
	ece:1,
	cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,			/* Data offset (header length in 32-bit words) */
	res1:4,				/* Reserved bits */
	cwr:1,
	ece:1,
	urg:1,
	ack:1,
	psh:1,
	rst:1,
	syn:1,
	fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100130
/*
 * Specifies the lower bound on ACK numbers carried in the TCP header
 * (used when sanity-checking the advertised window during sequence
 * tracking).
 */
#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
135
/*
 * IPv4 TCP connection match additional data.
 *
 * Per-direction TCP sequence-space tracking state used to validate
 * sequence and ACK numbers on the fast path.
 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};
145
/*
 * Bit flags for IPv4 connection matching entry.
 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC 0x1
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST 0x2
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK 0x4
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR 0x8
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR 0x10
					/* Write a full L2 header (was a copy-paste of the fast Ethernet comment) */
159
/*
 * IPv4 connection matching structure.
 *
 * One of these exists per direction of an offloaded connection (see the
 * original_match/reply_match pointers in struct sfe_ipv4_connection); it
 * holds everything needed to match, translate and forward a packet.
 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	uint32_t flow_cookie;		/* used flow cookie, for debug */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	uint16_t xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	uint16_t xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};
242
/*
 * Per-connection data structure.
 *
 * Ties together the two directional match entries (original and reply)
 * along with the 5-tuple, its NAT translations and bookkeeping state.
 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	int iterators;			/* Number of iterators currently using this connection */
	bool pending_free;		/* Flag that indicates that this connection should be freed after iteration */
	uint32_t mark;			/* mark for outgoing packet */
};
276
/*
 * IPv4 connections and hash table size information.
 */
#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)

#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff

/*
 * One slot of the flow cookie table: maps a hardware-assigned flow
 * cookie index back to its connection match entry.
 */
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
					/* Match entry that owns this cookie slot (NULL if free) */
	unsigned long last_clean_time;	/* Jiffies when this slot was last released */
};
#endif
293
/*
 * Reasons why a packet is bounced off the fast path back to the slow
 * path.  The order here MUST match sfe_ipv4_exception_events_string[].
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
333
/*
 * Human-readable names for the exception events, used by the debug
 * output.  The order MUST match enum sfe_ipv4_exception_events.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
372
/*
 * Per-module structure.
 *
 * Holds all engine state: the connection and match hash tables, the
 * active and all-connections lists, statistics and sysfs/debugfs
 * control state.  Protected by the embedded spinlock.
 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_ipv4_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
#endif

	/*
	 * Statistics.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
};
448
/*
 * Enumeration of the XML output.
 *
 * States of the state machine that emits the debug XML document in
 * chunks across successive reads of the debug char device.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
464
/*
 * XML write state.
 *
 * Per-reader cursor for the chunked debug XML output.
 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	struct sfe_ipv4_connection *iter_conn;
					/* Next connection iterator */
	int iter_exception;		/* Next exception iterator */
};

/*
 * Signature of one per-state emitter for the debug XML output; returns
 * true when the state has fully emitted and the machine may advance.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);
478
479struct sfe_ipv4 __si;
480
481/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100482 * sfe_ipv4_gen_ip_csum()
483 * Generate the IP checksum for an IPv4 header.
484 *
485 * Note that this function assumes that we have only 20 bytes of IP header.
486 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600487static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100488{
489 uint32_t sum;
490 uint16_t *i = (uint16_t *)iph;
491
492 iph->check = 0;
493
494 /*
495 * Generate the sum.
496 */
497 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
498
499 /*
500 * Fold it to ones-complement form.
501 */
502 sum = (sum & 0xffff) + (sum >> 16);
503 sum = (sum & 0xffff) + (sum >> 16);
504
505 return (uint16_t)sum ^ 0xffff;
506}
507
508/*
509 * sfe_ipv4_get_connection_match_hash()
510 * Generate the hash used in connection match lookups.
511 */
512static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100513 __be32 src_ip, __be16 src_port,
514 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100515{
516 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100517 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100518 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
519}
520
/*
 * sfe_ipv4_find_sfe_ipv4_connection_match()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * Uses a move-to-front heuristic: a match found deeper in the hash chain
 * is promoted to the chain head on the assumption it will be hit again soon.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 * Ports are compared first as they are the most discriminating fields.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it. We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
		 || cm->match_dest_port != dest_port
		 || cm->match_src_ip != src_ip
		 || cm->match_dest_ip != dest_ip
		 || cm->match_protocol != protocol
		 || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it.  Note that cm is not the chain head
	 * here (the head was checked above), so cm->prev is never NULL.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
600
601/*
602 * sfe_ipv4_connection_match_update_summary_stats()
603 * Update the summary stats for a connection match entry.
604 */
605static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
606{
607 cm->rx_packet_count64 += cm->rx_packet_count;
608 cm->rx_packet_count = 0;
609 cm->rx_byte_count64 += cm->rx_byte_count;
610 cm->rx_byte_count = 0;
611}
612
613/*
614 * sfe_ipv4_connection_match_compute_translations()
615 * Compute port and address translations for a connection match entry.
616 */
617static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
618{
619 /*
620 * Before we insert the entry look to see if this is tagged as doing address
621 * translations. If it is then work out the adjustment that we need to apply
622 * to the transport checksum.
623 */
624 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
625 /*
626 * Precompute an incremental checksum adjustment so we can
627 * edit packets in this stream very quickly. The algorithm is from RFC1624.
628 */
629 uint16_t src_ip_hi = cm->match_src_ip >> 16;
630 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
631 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
632 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
633 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100634 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100635 uint32_t adj;
636
637 /*
638 * When we compute this fold it down to a 16-bit offset
639 * as that way we can avoid having to do a double
640 * folding of the twos-complement result because the
641 * addition of 2 16-bit values cannot cause a double
642 * wrap-around!
643 */
644 adj = src_ip_hi + src_ip_lo + cm->match_src_port
645 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
646 adj = (adj & 0xffff) + (adj >> 16);
647 adj = (adj & 0xffff) + (adj >> 16);
648 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600649
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100650 }
651
652 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
653 /*
654 * Precompute an incremental checksum adjustment so we can
655 * edit packets in this stream very quickly. The algorithm is from RFC1624.
656 */
657 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
658 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
659 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
660 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
661 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100662 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100663 uint32_t adj;
664
665 /*
666 * When we compute this fold it down to a 16-bit offset
667 * as that way we can avoid having to do a double
668 * folding of the twos-complement result because the
669 * addition of 2 16-bit values cannot cause a double
670 * wrap-around!
671 */
672 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
673 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
674 adj = (adj & 0xffff) + (adj >> 16);
675 adj = (adj & 0xffff) + (adj >> 16);
676 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
677 }
Xiaoping Fanad755af2015-04-01 16:58:46 -0700678
679 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
680 uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
681 if (adj < cm->xlate_src_ip) {
682 adj++;
683 }
684
685 adj = (adj & 0xffff) + (adj >> 16);
686 adj = (adj & 0xffff) + (adj >> 16);
687 cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
688 }
689
690 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
691 uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
692 if (adj < cm->xlate_dest_ip) {
693 adj++;
694 }
695
696 adj = (adj & 0xffff) + (adj >> 16);
697 adj = (adj & 0xffff) + (adj >> 16);
698 cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
699 }
700
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100701}
702
703/*
704 * sfe_ipv4_update_summary_stats()
705 * Update the summary stats.
706 */
707static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
708{
709 int i;
710
711 si->connection_create_requests64 += si->connection_create_requests;
712 si->connection_create_requests = 0;
713 si->connection_create_collisions64 += si->connection_create_collisions;
714 si->connection_create_collisions = 0;
715 si->connection_destroy_requests64 += si->connection_destroy_requests;
716 si->connection_destroy_requests = 0;
717 si->connection_destroy_misses64 += si->connection_destroy_misses;
718 si->connection_destroy_misses = 0;
719 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
720 si->connection_match_hash_hits = 0;
721 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
722 si->connection_match_hash_reorders = 0;
723 si->connection_flushes64 += si->connection_flushes;
724 si->connection_flushes = 0;
725 si->packets_forwarded64 += si->packets_forwarded;
726 si->packets_forwarded = 0;
727 si->packets_not_forwarded64 += si->packets_not_forwarded;
728 si->packets_not_forwarded = 0;
729
730 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
731 si->exception_events64[i] += si->exception_events[i];
732 si->exception_events[i] = 0;
733 }
734}
735
/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 *
 * The entry is pushed onto the head of its hash chain.  When flow
 * cookies are enabled we also try to claim a free cookie slot and
 * program the hardware to tag this flow's packets with it.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 * Slot 0 is skipped: cookie value 0 means "no cookie".  A slot is only
	 * reused once it has been free for at least HZ jiffies.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				/*
				 * A zero return from the hardware hook indicates success.
				 */
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					 cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}
788
/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 *
 * Also releases any flow cookie slot the entry holds (telling the
 * hardware to stop tagging the flow) and unlinks the entry from the
 * active list if it is on it.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Tell hardware that we no longer need a flow cookie in packet of this flow
	 */
	unsigned int conn_match_idx;

	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if (cm == entry->match) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				/*
				 * Cookie index 0 deprograms the flow in hardware.
				 */
				func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
				     cm->match_dest_ip, cm->match_dest_port, 0);
			}
			rcu_read_unlock();

			cm->flow_cookie = 0;
			entry->match = NULL;
			/*
			 * Record the release time; the slot stays quarantined
			 * until insert sees it idle for at least HZ jiffies.
			 */
			entry->last_clean_time = jiffies;
			break;
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}
859
860/*
861 * sfe_ipv4_get_connection_hash()
862 * Generate the hash used in connection lookups.
863 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100864static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
865 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100866{
Dave Hudson87973cd2013-10-22 16:00:04 +0100867 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100868 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
869}
870
871/*
872 * sfe_ipv4_find_sfe_ipv4_connection()
873 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
874 *
875 * On entry we must be holding the lock that protects the hash table.
876 */
877static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100878 __be32 src_ip, __be16 src_port,
879 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100880{
881 struct sfe_ipv4_connection *c;
882 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
883 c = si->conn_hash[conn_idx];
884
885 /*
886 * If we don't have anything in this chain then bale.
887 */
888 if (unlikely(!c)) {
889 return c;
890 }
891
892 /*
893 * Hopefully the first entry is the one we want.
894 */
895 if (likely(c->src_port == src_port)
896 && likely(c->dest_port == dest_port)
897 && likely(c->src_ip == src_ip)
898 && likely(c->dest_ip == dest_ip)
899 && likely(c->protocol == protocol)) {
900 return c;
901 }
902
903 /*
904 * We may or may not have a matching entry but if we do then we want to
905 * move that entry to the top of the hash chain when we get to it. We
906 * presume that this will be reused again very quickly.
907 */
908 do {
909 c = c->next;
910 } while (c && (c->src_port != src_port
911 || c->dest_port != dest_port
912 || c->src_ip != src_ip
913 || c->dest_ip != dest_ip
914 || c->protocol != protocol));
915
916 /*
917 * Will need connection entry for next create/destroy metadata,
918 * So no need to re-order entry for these requests
919 */
920 return c;
921}
922
923/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600924 * sfe_ipv4_mark_rule()
925 * Updates the mark for a current offloaded connection
926 *
927 * Will take hash lock upon entry
928 */
Nicolas Costa865bce82014-01-31 17:48:03 -0600929void sfe_ipv4_mark_rule(struct sfe_ipv4_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600930{
931 struct sfe_ipv4 *si = &__si;
932 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600933
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600934 spin_lock(&si->lock);
935 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600936 mark->src_ip, mark->src_port,
937 mark->dest_ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600938 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600939 DEBUG_TRACE("Matching connection found for mark, "
940 "setting from %08x to %08x\n",
941 c->mark, mark->mark);
942 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600943 c->mark = mark->mark;
944 }
945 spin_unlock(&si->lock);
946}
947
948/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100949 * sfe_ipv4_insert_sfe_ipv4_connection()
950 * Insert a connection into the hash.
951 *
952 * On entry we must be holding the lock that protects the hash table.
953 */
954static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
955{
956 struct sfe_ipv4_connection **hash_head;
957 struct sfe_ipv4_connection *prev_head;
958 unsigned int conn_idx;
959
960 /*
961 * Insert entry into the connection hash.
962 */
963 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
964 c->dest_ip, c->dest_port);
965 hash_head = &si->conn_hash[conn_idx];
966 prev_head = *hash_head;
967 c->prev = NULL;
968 if (prev_head) {
969 prev_head->prev = c;
970 }
971
972 c->next = prev_head;
973 *hash_head = c;
974
975 /*
976 * Insert entry into the "all connections" list.
977 */
978 if (si->all_connections_tail) {
979 c->all_connections_prev = si->all_connections_tail;
980 si->all_connections_tail->all_connections_next = c;
981 } else {
982 c->all_connections_prev = NULL;
983 si->all_connections_head = c;
984 }
985
986 si->all_connections_tail = c;
987 c->all_connections_next = NULL;
988 si->num_connections++;
989
990 /*
991 * Insert the connection match objects too.
992 */
993 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
994 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
995}
996
997/*
998 * sfe_ipv4_remove_sfe_ipv4_connection()
999 * Remove a sfe_ipv4_connection object from the hash.
1000 *
1001 * On entry we must be holding the lock that protects the hash table.
1002 */
1003static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1004{
1005 /*
1006 * Remove the connection match objects.
1007 */
1008 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1009 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1010
1011 /*
1012 * Unlink the connection.
1013 */
1014 if (c->prev) {
1015 c->prev->next = c->next;
1016 } else {
1017 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1018 c->dest_ip, c->dest_port);
1019 si->conn_hash[conn_idx] = c->next;
1020 }
1021
1022 if (c->next) {
1023 c->next->prev = c->prev;
1024 }
1025}
1026
/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *	Generate a sync message for a connection, filling in its 5-tuple,
 *	TCP window-tracking state and transfer statistics.
 *
 * On entry to this function we expect that the lock for the connection is either
 * already held or isn't required.
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_ipv4_sync *sis, uint64_t now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message.
	 */
	sis->protocol = c->protocol;
	sis->src_ip = c->src_ip;
	sis->dest_ip = c->dest_ip;
	sis->src_port = c->src_port;
	sis->dest_port = c->dest_port;

	original_cm = c->original_match;
	reply_cm = c->reply_match;

	/*
	 * TCP window tracking state.  NOTE(review): these union fields are read
	 * unconditionally; presumably they are only meaningful when c->protocol
	 * is IPPROTO_TCP - confirm with the consumer of the sync message.
	 */
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	/*
	 * Delta counters accumulated since the previous sync.
	 */
	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	/*
	 * Roll the per-interval counters into the 64-bit totals before we
	 * report those totals below.
	 */
	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	/*
	 * Get the time increment since our last sync and record this sync's
	 * timestamp for the next interval.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}
1080
1081/*
1082 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()
1083 * Remove an iterator from a connection - free all resources if necessary.
1084 *
1085 * Returns true if the connection should now be free, false if not.
1086 *
1087 * We must be locked on entry to this function.
1088 */
1089static bool sfe_ipv4_decrement_sfe_ipv4_connection_iterator(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1090{
1091 /*
1092 * Are we the last iterator for this connection?
1093 */
1094 c->iterators--;
1095 if (c->iterators) {
1096 return false;
1097 }
1098
1099 /*
1100 * Is this connection marked for deletion?
1101 */
1102 if (!c->pending_free) {
1103 return false;
1104 }
1105
1106 /*
1107 * We're ready to delete this connection so unlink it from the "all
1108 * connections" list.
1109 */
1110 si->num_connections--;
1111 if (c->all_connections_prev) {
1112 c->all_connections_prev->all_connections_next = c->all_connections_next;
1113 } else {
1114 si->all_connections_head = c->all_connections_next;
1115 }
1116
1117 if (c->all_connections_next) {
1118 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1119 } else {
1120 si->all_connections_tail = c->all_connections_prev;
1121 }
1122
1123 return true;
1124}
1125
/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again. In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_sync sis;
	uint64_t now_jiffies;
	bool pending_free = false;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	/*
	 * The RCU read-side section protects the sync callback pointer for
	 * the duration of the call; the spinlock protects the connection
	 * lists and counters.
	 */
	rcu_read_lock();
	spin_lock(&si->lock);
	si->connection_flushes++;

	/*
	 * Check that we're not currently being iterated. If we are then
	 * we can't free this entry yet but must mark it pending a free. If it's
	 * not being iterated then we can unlink it from the list of all
	 * connections.
	 */
	if (c->iterators) {
		pending_free = true;
		c->pending_free = true;
	} else {
		si->num_connections--;
		if (c->all_connections_prev) {
			c->all_connections_prev->all_connections_next = c->all_connections_next;
		} else {
			si->all_connections_head = c->all_connections_next;
		}

		if (c->all_connections_next) {
			c->all_connections_next->all_connections_prev = c->all_connections_prev;
		} else {
			si->all_connections_tail = c->all_connections_prev;
		}
	}

	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	spin_unlock(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a final sync message and deliver it to the
		 * registered consumer (e.g. the connection manager).
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * If we can't yet free the underlying memory then we're done.
	 * NOTE(review): when pending_free is set, the final iterator (see
	 * sfe_ipv4_decrement_sfe_ipv4_connection_iterator()) is expected to
	 * complete the teardown - confirm the caller on that path frees the
	 * objects below.
	 */
	if (pending_free) {
		return;
	}

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1202
/*
 * sfe_ipv4_recv_udp()
 *	Handle UDP packet receives and forwarding.
 *
 * Returns 1 when the packet was consumed (forwarded), 0 when it must be
 * handed back to the normal network stack path.
 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
		spin_lock(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock(&si->lock);

	/*
	 * Look for a connection match.  With flow cookies enabled we first try
	 * the cookie the hardware stamped on the skb, falling back to the
	 * 5-tuple hash lookup.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * Does our TTL allow forwarding?  (We need at least 2 so that the
	 * decremented value is still >= 1.)
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.  (A zero UDP checksum means "no checksum".)
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			/*
			 * Fold the carry back into the low 16 bits
			 * (one's-complement arithmetic).
			 */
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.  (A zero UDP checksum means "no checksum".)
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			/*
			 * Fold the carry back into the low 16 bits
			 * (one's-complement arithmetic).
			 */
			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
						     cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * NOTE(review): only three elements are copied per MAC, so
			 * h_dest/h_source and xmit_*_mac appear to be arrays of
			 * 16-bit halfwords (3 x 2 = 6 bytes) - confirm against
			 * struct sfe_ipv4_eth_hdr.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
1460
/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse TCP SACK option and update ack according
 *
 * Walks the TCP option area (data_offs is the header length in bytes,
 * including options) and, if a SACK option is present, raises *ack to the
 * highest SACK right edge seen when that exceeds the plain ACK.
 * Returns false when the option area is malformed.
 */
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack) __attribute__((always_inline));
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack)
{
	uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
	uint8_t *ptr = (uint8_t *)th + length;

	/*
	 * Fast path: the common "NOP, NOP, TIMESTAMP" layout carries no SACK
	 * data so there is nothing to update.  (TCPOLEN_TIMESTAMP covers the
	 * option's kind and length octets; the two "+ 1"s are the NOPs.)
	 * If option is TIMESTAMP discard it.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse SACK option.
	 */
	while (length < data_offs) {
		uint8_t size;
		uint8_t kind;

		ptr = (uint8_t *)th + length;
		kind = *ptr;

		/*
		 * NOP, for padding
		 * Not in the switch because to fast escape and to not calculate size
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			uint32_t sack = 0;
			uint8_t re = 1 + 1;	/* skip the kind and length octets */

			/*
			 * A valid SACK option must hold a whole number of
			 * 8-byte left/right edge block pairs and must fit
			 * within the remaining option area.
			 */
			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			/*
			 * Skip each block's 4-byte left edge - only the right
			 * edges matter here.  Each right edge is read as a
			 * big-endian 32-bit value.
			 * NOTE(review): the comparisons below are plain
			 * unsigned, not modulo-2^32 TCP sequence-space
			 * comparisons, so results near sequence wrap may pick
			 * the "wrong" edge - confirm this is acceptable to the
			 * window-tracking consumer.
			 */
			re += 4;
			while (re < size) {
				uint32_t sack_re;
				uint8_t *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}

			/*
			 * Only ever advance the caller's ACK; never lower it.
			 */
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}

		/*
		 * Any other option: validate its length octet (minimum 2 for
		 * kind + length) and skip over it.
		 */
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}
1542
1543/*
1544 * sfe_ipv4_recv_tcp()
1545 * Handle TCP packet receives and forwarding.
1546 */
1547static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001548 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001549{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001550 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001551 __be32 src_ip;
1552 __be32 dest_ip;
1553 __be16 src_port;
1554 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001555 struct sfe_ipv4_connection_match *cm;
1556 struct sfe_ipv4_connection_match *counter_cm;
1557 uint8_t ttl;
1558 uint32_t flags;
1559 struct net_device *xmit_dev;
1560
1561 /*
	 * Is our packet too short to contain a valid TCP header?
1563 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001564 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001565 spin_lock(&si->lock);
1566 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1567 si->packets_not_forwarded++;
1568 spin_unlock(&si->lock);
1569
1570 DEBUG_TRACE("packet too short for TCP header\n");
1571 return 0;
1572 }
1573
1574 /*
1575 * Read the IP address and port information. Read the IP header data first
1576 * because we've almost certainly got that in the cache. We may not yet have
1577 * the TCP header cached though so allow more time for any prefetching.
1578 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001579 src_ip = iph->saddr;
1580 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001581
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001582 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001583 src_port = tcph->source;
1584 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001585 flags = tcp_flag_word(tcph);
1586
1587 spin_lock(&si->lock);
1588
1589 /*
1590 * Look for a connection match.
1591 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001592#ifdef CONFIG_NF_FLOW_COOKIE
1593 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1594 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001595 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001596 }
1597#else
Matthew McClintock37858802015-02-03 12:12:02 -06001598 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001599#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001600 if (unlikely(!cm)) {
1601 /*
1602 * We didn't get a connection but as TCP is connection-oriented that
1603 * may be because this is a non-fast connection (not running established).
1604 * For diagnostic purposes we differentiate this here.
1605 */
1606 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1607 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1608 si->packets_not_forwarded++;
1609 spin_unlock(&si->lock);
1610
1611 DEBUG_TRACE("no connection found - fast flags\n");
1612 return 0;
1613 }
1614 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1615 si->packets_not_forwarded++;
1616 spin_unlock(&si->lock);
1617
1618 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1619 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1620 return 0;
1621 }
1622
1623 /*
	 * If our packet has been marked as "flush on find" we can't actually
1625 * forward it in the fast path, but now that we've found an associated
1626 * connection we can flush that out before we process the packet.
1627 */
1628 if (unlikely(flush_on_find)) {
1629 struct sfe_ipv4_connection *c = cm->connection;
1630 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1631 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1632 si->packets_not_forwarded++;
1633 spin_unlock(&si->lock);
1634
1635 DEBUG_TRACE("flush on find\n");
1636 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1637 return 0;
1638 }
1639
1640 /*
1641 * Does our TTL allow forwarding?
1642 */
1643 ttl = iph->ttl;
1644 if (unlikely(ttl < 2)) {
1645 struct sfe_ipv4_connection *c = cm->connection;
1646 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1647 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1648 si->packets_not_forwarded++;
1649 spin_unlock(&si->lock);
1650
1651 DEBUG_TRACE("ttl too low\n");
1652 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1653 return 0;
1654 }
1655
1656 /*
1657 * If our packet is larger than the MTU of the transmit interface then
1658 * we can't forward it easily.
1659 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001660 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001661 struct sfe_ipv4_connection *c = cm->connection;
1662 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1663 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1664 si->packets_not_forwarded++;
1665 spin_unlock(&si->lock);
1666
1667 DEBUG_TRACE("larger than mtu\n");
1668 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1669 return 0;
1670 }
1671
1672 /*
1673 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1674 * set is not a fast path packet.
1675 */
1676 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1677 struct sfe_ipv4_connection *c = cm->connection;
1678 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1679 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1680 si->packets_not_forwarded++;
1681 spin_unlock(&si->lock);
1682
1683 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1684 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1685 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1686 return 0;
1687 }
1688
1689 counter_cm = cm->counter_match;
1690
1691 /*
1692 * Are we doing sequence number checking?
1693 */
1694 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1695 uint32_t seq;
1696 uint32_t ack;
1697 uint32_t sack;
1698 uint32_t data_offs;
1699 uint32_t end;
1700 uint32_t left_edge;
1701 uint32_t scaled_win;
1702 uint32_t max_end;
1703
1704 /*
1705 * Is our sequence fully past the right hand edge of the window?
1706 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001707 seq = ntohl(tcph->seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001708 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1709 struct sfe_ipv4_connection *c = cm->connection;
1710 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1711 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1712 si->packets_not_forwarded++;
1713 spin_unlock(&si->lock);
1714
1715 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1716 seq, cm->protocol_state.tcp.max_end + 1);
1717 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1718 return 0;
1719 }
1720
1721 /*
1722 * Check that our TCP data offset isn't too short.
1723 */
1724 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001725 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001726 struct sfe_ipv4_connection *c = cm->connection;
1727 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1728 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1729 si->packets_not_forwarded++;
1730 spin_unlock(&si->lock);
1731
1732 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1733 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1734 return 0;
1735 }
1736
1737 /*
1738 * Update ACK according to any SACK option.
1739 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001740 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001741 sack = ack;
1742 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1743 struct sfe_ipv4_connection *c = cm->connection;
1744 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1745 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1746 si->packets_not_forwarded++;
1747 spin_unlock(&si->lock);
1748
1749 DEBUG_TRACE("TCP option SACK size is wrong\n");
1750 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1751 return 0;
1752 }
1753
1754 /*
1755 * Check that our TCP data offset isn't past the end of the packet.
1756 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001757 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001758 if (unlikely(len < data_offs)) {
1759 struct sfe_ipv4_connection *c = cm->connection;
1760 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1761 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1762 si->packets_not_forwarded++;
1763 spin_unlock(&si->lock);
1764
1765 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1766 data_offs, len);
1767 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1768 return 0;
1769 }
1770
1771 end = seq + len - data_offs;
1772
1773 /*
1774 * Is our sequence fully before the left hand edge of the window?
1775 */
1776 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1777 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1778 struct sfe_ipv4_connection *c = cm->connection;
1779 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1780 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1781 si->packets_not_forwarded++;
1782 spin_unlock(&si->lock);
1783
1784 DEBUG_TRACE("seq: %u before left edge: %u\n",
1785 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1786 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1787 return 0;
1788 }
1789
1790 /*
1791 * Are we acking data that is to the right of what has been sent?
1792 */
1793 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1794 struct sfe_ipv4_connection *c = cm->connection;
1795 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1796 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1797 si->packets_not_forwarded++;
1798 spin_unlock(&si->lock);
1799
1800 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1801 sack, counter_cm->protocol_state.tcp.end + 1);
1802 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1803 return 0;
1804 }
1805
1806 /*
1807 * Is our ack too far before the left hand edge of the window?
1808 */
1809 left_edge = counter_cm->protocol_state.tcp.end
1810 - cm->protocol_state.tcp.max_win
1811 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1812 - 1;
1813 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1814 struct sfe_ipv4_connection *c = cm->connection;
1815 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1816 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1817 si->packets_not_forwarded++;
1818 spin_unlock(&si->lock);
1819
1820 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1821 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
1822 return 0;
1823 }
1824
1825 /*
1826 * Have we just seen the largest window size yet for this connection? If yes
1827 * then we need to record the new value.
1828 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001829 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001830 scaled_win += (sack - ack);
1831 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1832 cm->protocol_state.tcp.max_win = scaled_win;
1833 }
1834
1835 /*
1836 * If our sequence and/or ack numbers have advanced then record the new state.
1837 */
1838 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1839 cm->protocol_state.tcp.end = end;
1840 }
1841
1842 max_end = sack + scaled_win;
1843 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1844 counter_cm->protocol_state.tcp.max_end = max_end;
1845 }
1846 }
1847
1848 /*
1849 * From this point on we're good to modify the packet.
1850 */
1851
1852 /*
1853 * Decrement our TTL.
1854 */
1855 iph->ttl = ttl - 1;
1856
1857 /*
1858 * Do we have to perform translations of the source address/port?
1859 */
1860 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1861 uint16_t tcp_csum;
1862 uint32_t sum;
1863
Dave Hudson87973cd2013-10-22 16:00:04 +01001864 iph->saddr = cm->xlate_src_ip;
1865 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001866
1867 /*
1868 * Do we have a non-zero UDP checksum? If we do then we need
1869 * to update it.
1870 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001871 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001872 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1873 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1874 } else {
1875 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1876 }
1877
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001878 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001879 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001880 }
1881
1882 /*
1883 * Do we have to perform translations of the destination address/port?
1884 */
1885 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1886 uint16_t tcp_csum;
1887 uint32_t sum;
1888
Dave Hudson87973cd2013-10-22 16:00:04 +01001889 iph->daddr = cm->xlate_dest_ip;
1890 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001891
1892 /*
1893 * Do we have a non-zero UDP checksum? If we do then we need
1894 * to update it.
1895 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001896 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001897 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1898 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1899 } else {
1900 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1901 }
1902
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001903 sum = (sum & 0xffff) + (sum >> 16);
Dave Hudson87973cd2013-10-22 16:00:04 +01001904 tcph->check = (uint16_t)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001905 }
1906
1907 /*
1908 * Replace the IP checksum.
1909 */
1910 iph->check = sfe_ipv4_gen_ip_csum(iph);
1911
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001912 /*
1913 * Update traffic stats.
1914 */
1915 cm->rx_packet_count++;
1916 cm->rx_byte_count += len;
1917
1918 /*
1919 * If we're not already on the active list then insert ourselves at the tail
1920 * of the current list.
1921 */
1922 if (unlikely(!cm->active)) {
1923 cm->active = true;
1924 cm->active_prev = si->active_tail;
1925 if (likely(si->active_tail)) {
1926 si->active_tail->active_next = cm;
1927 } else {
1928 si->active_head = cm;
1929 }
1930 si->active_tail = cm;
1931 }
1932
1933 xmit_dev = cm->xmit_dev;
1934 skb->dev = xmit_dev;
1935
1936 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001937 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001938 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001939 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1940 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Matthew McClintocka8ad7962014-01-16 16:49:30 -06001941 xmit_dev->header_ops->create(skb, xmit_dev, ETH_P_IP,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001942 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001943 } else {
1944 /*
1945 * For the simple case we write this really fast.
1946 */
1947 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1948 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001949 eth->h_dest[0] = cm->xmit_dest_mac[0];
1950 eth->h_dest[1] = cm->xmit_dest_mac[1];
1951 eth->h_dest[2] = cm->xmit_dest_mac[2];
1952 eth->h_source[0] = cm->xmit_src_mac[0];
1953 eth->h_source[1] = cm->xmit_src_mac[1];
1954 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001955 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001956 }
1957
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06001958 /*
1959 * Mark outgoing packet
1960 */
1961 skb->mark = cm->connection->mark;
1962 if (skb->mark) {
1963 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1964 }
1965
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001966 si->packets_forwarded++;
1967 spin_unlock(&si->lock);
1968
1969 /*
1970 * We're going to check for GSO flags when we transmit the packet so
1971 * start fetching the necessary cache line now.
1972 */
1973 prefetch(skb_shinfo(skb));
1974
1975 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001976 * Mark that this packet has been fast forwarded.
1977 */
1978 skb->fast_forwarded = 1;
1979
1980 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001981 * Send the packet on its way.
1982 */
1983 dev_queue_xmit(skb);
1984
1985 return 1;
1986}
1987
1988/*
1989 * sfe_ipv4_recv_icmp()
1990 * Handle ICMP packet receives.
1991 *
1992 * ICMP packets aren't handled as a "fast path" and always have us process them
1993 * through the default Linux stack. What we do need to do is look for any errors
1994 * about connections we are handling in the fast path. If we find any such
1995 * connections then we want to flush their state so that the ICMP error path
1996 * within Linux has all of the correct state should it need it.
1997 */
1998static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001999 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002000{
2001 struct icmphdr *icmph;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002002 struct sfe_ipv4_ip_hdr *icmp_iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002003 unsigned int icmp_ihl_words;
2004 unsigned int icmp_ihl;
2005 uint32_t *icmp_trans_h;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002006 struct sfe_ipv4_udp_hdr *icmp_udph;
2007 struct sfe_ipv4_tcp_hdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01002008 __be32 src_ip;
2009 __be32 dest_ip;
2010 __be16 src_port;
2011 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002012 struct sfe_ipv4_connection_match *cm;
2013 struct sfe_ipv4_connection *c;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002014 uint32_t pull_len = sizeof(struct icmphdr) + ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002015
2016 /*
2017 * Is our packet too short to contain a valid UDP header?
2018 */
2019 len -= ihl;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002020 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002021 spin_lock(&si->lock);
2022 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2023 si->packets_not_forwarded++;
2024 spin_unlock(&si->lock);
2025
2026 DEBUG_TRACE("packet too short for ICMP header\n");
2027 return 0;
2028 }
2029
2030 /*
2031 * We only handle "destination unreachable" and "time exceeded" messages.
2032 */
2033 icmph = (struct icmphdr *)(skb->data + ihl);
2034 if ((icmph->type != ICMP_DEST_UNREACH)
2035 && (icmph->type != ICMP_TIME_EXCEEDED)) {
2036 spin_lock(&si->lock);
2037 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2038 si->packets_not_forwarded++;
2039 spin_unlock(&si->lock);
2040
2041 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2042 return 0;
2043 }
2044
2045 /*
2046 * Do we have the full embedded IP header?
2047 */
2048 len -= sizeof(struct icmphdr);
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002049 pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2050 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002051 spin_lock(&si->lock);
2052 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2053 si->packets_not_forwarded++;
2054 spin_unlock(&si->lock);
2055
2056 DEBUG_TRACE("Embedded IP header not complete\n");
2057 return 0;
2058 }
2059
2060 /*
2061 * Is our embedded IP version wrong?
2062 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002063 icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002064 if (unlikely(icmp_iph->version != 4)) {
2065 spin_lock(&si->lock);
2066 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2067 si->packets_not_forwarded++;
2068 spin_unlock(&si->lock);
2069
2070 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2071 return 0;
2072 }
2073
2074 /*
2075 * Do we have the full embedded IP header, including any options?
2076 */
2077 icmp_ihl_words = icmp_iph->ihl;
2078 icmp_ihl = icmp_ihl_words << 2;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002079 pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2080 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002081 spin_lock(&si->lock);
2082 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2083 si->packets_not_forwarded++;
2084 spin_unlock(&si->lock);
2085
2086 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2087 return 0;
2088 }
2089
Nicolas Costaac2979c2014-01-14 10:35:24 -06002090 len -= icmp_ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002091 icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;
2092
2093 /*
2094 * Handle the embedded transport layer header.
2095 */
2096 switch (icmp_iph->protocol) {
2097 case IPPROTO_UDP:
2098 /*
2099 * We should have 8 bytes of UDP header - that's enough to identify
2100 * the connection.
2101 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002102 pull_len += 8;
2103 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002104 spin_lock(&si->lock);
2105 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2106 si->packets_not_forwarded++;
2107 spin_unlock(&si->lock);
2108
2109 DEBUG_TRACE("Incomplete embedded UDP header\n");
2110 return 0;
2111 }
2112
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002113 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002114 src_port = icmp_udph->source;
2115 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002116 break;
2117
2118 case IPPROTO_TCP:
2119 /*
2120 * We should have 8 bytes of TCP header - that's enough to identify
2121 * the connection.
2122 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002123 pull_len += 8;
2124 if (!pskb_may_pull(skb, pull_len)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002125 spin_lock(&si->lock);
2126 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2127 si->packets_not_forwarded++;
2128 spin_unlock(&si->lock);
2129
2130 DEBUG_TRACE("Incomplete embedded TCP header\n");
2131 return 0;
2132 }
2133
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002134 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002135 src_port = icmp_tcph->source;
2136 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002137 break;
2138
2139 default:
2140 spin_lock(&si->lock);
2141 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2142 si->packets_not_forwarded++;
2143 spin_unlock(&si->lock);
2144
2145 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2146 return 0;
2147 }
2148
Dave Hudson87973cd2013-10-22 16:00:04 +01002149 src_ip = icmp_iph->saddr;
2150 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002151
2152 spin_lock(&si->lock);
2153
2154 /*
2155 * Look for a connection match. Note that we reverse the source and destination
2156 * here because our embedded message contains a packet that was sent in the
2157 * opposite direction to the one in which we just received it. It will have
2158 * been sent on the interface from which we received it though so that's still
2159 * ok to use.
2160 */
2161 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2162 if (unlikely(!cm)) {
2163 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2164 si->packets_not_forwarded++;
2165 spin_unlock(&si->lock);
2166
2167 DEBUG_TRACE("no connection found\n");
2168 return 0;
2169 }
2170
2171 /*
2172 * We found a connection so now remove it from the connection list and flush
2173 * its state.
2174 */
2175 c = cm->connection;
2176 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2177 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2178 si->packets_not_forwarded++;
2179 spin_unlock(&si->lock);
2180
2181 sfe_ipv4_flush_sfe_ipv4_connection(si, c);
2182 return 0;
2183}
2184
2185/*
2186 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002187 * Handle packet receives and forwaring.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002188 *
2189 * Returns 1 if the packet is forwarded or 0 if it isn't.
2190 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002191int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002192{
2193 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002194 unsigned int len;
2195 unsigned int tot_len;
2196 unsigned int frag_off;
2197 unsigned int ihl;
2198 bool flush_on_find;
2199 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002200 struct sfe_ipv4_ip_hdr *iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002201 uint32_t protocol;
2202
2203 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002204 * Check that we have space for an IP header here.
2205 */
2206 len = skb->len;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002207 if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002208 spin_lock(&si->lock);
2209 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2210 si->packets_not_forwarded++;
2211 spin_unlock(&si->lock);
2212
2213 DEBUG_TRACE("len: %u is too short\n", len);
2214 return 0;
2215 }
2216
2217 /*
2218 * Check that our "total length" is large enough for an IP header.
2219 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002220 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002221 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002222 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002223 spin_lock(&si->lock);
2224 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2225 si->packets_not_forwarded++;
2226 spin_unlock(&si->lock);
2227
2228 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2229 return 0;
2230 }
2231
2232 /*
2233 * Is our IP version wrong?
2234 */
2235 if (unlikely(iph->version != 4)) {
2236 spin_lock(&si->lock);
2237 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2238 si->packets_not_forwarded++;
2239 spin_unlock(&si->lock);
2240
2241 DEBUG_TRACE("IP version: %u\n", iph->version);
2242 return 0;
2243 }
2244
2245 /*
2246 * Does our datagram fit inside the skb?
2247 */
2248 if (unlikely(tot_len > len)) {
2249 spin_lock(&si->lock);
2250 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2251 si->packets_not_forwarded++;
2252 spin_unlock(&si->lock);
2253
2254 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2255 return 0;
2256 }
2257
2258 /*
2259 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002260 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002261 frag_off = ntohs(iph->frag_off);
2262 if (unlikely(frag_off & IP_OFFSET)) {
2263 spin_lock(&si->lock);
2264 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2265 si->packets_not_forwarded++;
2266 spin_unlock(&si->lock);
2267
2268 DEBUG_TRACE("non-initial fragment\n");
2269 return 0;
2270 }
2271
2272 /*
2273 * If we have a (first) fragment then mark it to cause any connection to flush.
2274 */
2275 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2276
2277 /*
2278 * Do we have any IP options? That's definite a slow path! If we do have IP
2279 * options we need to recheck our header size.
2280 */
2281 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002282 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002283 if (unlikely(ip_options)) {
2284 if (unlikely(len < ihl)) {
2285 spin_lock(&si->lock);
2286 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2287 si->packets_not_forwarded++;
2288 spin_unlock(&si->lock);
2289
2290 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2291 return 0;
2292 }
2293
2294 flush_on_find = true;
2295 }
2296
2297 protocol = iph->protocol;
2298 if (IPPROTO_UDP == protocol) {
2299 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2300 }
2301
2302 if (IPPROTO_TCP == protocol) {
2303 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2304 }
2305
2306 if (IPPROTO_ICMP == protocol) {
2307 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2308 }
2309
2310 spin_lock(&si->lock);
2311 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2312 si->packets_not_forwarded++;
2313 spin_unlock(&si->lock);
2314
2315 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2316 return 0;
2317}
2318
Nicolas Costa436926b2014-01-14 10:36:22 -06002319static void
2320sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
2321 struct sfe_ipv4_create *sic)
2322{
2323 struct sfe_ipv4_connection_match *orig_cm;
2324 struct sfe_ipv4_connection_match *repl_cm;
2325 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2326 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2327
2328 orig_cm = c->original_match;
2329 repl_cm = c->reply_match;
2330 orig_tcp = &orig_cm->protocol_state.tcp;
2331 repl_tcp = &repl_cm->protocol_state.tcp;
2332
2333 /* update orig */
2334 if (orig_tcp->max_win < sic->src_td_max_window) {
2335 orig_tcp->max_win = sic->src_td_max_window;
2336 }
2337 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2338 orig_tcp->end = sic->src_td_end;
2339 }
2340 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2341 orig_tcp->max_end = sic->src_td_max_end;
2342 }
2343
2344 /* update reply */
2345 if (repl_tcp->max_win < sic->dest_td_max_window) {
2346 repl_tcp->max_win = sic->dest_td_max_window;
2347 }
2348 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2349 repl_tcp->end = sic->dest_td_end;
2350 }
2351 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2352 repl_tcp->max_end = sic->dest_td_max_end;
2353 }
2354
2355 /* update match flags */
2356 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2357 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2358 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2359 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2360 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2361 }
2362}
2363
2364static void
2365sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
2366 struct sfe_ipv4_create *sic)
2367{
2368 switch (sic->protocol) {
2369 case IPPROTO_TCP:
2370 sfe_ipv4_update_tcp_state(c, sic);
2371 break;
2372 }
2373}
2374
2375void sfe_ipv4_update_rule(struct sfe_ipv4_create *sic)
2376{
2377 struct sfe_ipv4_connection *c;
2378 struct sfe_ipv4 *si = &__si;
2379
2380 spin_lock_bh(&si->lock);
2381
2382 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2383 sic->protocol,
2384 sic->src_ip,
2385 sic->src_port,
2386 sic->dest_ip,
2387 sic->dest_port);
2388 if (c != NULL) {
2389 sfe_ipv4_update_protocol_state(c, sic);
2390 }
2391
2392 spin_unlock_bh(&si->lock);
2393}
2394
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002395/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002396 * sfe_ipv4_create_rule()
2397 * Create a forwarding rule.
2398 */
Nicolas Costa514fde02014-01-13 15:50:29 -06002399int sfe_ipv4_create_rule(struct sfe_ipv4_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002400{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002401 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002402 struct sfe_ipv4_connection *c;
2403 struct sfe_ipv4_connection_match *original_cm;
2404 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002405 struct net_device *dest_dev;
2406 struct net_device *src_dev;
2407
2408 dest_dev = sic->dest_dev;
2409 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002410
Matthew McClintock389b42a2014-09-24 14:05:51 -05002411 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2412 (src_dev->reg_state != NETREG_REGISTERED))) {
2413 return -EINVAL;
2414 }
2415
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002416 spin_lock_bh(&si->lock);
2417 si->connection_create_requests++;
2418
2419 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002420 * Check to see if there is already a flow that matches the rule we're
2421 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002422 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002423 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2424 sic->protocol,
2425 sic->src_ip,
2426 sic->src_port,
2427 sic->dest_ip,
2428 sic->dest_port);
2429 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002430 si->connection_create_collisions++;
2431
2432 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002433 * If we already have the flow then it's likely that this
2434 * request to create the connection rule contains more
2435 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002436 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002437 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002438 spin_unlock_bh(&si->lock);
2439
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002440 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002441 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002442 sic->mark, sic->protocol,
2443 sic->src_dev->name, sic->src_mac, &sic->src_ip, ntohs(sic->src_port),
Dave Hudson87973cd2013-10-22 16:00:04 +01002444 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002445 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002446 }
2447
2448 /*
2449 * Allocate the various connection tracking objects.
2450 */
2451 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2452 if (unlikely(!c)) {
2453 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002454 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002455 }
2456
2457 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2458 if (unlikely(!original_cm)) {
2459 spin_unlock_bh(&si->lock);
2460 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002461 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002462 }
2463
2464 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2465 if (unlikely(!reply_cm)) {
2466 spin_unlock_bh(&si->lock);
2467 kfree(original_cm);
2468 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002469 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002470 }
2471
2472 /*
2473 * Fill in the "original" direction connection matching object.
2474 * Note that the transmit MAC address is "dest_mac_xlate" because
2475 * we always know both ends of a connection by their translated
2476 * addresses and not their public addresses.
2477 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002478 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002479 original_cm->match_protocol = sic->protocol;
2480 original_cm->match_src_ip = sic->src_ip;
2481 original_cm->match_src_port = sic->src_port;
2482 original_cm->match_dest_ip = sic->dest_ip;
2483 original_cm->match_dest_port = sic->dest_port;
2484 original_cm->xlate_src_ip = sic->src_ip_xlate;
2485 original_cm->xlate_src_port = sic->src_port_xlate;
2486 original_cm->xlate_dest_ip = sic->dest_ip_xlate;
2487 original_cm->xlate_dest_port = sic->dest_port_xlate;
2488 original_cm->rx_packet_count = 0;
2489 original_cm->rx_packet_count64 = 0;
2490 original_cm->rx_byte_count = 0;
2491 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002492 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002493 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002494 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002495 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2496 original_cm->connection = c;
2497 original_cm->counter_match = reply_cm;
2498 original_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002499#ifdef CONFIG_NF_FLOW_COOKIE
2500 original_cm->flow_cookie = 0;
2501#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002502 original_cm->active_next = NULL;
2503 original_cm->active_prev = NULL;
2504 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002505
2506 /*
2507 * For PPP links we don't write an L2 header. For everything else we do.
2508 */
2509 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2510 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2511
2512 /*
2513 * If our dev writes Ethernet headers then we can write a really fast
2514 * version.
2515 */
2516 if (dest_dev->header_ops) {
2517 if (dest_dev->header_ops->create == eth_header) {
2518 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2519 }
2520 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002521 }
2522
2523 /*
2524 * Fill in the "reply" direction connection matching object.
2525 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002526 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002527 reply_cm->match_protocol = sic->protocol;
2528 reply_cm->match_src_ip = sic->dest_ip_xlate;
2529 reply_cm->match_src_port = sic->dest_port_xlate;
2530 reply_cm->match_dest_ip = sic->src_ip_xlate;
2531 reply_cm->match_dest_port = sic->src_port_xlate;
2532 reply_cm->xlate_src_ip = sic->dest_ip;
2533 reply_cm->xlate_src_port = sic->dest_port;
2534 reply_cm->xlate_dest_ip = sic->src_ip;
2535 reply_cm->xlate_dest_port = sic->src_port;
2536 reply_cm->rx_packet_count = 0;
2537 reply_cm->rx_packet_count64 = 0;
2538 reply_cm->rx_byte_count = 0;
2539 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002540 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002541 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002542 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002543 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2544 reply_cm->connection = c;
2545 reply_cm->counter_match = original_cm;
2546 reply_cm->flags = 0;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002547#ifdef CONFIG_NF_FLOW_COOKIE
2548 reply_cm->flow_cookie = 0;
2549#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002550 reply_cm->active_next = NULL;
2551 reply_cm->active_prev = NULL;
2552 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002553
2554 /*
2555 * For PPP links we don't write an L2 header. For everything else we do.
2556 */
2557 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2558 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2559
2560 /*
2561 * If our dev writes Ethernet headers then we can write a really fast
2562 * version.
2563 */
2564 if (src_dev->header_ops) {
2565 if (src_dev->header_ops->create == eth_header) {
2566 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2567 }
2568 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002569 }
2570
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002571
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002572 if (sic->dest_ip != sic->dest_ip_xlate || sic->dest_port != sic->dest_port_xlate) {
2573 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2574 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2575 }
2576
2577 if (sic->src_ip != sic->src_ip_xlate || sic->src_port != sic->src_port_xlate) {
2578 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2579 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2580 }
2581
2582 c->protocol = sic->protocol;
2583 c->src_ip = sic->src_ip;
2584 c->src_ip_xlate = sic->src_ip_xlate;
2585 c->src_port = sic->src_port;
2586 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002587 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002588 c->original_match = original_cm;
2589 c->dest_ip = sic->dest_ip;
2590 c->dest_ip_xlate = sic->dest_ip_xlate;
2591 c->dest_port = sic->dest_port;
2592 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002593 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002594 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002595 c->mark = sic->mark;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002596
2597 c->last_sync_jiffies = get_jiffies_64();
2598 c->iterators = 0;
2599 c->pending_free = false;
2600
2601 /*
2602 * Take hold of our source and dest devices for the duration of the connection.
2603 */
2604 dev_hold(c->original_dev);
2605 dev_hold(c->reply_dev);
2606
2607 /*
2608 * Initialize the protocol-specific information that we track.
2609 */
2610 switch (sic->protocol) {
2611 case IPPROTO_TCP:
2612 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2613 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2614 original_cm->protocol_state.tcp.end = sic->src_td_end;
2615 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2616 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2617 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2618 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2619 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2620 if (sic->flags & SFE_IPV4_CREATE_FLAG_NO_SEQ_CHECK) {
2621 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2622 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2623 }
2624 break;
2625 }
2626
2627 sfe_ipv4_connection_match_compute_translations(original_cm);
2628 sfe_ipv4_connection_match_compute_translations(reply_cm);
2629 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2630
2631 spin_unlock_bh(&si->lock);
2632
2633 /*
2634 * We have everything we need!
2635 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002636 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002637 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2638 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002639 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002640 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002641 &sic->src_ip, &sic->src_ip_xlate, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002642 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01002643 &sic->dest_ip, &sic->dest_ip_xlate, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002644
2645 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002646}
2647
/*
 * sfe_ipv4_destroy_rule()
 *	Destroy a forwarding rule.
 *
 * sid identifies the flow by its 5-tuple (protocol, source/dest IP and port).
 * If no matching connection exists this is counted as a destroy miss and the
 * call is a no-op.
 */
void sfe_ipv4_destroy_rule(struct sfe_ipv4_destroy *sid)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;

	spin_lock_bh(&si->lock);
	si->connection_destroy_requests++;

	/*
	 * Check to see if we have a flow that matches the rule we're trying
	 * to destroy. If there isn't then we can't destroy it.
	 */
	c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip, sid->src_port,
					      sid->dest_ip, sid->dest_port);
	if (!c) {
		si->connection_destroy_misses++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
			    sid->protocol, &sid->src_ip, ntohs(sid->src_port),
			    &sid->dest_ip, ntohs(sid->dest_port));
		return;
	}

	/*
	 * Remove our connection details from the hash tables.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	spin_unlock_bh(&si->lock);

	/*
	 * Finally synchronize state and free resources. We need to protect against
	 * pre-emption by our bottom half while we do this though.
	 */
	local_bh_disable();
	sfe_ipv4_flush_sfe_ipv4_connection(si, c);
	local_bh_enable();

	DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
		   sid->protocol, &sid->src_ip, ntohs(sid->src_port),
		   &sid->dest_ip, ntohs(sid->dest_port));
}
2694
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * The callback is published with rcu_assign_pointer() so that readers that
 * fetch it under rcu_read_lock() (see sfe_ipv4_periodic_sync()) observe a
 * fully initialized pointer.  Passing NULL effectively unregisters: the
 * periodic sync skips its work when no callback is set.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_ipv4_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2707
2708/*
2709 * sfe_ipv4_get_debug_dev()
2710 */
2711static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2712 struct device_attribute *attr,
2713 char *buf)
2714{
2715 struct sfe_ipv4 *si = &__si;
2716 ssize_t count;
2717 int num;
2718
2719 spin_lock_bh(&si->lock);
2720 num = si->debug_dev;
2721 spin_unlock_bh(&si->lock);
2722
2723 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2724 return count;
2725}
2726
2727/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002728 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002729 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002730static const struct device_attribute sfe_ipv4_debug_dev_attr =
2731 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2732
/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 *
 * The walk uses the per-connection "iterators" reference count so a
 * connection cannot be freed while we are standing on it, even though the
 * lock is dropped while freeing dead entries.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection *c_next;

	spin_lock_bh(&si->lock);
	c = si->all_connections_head;
	if (!c) {
		spin_unlock_bh(&si->lock);
		return;
	}

	/*
	 * Pin the list head with an iterator reference before we start.
	 */
	c->iterators++;

	/*
	 * Iterate over all connections
	 */
	while (c) {
		c_next = c->all_connections_next;

		/*
		 * Before we do anything else, take an iterator reference for the
		 * connection we'll iterate next.
		 */
		if (c_next) {
			c_next->iterators++;
		}

		/*
		 * Does this connection relate to the device we are destroying? If
		 * it does then ensure it is marked for being freed as soon as it
		 * is no longer being iterated.
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			c->pending_free = true;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		}

		/*
		 * Remove the iterator reference that we acquired and see if we
		 * should free any resources.
		 */
		if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
			spin_unlock_bh(&si->lock);

			/*
			 * This entry is dead so release our hold of the source and
			 * dest devices and free the memory for our connection objects.
			 */
			dev_put(c->original_dev);
			dev_put(c->reply_dev);
			kfree(c->original_match);
			kfree(c->reply_match);
			kfree(c);

			spin_lock_bh(&si->lock);
		}

		c = c_next;
	}

	spin_unlock_bh(&si->lock);
}
2805
/*
 * sfe_ipv4_periodic_sync()
 *	Timer handler: periodically report active-connection state to the
 *	registered sync callback, then re-arm the timer.
 *
 * arg is the struct sfe_ipv4 instance, passed through the timer API as an
 * unsigned long.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_ipv4_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	/*
	 * If no sync callback is registered there is nothing to report.
	 */
	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * (Roughly 1/64th of the connections per tick.)
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_ipv4_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/*
	 * Re-arm: fire again in roughly 10ms ((HZ + 99) / 100 jiffies).
	 */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2904
2905#define CHAR_DEV_MSG_SIZE 768
2906
2907/*
2908 * sfe_ipv4_debug_dev_read_start()
2909 * Generate part of the XML output.
2910 */
2911static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2912 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2913{
2914 int bytes_read;
2915
2916 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2917 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2918 return false;
2919 }
2920
2921 *length -= bytes_read;
2922 *total_read += bytes_read;
2923
2924 ws->state++;
2925 return true;
2926}
2927
2928/*
2929 * sfe_ipv4_debug_dev_read_connections_start()
2930 * Generate part of the XML output.
2931 */
2932static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2933 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2934{
2935 int bytes_read;
2936
2937 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2938 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2939 return false;
2940 }
2941
2942 *length -= bytes_read;
2943 *total_read += bytes_read;
2944
2945 ws->state++;
2946 return true;
2947}
2948
2949/*
2950 * sfe_ipv4_debug_dev_read_connections_connection()
2951 * Generate part of the XML output.
2952 */
2953static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2954 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2955{
2956 struct sfe_ipv4_connection *c;
2957 struct sfe_ipv4_connection *c_next;
2958 struct sfe_ipv4_connection_match *original_cm;
2959 struct sfe_ipv4_connection_match *reply_cm;
2960 int bytes_read;
2961 int protocol;
2962 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002963 __be32 src_ip;
2964 __be32 src_ip_xlate;
2965 __be16 src_port;
2966 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002967 uint64_t src_rx_packets;
2968 uint64_t src_rx_bytes;
2969 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002970 __be32 dest_ip;
2971 __be32 dest_ip_xlate;
2972 __be16 dest_port;
2973 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002974 uint64_t dest_rx_packets;
2975 uint64_t dest_rx_bytes;
2976 uint64_t last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06002977 uint32_t mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002978#ifdef CONFIG_NF_FLOW_COOKIE
2979 int src_flow_cookie, dst_flow_cookie;
2980#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002981
2982 spin_lock_bh(&si->lock);
2983 c = ws->iter_conn;
2984
2985 /*
2986 * Is this the first connection we need to scan?
2987 */
2988 if (!c) {
2989 c = si->all_connections_head;
2990
2991 /*
2992 * If there were no connections then move to the next state.
2993 */
2994 if (!c) {
2995 spin_unlock_bh(&si->lock);
2996
2997 ws->state++;
2998 return true;
2999 }
3000
3001 c->iterators++;
3002 }
3003
3004 c_next = c->all_connections_next;
3005 ws->iter_conn = c_next;
3006
3007 /*
3008 * Before we do anything else, take an iterator reference for the
3009 * connection we'll iterate next.
3010 */
3011 if (c_next) {
3012 c_next->iterators++;
3013 }
3014
3015 /*
3016 * Remove the iterator reference that we acquired and see if we
3017 * should free any resources.
3018 */
3019 if (sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c)) {
3020 spin_unlock_bh(&si->lock);
3021
3022 /*
3023 * This entry is dead so release our hold of the source and
3024 * dest devices and free the memory for our connection objects.
3025 */
3026 dev_put(c->original_dev);
3027 dev_put(c->reply_dev);
3028 kfree(c->original_match);
3029 kfree(c->reply_match);
3030 kfree(c);
3031
3032 /*
3033 * If we have no more connections then move to the next state.
3034 */
3035 if (!c_next) {
3036 ws->state++;
3037 }
3038
3039 return true;
3040 }
3041
3042 original_cm = c->original_match;
3043 reply_cm = c->reply_match;
3044
3045 protocol = c->protocol;
3046 src_dev = c->original_dev;
3047 src_ip = c->src_ip;
3048 src_ip_xlate = c->src_ip_xlate;
3049 src_port = c->src_port;
3050 src_port_xlate = c->src_port_xlate;
3051
3052 sfe_ipv4_connection_match_update_summary_stats(original_cm);
3053 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
3054
3055 src_rx_packets = original_cm->rx_packet_count64;
3056 src_rx_bytes = original_cm->rx_byte_count64;
3057 dest_dev = c->reply_dev;
3058 dest_ip = c->dest_ip;
3059 dest_ip_xlate = c->dest_ip_xlate;
3060 dest_port = c->dest_port;
3061 dest_port_xlate = c->dest_port_xlate;
3062 dest_rx_packets = reply_cm->rx_packet_count64;
3063 dest_rx_bytes = reply_cm->rx_byte_count64;
3064 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003065 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003066#ifdef CONFIG_NF_FLOW_COOKIE
3067 src_flow_cookie = original_cm->flow_cookie;
3068 dst_flow_cookie = reply_cm->flow_cookie;
3069#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003070 spin_unlock_bh(&si->lock);
3071
3072 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3073 "protocol=\"%u\" "
3074 "src_dev=\"%s\" "
3075 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3076 "src_port=\"%u\" src_port_xlate=\"%u\" "
3077 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3078 "dest_dev=\"%s\" "
3079 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3080 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
3081 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003082#ifdef CONFIG_NF_FLOW_COOKIE
3083 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3084#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003085 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003086 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003087 protocol,
3088 src_dev->name,
3089 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003090 ntohs(src_port), ntohs(src_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003091 src_rx_packets, src_rx_bytes,
3092 dest_dev->name,
3093 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003094 ntohs(dest_port), ntohs(dest_port_xlate),
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003095 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003096#ifdef CONFIG_NF_FLOW_COOKIE
3097 src_flow_cookie, dst_flow_cookie,
3098#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003099 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003100
3101 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3102 return false;
3103 }
3104
3105 *length -= bytes_read;
3106 *total_read += bytes_read;
3107
3108 /*
3109 * If we have no more connections then move to the next state.
3110 */
3111 if (!c_next) {
3112 ws->state++;
3113 }
3114
3115 return true;
3116}
3117
3118/*
3119 * sfe_ipv4_debug_dev_read_connections_end()
3120 * Generate part of the XML output.
3121 */
3122static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3123 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3124{
3125 int bytes_read;
3126
3127 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3128 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3129 return false;
3130 }
3131
3132 *length -= bytes_read;
3133 *total_read += bytes_read;
3134
3135 ws->state++;
3136 return true;
3137}
3138
3139/*
3140 * sfe_ipv4_debug_dev_read_exceptions_start()
3141 * Generate part of the XML output.
3142 */
3143static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3144 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3145{
3146 int bytes_read;
3147
3148 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3149 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3150 return false;
3151 }
3152
3153 *length -= bytes_read;
3154 *total_read += bytes_read;
3155
3156 ws->state++;
3157 return true;
3158}
3159
3160/*
3161 * sfe_ipv4_debug_dev_read_exceptions_exception()
3162 * Generate part of the XML output.
3163 */
3164static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3165 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3166{
3167 uint64_t ct;
3168
3169 spin_lock_bh(&si->lock);
3170 ct = si->exception_events64[ws->iter_exception];
3171 spin_unlock_bh(&si->lock);
3172
3173 if (ct) {
3174 int bytes_read;
3175
3176 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3177 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3178 sfe_ipv4_exception_events_string[ws->iter_exception],
3179 ct);
3180 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3181 return false;
3182 }
3183
3184 *length -= bytes_read;
3185 *total_read += bytes_read;
3186 }
3187
3188 ws->iter_exception++;
3189 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3190 ws->iter_exception = 0;
3191 ws->state++;
3192 }
3193
3194 return true;
3195}
3196
3197/*
3198 * sfe_ipv4_debug_dev_read_exceptions_end()
3199 * Generate part of the XML output.
3200 */
3201static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3202 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3203{
3204 int bytes_read;
3205
3206 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3207 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3208 return false;
3209 }
3210
3211 *length -= bytes_read;
3212 *total_read += bytes_read;
3213
3214 ws->state++;
3215 return true;
3216}
3217
3218/*
3219 * sfe_ipv4_debug_dev_read_stats()
3220 * Generate part of the XML output.
3221 */
3222static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3223 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3224{
3225 int bytes_read;
3226 unsigned int num_connections;
3227 uint64_t packets_forwarded;
3228 uint64_t packets_not_forwarded;
3229 uint64_t connection_create_requests;
3230 uint64_t connection_create_collisions;
3231 uint64_t connection_destroy_requests;
3232 uint64_t connection_destroy_misses;
3233 uint64_t connection_flushes;
3234 uint64_t connection_match_hash_hits;
3235 uint64_t connection_match_hash_reorders;
3236
3237 spin_lock_bh(&si->lock);
3238 sfe_ipv4_update_summary_stats(si);
3239
3240 num_connections = si->num_connections;
3241 packets_forwarded = si->packets_forwarded64;
3242 packets_not_forwarded = si->packets_not_forwarded64;
3243 connection_create_requests = si->connection_create_requests64;
3244 connection_create_collisions = si->connection_create_collisions64;
3245 connection_destroy_requests = si->connection_destroy_requests64;
3246 connection_destroy_misses = si->connection_destroy_misses64;
3247 connection_flushes = si->connection_flushes64;
3248 connection_match_hash_hits = si->connection_match_hash_hits64;
3249 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3250 spin_unlock_bh(&si->lock);
3251
3252 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3253 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003254 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3255 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003256 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3257 "flushes=\"%llu\" "
3258 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3259 num_connections,
3260 packets_forwarded,
3261 packets_not_forwarded,
3262 connection_create_requests,
3263 connection_create_collisions,
3264 connection_destroy_requests,
3265 connection_destroy_misses,
3266 connection_flushes,
3267 connection_match_hash_hits,
3268 connection_match_hash_reorders);
3269 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3270 return false;
3271 }
3272
3273 *length -= bytes_read;
3274 *total_read += bytes_read;
3275
3276 ws->state++;
3277 return true;
3278}
3279
3280/*
3281 * sfe_ipv4_debug_dev_read_end()
3282 * Generate part of the XML output.
3283 */
3284static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3285 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3286{
3287 int bytes_read;
3288
3289 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3290 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3291 return false;
3292 }
3293
3294 *length -= bytes_read;
3295 *total_read += bytes_read;
3296
3297 ws->state++;
3298 return true;
3299}
3300
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * Indexed directly by sfe_ipv4_debug_xml_write_state.state (see
 * sfe_ipv4_debug_dev_read()), so the entry order must match the state enum,
 * which terminates at SFE_IPV4_DEBUG_XML_STATE_DONE.
 */
sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3316
3317/*
3318 * sfe_ipv4_debug_dev_read()
3319 * Send info to userspace upon read request from user
3320 */
3321static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3322{
3323 char msg[CHAR_DEV_MSG_SIZE];
3324 int total_read = 0;
3325 struct sfe_ipv4_debug_xml_write_state *ws;
3326 struct sfe_ipv4 *si = &__si;
3327
3328 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3329 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3330 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3331 continue;
3332 }
3333 }
3334
3335 return total_read;
3336}
3337
/*
 * sfe_ipv4_debug_dev_write()
 *	Write to char device resets some stats
 *
 * Any write (the written data itself is ignored) zeroes all of the 64-bit
 * summary statistics.  sfe_ipv4_update_summary_stats() is called first —
 * presumably to fold the short per-period counters into the 64-bit totals
 * before they are cleared, so they are not re-added after the reset; confirm
 * against its definition.  Always reports the full length as consumed.
 */
static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	si->packets_forwarded64 = 0;
	si->packets_not_forwarded64 = 0;
	si->connection_create_requests64 = 0;
	si->connection_create_collisions64 = 0;
	si->connection_destroy_requests64 = 0;
	si->connection_destroy_misses64 = 0;
	si->connection_flushes64 = 0;
	si->connection_match_hash_hits64 = 0;
	si->connection_match_hash_reorders64 = 0;
	spin_unlock_bh(&si->lock);

	return length;
}
3362
3363/*
3364 * sfe_ipv4_debug_dev_open()
3365 */
3366static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3367{
3368 struct sfe_ipv4_debug_xml_write_state *ws;
3369
3370 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3371 if (!ws) {
3372 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3373 if (!ws) {
3374 return -ENOMEM;
3375 }
3376
3377 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3378 file->private_data = ws;
3379 }
3380
3381 return 0;
3382}
3383
/*
 * sfe_ipv4_debug_dev_release()
 *	Release per-open XML iteration state, dropping any iterator reference
 *	still held on a partially-iterated connection.
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		struct sfe_ipv4_connection *c;

		/*
		 * Are we currently iterating a connection? If we are then
		 * make sure that we reduce its iterator count and if necessary
		 * free it.
		 */
		c = ws->iter_conn;
		if (c) {
			struct sfe_ipv4 *si = &__si;
			bool free_connection;

			spin_lock_bh(&si->lock);
			free_connection = sfe_ipv4_decrement_sfe_ipv4_connection_iterator(si, c);
			spin_unlock_bh(&si->lock);

			if (free_connection) {
				/*
				 * This entry is dead so release our hold of the source and
				 * dest devices and free the memory for our connection objects.
				 */
				dev_put(c->original_dev);
				dev_put(c->reply_dev);
				kfree(c->original_match);
				kfree(c->reply_match);
				kfree(c);
			}
		}

		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
	}

	return 0;
}
3430
3431/*
3432 * File operations used in the debug char device
3433 */
3434static struct file_operations sfe_ipv4_debug_dev_fops = {
3435 .read = sfe_ipv4_debug_dev_read,
3436 .write = sfe_ipv4_debug_dev_write,
3437 .open = sfe_ipv4_debug_dev_open,
3438 .release = sfe_ipv4_debug_dev_release
3439};
3440
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003441#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb
 *	register a function in SFE to let SFE use this function to configure flow cookie for a flow
 *
 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
 * can use this function to configure flow cookie for a flow.
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	BUG_ON(!cb);

	/*
	 * Only a single callback may be registered at a time.
	 * NOTE(review): this check-then-assign is not serialized (no lock is
	 * taken around the test and the rcu_assign_pointer below), so two
	 * concurrent registrations could race - confirm registration only
	 * happens from a single context.
	 */
	if (si->flow_cookie_set_func) {
		return -1;
	}

	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}
3463
3464/*
3465 * sfe_unregister_flow_cookie_cb
3466 * unregister function which is used to configure flow cookie for a flow
3467 *
3468 * return: 0, success; !=0, fail
3469 */
3470int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
3471{
3472 struct sfe_ipv4 *si = &__si;
3473
3474 RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
3475 return 0;
3476}
3477#endif /*CONFIG_NF_FLOW_COOKIE*/
3478
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003479/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003480 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003481 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003482static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003483{
3484 struct sfe_ipv4 *si = &__si;
3485 int result = -1;
3486
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003487 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003488
3489 /*
3490 * Create sys/sfe_ipv4
3491 */
3492 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3493 if (!si->sys_sfe_ipv4) {
3494 DEBUG_ERROR("failed to register sfe_ipv4\n");
3495 goto exit1;
3496 }
3497
3498 /*
3499 * Create files, one for each parameter supported by this module.
3500 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003501 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3502 if (result) {
3503 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3504 goto exit4;
3505 }
3506
3507 /*
3508 * Register our debug char device.
3509 */
3510 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3511 if (result < 0) {
3512 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
3513 goto exit5;
3514 }
3515
3516 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003517
3518 /*
3519 * Create a timer to handle periodic statistics.
3520 */
3521 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003522 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003523
Dave Hudson87973cd2013-10-22 16:00:04 +01003524 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003525
Dave Hudson87973cd2013-10-22 16:00:04 +01003526 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003527
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003528exit5:
3529 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3530
3531exit4:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003532 kobject_put(si->sys_sfe_ipv4);
3533
3534exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003535 return result;
3536}
3537
/*
 * sfe_ipv4_exit()
 *	Module unload handler - tear down in the reverse order of sfe_ipv4_init().
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.  NULL presumably means "all devices" -
	 * confirm against sfe_ipv4_destroy_all_rules_for_dev().
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	/*
	 * Stop the periodic statistics timer and wait for any in-flight
	 * callback to complete before tearing down what it uses.
	 * NOTE(review): if the callback re-arms itself, del_timer_sync() can
	 * race with that re-arm - verify the callback's shutdown behavior.
	 */
	del_timer_sync(&si->timer);

	/* Unregister the debug char device (major number saved at init). */
	unregister_chrdev(si->debug_dev, "sfe_ipv4");

	/* Remove the sysfs attribute file, then drop the kobject reference. */
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);

}
3561
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Symbols exported for use by other kernel modules.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
/* Flow cookie hooks exist only when the kernel is built with the feature. */
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003580