blob: 300beac346c147500ad3677e8fab355253fd3497 [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Xiaoping Fana42c68b2015-08-07 18:00:39 -07005 * Copyright (c) 2013-2015 The Linux Foundation. All rights reserved.
6 * Permission to use, copy, modify, and/or distribute this software for
7 * any purpose with or without fee is hereby granted, provided that the
8 * above copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010016 */
Matthew McClintocka3221942014-01-16 11:44:26 -060017
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010018#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060019#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020#include <linux/skbuff.h>
21#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010022#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060023#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010024
Dave Hudsondcd08fb2013-11-22 09:25:16 -060025#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070026#include "sfe_cm.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010027
28/*
Dave Hudsona8197e72013-12-17 23:46:22 +000029 * By default Linux IP header and transport layer header structures are
30 * unpacked, assuming that such headers should be 32-bit aligned.
31 * Unfortunately some wireless adaptors can't cope with this requirement and
32 * some CPUs can't handle misaligned accesses. For those platforms we
33 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
34 * When we do this the compiler will generate slightly worse code than for the
35 * aligned case (on most platforms) but will be much quicker than fixing
36 * things up in an unaligned trap handler.
37 */
38#define SFE_IPV4_UNALIGNED_IP_HEADER 1
39#if SFE_IPV4_UNALIGNED_IP_HEADER
40#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
41#else
42#define SFE_IPV4_UNALIGNED_STRUCT
43#endif
44
45/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060046 * An Ethernet header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000047 * help with performance on some platforms (see the definition of
48 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010049 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC address, as three 16-bit words */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC address, as three 16-bit words */
	__be16 h_proto;			/* Ethertype / protocol ID */
} SFE_IPV4_UNALIGNED_STRUCT;
55
Xiaoping Fane1963d42015-08-25 17:06:19 -070056#define SFE_IPV4_DSCP_MASK 0x3
57#define SFE_IPV4_DSCP_SHIFT 2
58
Matthew McClintockdb5ac512014-01-16 17:01:40 -060059/*
60 * An IPv4 header, but with an optional "packed" attribute to
61 * help with performance on some platforms (see the definition of
62 * SFE_IPV4_UNALIGNED_STRUCT)
63 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,			/* Header length, in 32-bit words */
	     version:4;			/* IP version (4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,			/* IP version (4) */
	     ihl:4;			/* Header length, in 32-bit words */
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service (DSCP + ECN bits) */
	__be16 tot_len;			/* Total datagram length (header + payload) */
	__be16 id;			/* Identification, used for fragment reassembly */
	__be16 frag_off;		/* Fragment flags and fragment offset */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol number (e.g. TCP/UDP) */
	__sum16 check;			/* IP header checksum */
	__be32 saddr;			/* Source IP address */
	__be32 daddr;			/* Destination IP address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010088
89/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060090 * A UDP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000091 * help with performance on some platforms (see the definition of
92 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010093 */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* UDP length (header + payload) */
	__sum16 check;			/* UDP checksum */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100100
101/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600102 * A TCP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +0000103 * help with performance on some platforms (see the definition of
104 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100105 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgment number */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,			/* Reserved bits */
	      doff:4,			/* Data offset: header length, in 32-bit words */
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,			/* Data offset: header length, in 32-bit words */
	      res1:4,			/* Reserved bits */
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100140
141/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100142 * Specifies the lower bound on ACK numbers carried in the TCP header
143 */
144#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
145
146/*
147 * IPv4 TCP connection match additional data.
148 */
struct sfe_ipv4_tcp_connection_match {
	uint8_t win_scale;		/* Window scale */
	uint32_t max_win;		/* Maximum window size seen */
	uint32_t end;			/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t max_end;		/* Sequence number of the last byte to ack */
};
155
156/*
157 * Bit flags for IPv4 connection matching entry.
158 */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700159#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100160 /* Perform source translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700161#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100162 /* Perform destination translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700163#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100164 /* Ignore TCP sequence numbers */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700165#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600166 /* Fast Ethernet header write */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700167#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
					/* Write full L2 header (not just a fast Ethernet header) */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700169#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
170 /* remark priority of SKB */
171#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
172 /* remark DSCP of packet */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100173
174/*
175 * IPv4 connection matching structure.
176 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next connection match entry in a list */
	struct sfe_ipv4_connection_match *prev;
					/* Previous connection match entry in a list */
	struct sfe_ipv4_connection *connection;
					/* Pointer to our connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Pointer to the connection match in the "counter" direction to this one */
	struct sfe_ipv4_connection_match *active_next;
					/* Pointer to the next connection in the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Pointer to the previous connection in the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	uint8_t match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	uint32_t flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	uint32_t flow_cookie;		/* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	uint32_t flow_accel;		/* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	uint32_t rx_packet_count;	/* Number of packets RX'd */
	uint32_t rx_byte_count;		/* Number of bytes RX'd */

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	uint16_t xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	uint16_t xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	uint16_t xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	uint16_t xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * QoS information
	 */
	uint32_t priority;		/* Priority value, used with the PRIORITY_REMARK flag */
	uint32_t dscp;			/* DSCP value, used with the DSCP_REMARK flag */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	uint16_t xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	uint16_t xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	uint64_t rx_packet_count64;	/* Number of packets RX'd */
	uint64_t rx_byte_count64;	/* Number of bytes RX'd */
};
265
266/*
267 * Per-connection data structure.
268 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Source IP address */
	__be32 src_ip_xlate;		/* NAT-translated source IP address */
	__be32 dest_ip;			/* Destination IP address */
	__be32 dest_ip_xlate;		/* NAT-translated destination IP address */
	__be16 src_port;		/* Source port */
	__be16 src_port_xlate;		/* NAT-translated source port */
	__be16 dest_port;		/* Destination port */
	__be16 dest_port_xlate;		/* NAT-translated destination port */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	uint64_t last_sync_jiffies;	/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	uint32_t mark;			/* mark for outgoing packet */
	uint32_t debug_read_seq;	/* sequence number for debug dump */
};
298
299/*
300 * IPv4 connections and hash table size information.
301 */
302#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
303#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
304#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
305
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800306#ifdef CONFIG_NF_FLOW_COOKIE
307#define SFE_FLOW_COOKIE_SIZE 2048
308#define SFE_FLOW_COOKIE_MASK 0x7ff
309
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
					/* Connection match entry bound to this cookie slot (NULL if free) */
	unsigned long last_clean_time;	/* Jiffies timestamp; presumably rate-limits slot reuse — confirm against users */
};
314#endif
315
/*
 * Exception events: reasons recorded when a packet is not handled by the
 * fast path.  Each value indexes the exception_events[] counters and must
 * stay in step with sfe_ipv4_exception_events_string[].
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST	/* Count of events; must remain last */
};
355
/*
 * Human-readable names for the exception events above; the order here must
 * match enum sfe_ipv4_exception_events exactly.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
394
395/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600396 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100397 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
	int flow_cookie_enable;
					/* Enable/disable flow cookie at runtime */
#endif

	/*
	 * Statistics.  The 32-bit counters accumulate between syncs and are
	 * folded into the 64-bit summary counters below.
	 */
	uint32_t connection_create_requests;
					/* Number of IPv4 connection create requests */
	uint32_t connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint32_t connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	uint32_t connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint32_t connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	uint32_t connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	uint32_t connection_flushes;	/* Number of IPv4 connection flushes */
	uint32_t packets_forwarded;	/* Number of IPv4 packets forwarded */
	uint32_t packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	uint32_t exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	uint64_t connection_create_requests64;
					/* Number of IPv4 connection create requests */
	uint64_t connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	uint64_t connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	uint64_t connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	uint64_t connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	uint64_t connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	uint64_t connection_flushes64;	/* Number of IPv4 connection flushes */
	uint64_t packets_forwarded64;	/* Number of IPv4 packets forwarded */
	uint64_t packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	uint64_t exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
	uint32_t debug_read_seq;	/* sequence number for debug dump */
};
473
474/*
475 * Enumeration of the XML output.
476 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE	/* Terminal state: all output emitted */
};
489
490/*
491 * XML write state.
492 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;		/* Next exception iterator */
};
498
/*
 * Signature of one step of the debug XML writer state machine
 * (states enumerated in sfe_ipv4_debug_xml_states).
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/*
 * Single module-wide instance.
 * NOTE(review): identifiers beginning with "__" are reserved for the
 * implementation, and this object has external linkage — consider renaming
 * and/or marking it static.
 */
struct sfe_ipv4 __si;
503
504/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100505 * sfe_ipv4_gen_ip_csum()
506 * Generate the IP checksum for an IPv4 header.
507 *
508 * Note that this function assumes that we have only 20 bytes of IP header.
509 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600510static inline uint16_t sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100511{
512 uint32_t sum;
513 uint16_t *i = (uint16_t *)iph;
514
515 iph->check = 0;
516
517 /*
518 * Generate the sum.
519 */
520 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
521
522 /*
523 * Fold it to ones-complement form.
524 */
525 sum = (sum & 0xffff) + (sum >> 16);
526 sum = (sum & 0xffff) + (sum >> 16);
527
528 return (uint16_t)sum ^ 0xffff;
529}
530
531/*
532 * sfe_ipv4_get_connection_match_hash()
533 * Generate the hash used in connection match lookups.
534 */
535static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100536 __be32 src_ip, __be16 src_port,
537 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100538{
539 size_t dev_addr = (size_t)dev;
Dave Hudson87973cd2013-10-22 16:00:04 +0100540 uint32_t hash = ((uint32_t)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100541 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
542}
543
544/*
545 * sfe_ipv4_find_sfe_ipv4_connection_match()
546 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
547 *
548 * On entry we must be holding the lock that protects the hash table.
549 */
/*
 * Declared first so the always_inline attribute applies to the definition below.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port) __attribute__((always_inline));
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, uint8_t protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bale.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if (likely(cm->match_src_port == src_port)
	    && likely(cm->match_dest_port == dest_port)
	    && likely(cm->match_src_ip == src_ip)
	    && likely(cm->match_dest_ip == dest_ip)
	    && likely(cm->match_protocol == protocol)
	    && likely(cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * We may or may not have a matching entry but if we do then we want to
	 * move that entry to the top of the hash chain when we get to it. We
	 * presume that this will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
	       || cm->match_dest_port != dest_port
	       || cm->match_src_ip != src_ip
	       || cm->match_dest_ip != dest_ip
	       || cm->match_protocol != protocol
	       || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return cm;
	}

	/*
	 * We found a match so move it to the head of the chain (move-to-front).
	 * cm is not the head here, so cm->prev is known to be non-NULL.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
623
624/*
625 * sfe_ipv4_connection_match_update_summary_stats()
626 * Update the summary stats for a connection match entry.
627 */
628static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
629{
630 cm->rx_packet_count64 += cm->rx_packet_count;
631 cm->rx_packet_count = 0;
632 cm->rx_byte_count64 += cm->rx_byte_count;
633 cm->rx_byte_count = 0;
634}
635
636/*
637 * sfe_ipv4_connection_match_compute_translations()
638 * Compute port and address translations for a connection match entry.
639 */
640static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
641{
642 /*
643 * Before we insert the entry look to see if this is tagged as doing address
644 * translations. If it is then work out the adjustment that we need to apply
645 * to the transport checksum.
646 */
647 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
648 /*
649 * Precompute an incremental checksum adjustment so we can
650 * edit packets in this stream very quickly. The algorithm is from RFC1624.
651 */
652 uint16_t src_ip_hi = cm->match_src_ip >> 16;
653 uint16_t src_ip_lo = cm->match_src_ip & 0xffff;
654 uint32_t xlate_src_ip = ~cm->xlate_src_ip;
655 uint16_t xlate_src_ip_hi = xlate_src_ip >> 16;
656 uint16_t xlate_src_ip_lo = xlate_src_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100657 uint16_t xlate_src_port = ~cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100658 uint32_t adj;
659
660 /*
661 * When we compute this fold it down to a 16-bit offset
662 * as that way we can avoid having to do a double
663 * folding of the twos-complement result because the
664 * addition of 2 16-bit values cannot cause a double
665 * wrap-around!
666 */
667 adj = src_ip_hi + src_ip_lo + cm->match_src_port
668 + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
669 adj = (adj & 0xffff) + (adj >> 16);
670 adj = (adj & 0xffff) + (adj >> 16);
671 cm->xlate_src_csum_adjustment = (uint16_t)adj;
Nicolas Costaac2979c2014-01-14 10:35:24 -0600672
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100673 }
674
675 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
676 /*
677 * Precompute an incremental checksum adjustment so we can
678 * edit packets in this stream very quickly. The algorithm is from RFC1624.
679 */
680 uint16_t dest_ip_hi = cm->match_dest_ip >> 16;
681 uint16_t dest_ip_lo = cm->match_dest_ip & 0xffff;
682 uint32_t xlate_dest_ip = ~cm->xlate_dest_ip;
683 uint16_t xlate_dest_ip_hi = xlate_dest_ip >> 16;
684 uint16_t xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
Dave Hudson87973cd2013-10-22 16:00:04 +0100685 uint16_t xlate_dest_port = ~cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100686 uint32_t adj;
687
688 /*
689 * When we compute this fold it down to a 16-bit offset
690 * as that way we can avoid having to do a double
691 * folding of the twos-complement result because the
692 * addition of 2 16-bit values cannot cause a double
693 * wrap-around!
694 */
695 adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
696 + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
697 adj = (adj & 0xffff) + (adj >> 16);
698 adj = (adj & 0xffff) + (adj >> 16);
699 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
700 }
Xiaoping Fanad755af2015-04-01 16:58:46 -0700701
702 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
703 uint32_t adj = ~cm->match_src_ip + cm->xlate_src_ip;
704 if (adj < cm->xlate_src_ip) {
705 adj++;
706 }
707
708 adj = (adj & 0xffff) + (adj >> 16);
709 adj = (adj & 0xffff) + (adj >> 16);
710 cm->xlate_src_partial_csum_adjustment = (uint16_t)adj;
711 }
712
713 if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
714 uint32_t adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
715 if (adj < cm->xlate_dest_ip) {
716 adj++;
717 }
718
719 adj = (adj & 0xffff) + (adj >> 16);
720 adj = (adj & 0xffff) + (adj >> 16);
721 cm->xlate_dest_partial_csum_adjustment = (uint16_t)adj;
722 }
723
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100724}
725
726/*
727 * sfe_ipv4_update_summary_stats()
728 * Update the summary stats.
729 */
730static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
731{
732 int i;
733
734 si->connection_create_requests64 += si->connection_create_requests;
735 si->connection_create_requests = 0;
736 si->connection_create_collisions64 += si->connection_create_collisions;
737 si->connection_create_collisions = 0;
738 si->connection_destroy_requests64 += si->connection_destroy_requests;
739 si->connection_destroy_requests = 0;
740 si->connection_destroy_misses64 += si->connection_destroy_misses;
741 si->connection_destroy_misses = 0;
742 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
743 si->connection_match_hash_hits = 0;
744 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
745 si->connection_match_hash_reorders = 0;
746 si->connection_flushes64 += si->connection_flushes;
747 si->connection_flushes = 0;
748 si->packets_forwarded64 += si->packets_forwarded;
749 si->packets_forwarded = 0;
750 si->packets_not_forwarded64 += si->packets_not_forwarded;
751 si->packets_not_forwarded = 0;
752
753 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
754 si->exception_events64[i] += si->exception_events[i];
755 si->exception_events[i] = 0;
756 }
757}
758
759/*
760 * sfe_ipv4_insert_sfe_ipv4_connection_match()
761 * Insert a connection match into the hash.
762 *
763 * On entry we must be holding the lock that protects the hash table.
764 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);

	/*
	 * Link the new entry in at the head of its hash chain.
	 */
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	if (!si->flow_cookie_enable)
		return;

	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 * Slot 0 is skipped; a free slot must also have aged at least HZ jiffies
	 * since it was last cleaned before it can be reused.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			/*
			 * The set-function pointer is published via RCU; only
			 * claim the slot if the hardware accepts the cookie
			 * (func returns zero on success).
			 */
			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}
813
814/*
815 * sfe_ipv4_remove_sfe_ipv4_connection_match()
816 * Remove a connection match object from the hash.
817 *
818 * On entry we must be holding the lock that protects the hash table.
819 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	if (si->flow_cookie_enable) {
		/*
		 * Tell hardware that we no longer need a flow cookie in packet of this flow
		 */
		unsigned int conn_match_idx;

		/*
		 * Find the cookie-table slot that maps to this match entry.
		 * Slot 0 is never used (cookie 0 means "no cookie").
		 */
		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

			if (cm == entry->match) {
				flow_cookie_set_func_t func;

				/*
				 * Clear the hardware mapping by programming
				 * cookie value 0 for this 5-tuple.  The
				 * callback pointer is RCU-protected.
				 */
				rcu_read_lock();
				func = rcu_dereference(si->flow_cookie_set_func);
				if (func) {
					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					     cm->match_dest_ip, cm->match_dest_port, 0);
				}
				rcu_read_unlock();

				/*
				 * Record when the slot was released; the
				 * insert path won't reuse it for HZ jiffies.
				 */
				cm->flow_cookie = 0;
				entry->match = NULL;
				entry->last_clean_time = jiffies;
				break;
			}
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 * If we were the bucket head, the bucket pointer must be recomputed
	 * from the match tuple.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 * (Entries are placed on the active list lazily by the receive path.)
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}
886
887/*
888 * sfe_ipv4_get_connection_hash()
889 * Generate the hash used in connection lookups.
890 */
Dave Hudson87973cd2013-10-22 16:00:04 +0100891static inline unsigned int sfe_ipv4_get_connection_hash(uint8_t protocol, __be32 src_ip, __be16 src_port,
892 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100893{
Dave Hudson87973cd2013-10-22 16:00:04 +0100894 uint32_t hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100895 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
896}
897
898/*
899 * sfe_ipv4_find_sfe_ipv4_connection()
900 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
901 *
902 * On entry we must be holding the lock that protects the hash table.
903 */
904static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, uint32_t protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100905 __be32 src_ip, __be16 src_port,
906 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100907{
908 struct sfe_ipv4_connection *c;
909 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
910 c = si->conn_hash[conn_idx];
911
912 /*
913 * If we don't have anything in this chain then bale.
914 */
915 if (unlikely(!c)) {
916 return c;
917 }
918
919 /*
920 * Hopefully the first entry is the one we want.
921 */
922 if (likely(c->src_port == src_port)
923 && likely(c->dest_port == dest_port)
924 && likely(c->src_ip == src_ip)
925 && likely(c->dest_ip == dest_ip)
926 && likely(c->protocol == protocol)) {
927 return c;
928 }
929
930 /*
931 * We may or may not have a matching entry but if we do then we want to
932 * move that entry to the top of the hash chain when we get to it. We
933 * presume that this will be reused again very quickly.
934 */
935 do {
936 c = c->next;
937 } while (c && (c->src_port != src_port
938 || c->dest_port != dest_port
939 || c->src_ip != src_ip
940 || c->dest_ip != dest_ip
941 || c->protocol != protocol));
942
943 /*
944 * Will need connection entry for next create/destroy metadata,
945 * So no need to re-order entry for these requests
946 */
947 return c;
948}
949
950/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600951 * sfe_ipv4_mark_rule()
952 * Updates the mark for a current offloaded connection
953 *
954 * Will take hash lock upon entry
955 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700956void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600957{
958 struct sfe_ipv4 *si = &__si;
959 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600960
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700961 spin_lock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600962 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700963 mark->src_ip.ip, mark->src_port,
964 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600965 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600966 DEBUG_TRACE("Matching connection found for mark, "
967 "setting from %08x to %08x\n",
968 c->mark, mark->mark);
969 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600970 c->mark = mark->mark;
971 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700972 spin_unlock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600973}
974
975/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100976 * sfe_ipv4_insert_sfe_ipv4_connection()
977 * Insert a connection into the hash.
978 *
979 * On entry we must be holding the lock that protects the hash table.
980 */
981static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
982{
983 struct sfe_ipv4_connection **hash_head;
984 struct sfe_ipv4_connection *prev_head;
985 unsigned int conn_idx;
986
987 /*
988 * Insert entry into the connection hash.
989 */
990 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
991 c->dest_ip, c->dest_port);
992 hash_head = &si->conn_hash[conn_idx];
993 prev_head = *hash_head;
994 c->prev = NULL;
995 if (prev_head) {
996 prev_head->prev = c;
997 }
998
999 c->next = prev_head;
1000 *hash_head = c;
1001
1002 /*
1003 * Insert entry into the "all connections" list.
1004 */
1005 if (si->all_connections_tail) {
1006 c->all_connections_prev = si->all_connections_tail;
1007 si->all_connections_tail->all_connections_next = c;
1008 } else {
1009 c->all_connections_prev = NULL;
1010 si->all_connections_head = c;
1011 }
1012
1013 si->all_connections_tail = c;
1014 c->all_connections_next = NULL;
1015 si->num_connections++;
1016
1017 /*
1018 * Insert the connection match objects too.
1019 */
1020 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
1021 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
1022}
1023
1024/*
1025 * sfe_ipv4_remove_sfe_ipv4_connection()
1026 * Remove a sfe_ipv4_connection object from the hash.
1027 *
1028 * On entry we must be holding the lock that protects the hash table.
1029 */
1030static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1031{
1032 /*
1033 * Remove the connection match objects.
1034 */
1035 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1036 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1037
1038 /*
1039 * Unlink the connection.
1040 */
1041 if (c->prev) {
1042 c->prev->next = c->next;
1043 } else {
1044 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1045 c->dest_ip, c->dest_port);
1046 si->conn_hash[conn_idx] = c->next;
1047 }
1048
1049 if (c->next) {
1050 c->next->prev = c->prev;
1051 }
Xiaoping Fan34586472015-07-03 02:20:35 -07001052
1053 /*
1054 * Unlink connection from all_connections list
1055 */
1056 if (c->all_connections_prev) {
1057 c->all_connections_prev->all_connections_next = c->all_connections_next;
1058 } else {
1059 si->all_connections_head = c->all_connections_next;
1060 }
1061
1062 if (c->all_connections_next) {
1063 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1064 } else {
1065 si->all_connections_tail = c->all_connections_prev;
1066 }
1067
1068 si->num_connections--;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001069}
1070
1071/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001072 * sfe_ipv4_sync_sfe_ipv4_connection()
1073 * Sync a connection.
1074 *
1075 * On entry to this function we expect that the lock for the connection is either
1076 * already held or isn't required.
1077 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
						  uint64_t now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message.
	 * Addresses/ports are reported both as matched and as translated
	 * (the *_xlate fields) so the receiver sees any NAT applied.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->src_ip_xlate.ip = c->src_ip_xlate;
	sis->dest_ip.ip = c->dest_ip;
	sis->dest_ip_xlate.ip = c->dest_ip_xlate;
	sis->src_port = c->src_port;
	sis->src_port_xlate = c->src_port_xlate;
	sis->dest_port = c->dest_port;
	sis->dest_port_xlate = c->dest_port_xlate;

	/*
	 * TCP window-tracking state, one set per direction.  (For non-TCP
	 * connections these protocol_state.tcp fields are presumably unused
	 * by the receiver — TODO confirm.)
	 */
	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	/*
	 * Snapshot the per-period ("new") counters BEFORE calling
	 * sfe_ipv4_connection_match_update_summary_stats() below, which
	 * presumably folds them into the 64-bit totals and resets them —
	 * the ordering of these three steps matters.
	 */
	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	/*
	 * Lifetime totals are read from the 64-bit accumulators after the
	 * fold above, so they include this period's traffic.
	 */
	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	sis->reason = reason;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}
1132
1133/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001134 * sfe_ipv4_flush_sfe_ipv4_connection()
1135 * Flush a connection and free all associated resources.
1136 *
1137 * We need to be called with bottom halves disabled locally as we need to acquire
1138 * the connection hash lock and release it again. In general we're actually called
1139 * from within a BH and so we're fine, but we're also called when connections are
1140 * torn down.
1141 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, sfe_sync_reason_t reason)
{
	struct sfe_connection_sync sis;
	uint64_t now_jiffies;
	sfe_sync_rule_callback_t sync_rule_callback;

	/*
	 * The sync callback pointer is published via RCU, so hold the RCU
	 * read lock across both the dereference and the call.  The spinlock
	 * is taken only long enough to bump the flush counter and read the
	 * callback pointer — it is NOT held while the callback runs.
	 */
	rcu_read_lock();
	spin_lock_bh(&si->lock);
	si->connection_flushes++;
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	spin_unlock_bh(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a sync message and then sync.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.  The caller must have already removed
	 * the connection from the hash — TODO confirm; both call sites in
	 * this file remove before flushing.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1175
1176/*
1177 * sfe_ipv4_recv_udp()
1178 * Handle UDP packet receives and forwarding.
1179 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	uint8_t ttl;
	struct net_device *xmit_dev;

	/*
	 * Returns 1 when the packet was consumed (fast-forwarded via
	 * dev_queue_xmit), 0 when the caller should presumably hand it to
	 * the normal stack (every 0-return bumps packets_not_forwarded).
	 */

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock_bh(&si->lock);

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * Try the hardware-supplied flow cookie first; fall back to the
	 * full 5-tuple hash lookup if the cookie slot is empty.
	 */
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		/* Flush after dropping the lock: the flush path takes the lock itself. */
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow on this direction, just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);
		return 0;
	}
#endif

	/*
	 * Does our TTL allow forwarding?
	 * (< 2 because we decrement below; a TTL that would reach 0 must go
	 * to the slow path so an ICMP time-exceeded can be generated.)
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Update DSCP (the mask presumably preserves the ECN bits — confirm
	 * against the SFE_IPV4_DSCP_MASK definition).
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.  (A zero UDP checksum means "not computed"
		 * and must stay zero.)
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			/*
			 * CHECKSUM_PARTIAL packets carry a pseudo-header-only
			 * checksum, so a different precomputed adjustment
			 * applies.  Both operands are <= 0xffff, so a single
			 * end-around-carry fold below is sufficient.
			 */
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum? If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			uint32_t sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (uint16_t)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * (The MAC addresses are copied as three 16-bit
			 * halves, matching the cached xmit_*_mac layout.)
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock_bh(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
1459
1460/*
1461 * sfe_ipv4_process_tcp_option_sack()
1462 * Parse TCP SACK option and update ack according
1463 */
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack) __attribute__((always_inline));
static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const uint32_t data_offs,
					     uint32_t *ack)
{
	/*
	 * Scan the TCP options area (from the end of the fixed header up to
	 * data_offs bytes) looking for a SACK option; when found, raise *ack
	 * to the highest SACK right edge seen.  Returns false when the
	 * options are malformed, true otherwise (including "no SACK").
	 */
	uint32_t length = sizeof(struct sfe_ipv4_tcp_hdr);
	uint8_t *ptr = (uint8_t *)th + length;

	/*
	 * If option is TIMESTAMP discard it.
	 * Fast path: the very common "NOP, NOP, TIMESTAMP" layout filling
	 * the whole options area can be recognized with four byte compares.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse SACK option.
	 */
	while (length < data_offs) {
		uint8_t size;
		uint8_t kind;

		ptr = (uint8_t *)th + length;
		kind = *ptr;

		/*
		 * NOP, for padding
		 * Not in the switch because to fast escape and to not calculate size
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			uint32_t sack = 0;
			uint8_t re = 1 + 1;	/* offset past the kind and length bytes */

			/*
			 * Validate the option length: at least one SACK
			 * block, a whole number of 8-byte blocks, and not
			 * running past the end of the options area.
			 */
			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			/*
			 * Skip the left edge of the first block (4 bytes);
			 * each iteration below reads a right edge, stepping
			 * one whole block at a time.
			 */
			re += 4;
			while (re < size) {
				uint32_t sack_re;
				uint8_t *sptr = ptr + re;
				/* Assemble the big-endian 32-bit right edge byte-by-byte. */
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			/*
			 * NOTE(review): plain > comparisons do not account
			 * for 32-bit sequence-number wraparound — presumably
			 * acceptable for this fast path; confirm.
			 */
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		/*
		 * Any other option: skip over it using its length byte; a
		 * length < 2 would make no forward progress, so reject.
		 */
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}
1541
/*
 * sfe_ipv4_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 *
 * Attempts to forward an established-state TCP packet via the fast path.
 * Returns 1 if the packet was forwarded (skb consumed via dev_queue_xmit),
 * or 0 if it must be handed back to the normal Linux stack.  Takes and
 * releases si->lock internally; every exception branch bumps the matching
 * si->exception_events[] counter before unlocking.
 */
static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_tcp_hdr *tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *counter_cm;	/* state for the reply direction of this flow */
	uint8_t ttl;
	uint32_t flags;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);

	spin_lock_bh(&si->lock);

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	/*
	 * The hardware flow cookie (if present) gives a direct table index;
	 * fall back to the hash lookup when it doesn't resolve.
	 */
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		/*
		 * We didn't get a connection but as TCP is connection-oriented that
		 * may be because this is a non-fast connection (not running established).
		 * For diagnostic purposes we differentiate this here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		/* Flush (sync state to the connection manager) outside the lock. */
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow on this direction, just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);
		return 0;
	}
#endif
	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.  GSO packets are exempt: they are
	 * segmented to MTU size later in the transmit path.
	 */
	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
	 * set is not a fast path packet.
	 */
	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	counter_cm = cm->counter_match;

	/*
	 * Are we doing sequence number checking?
	 *
	 * All window comparisons below use (int32_t) casts of unsigned
	 * differences: serial-number arithmetic so that sequence-number
	 * wraparound is handled correctly.
	 */
	if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
		uint32_t seq;
		uint32_t ack;
		uint32_t sack;	/* right-most SACK edge, falls back to plain ACK */
		uint32_t data_offs;
		uint32_t end;
		uint32_t left_edge;
		uint32_t scaled_win;
		uint32_t max_end;

		/*
		 * Is our sequence fully past the right hand edge of the window?
		 */
		seq = ntohl(tcph->seq);
		if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
				    seq, cm->protocol_state.tcp.max_end + 1);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't too short.
		 */
		data_offs = tcph->doff << 2;
		if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Update ACK according to any SACK option.
		 */
		ack = ntohl(tcph->ack_seq);
		sack = ack;
		if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP option SACK size is wrong\n");
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't past the end of the packet.
		 * NOTE(review): this adds sizeof(struct sfe_ipv4_ip_hdr) rather than
		 * ihl - safe here because packets with IP options were already
		 * diverted via flush_on_find, so ihl == sizeof(ip_hdr) on this path.
		 */
		data_offs += sizeof(struct sfe_ipv4_ip_hdr);
		if (unlikely(len < data_offs)) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
				    data_offs, len);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/* Sequence number of the byte just past this segment's payload. */
		end = seq + len - data_offs;

		/*
		 * Is our sequence fully before the left hand edge of the window?
		 */
		if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
						- counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u before left edge: %u\n",
				    end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Are we acking data that is to the right of what has been sent?
		 */
		if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
				    sack, counter_cm->protocol_state.tcp.end + 1);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Is our ack too far before the left hand edge of the window?
		 */
		left_edge = counter_cm->protocol_state.tcp.end
			    - cm->protocol_state.tcp.max_win
			    - SFE_IPV4_TCP_MAX_ACK_WINDOW
			    - 1;
		if (unlikely((int32_t)(sack - left_edge) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			return 0;
		}

		/*
		 * Have we just seen the largest window size yet for this connection? If yes
		 * then we need to record the new value.
		 */
		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
		scaled_win += (sack - ack);
		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
			cm->protocol_state.tcp.max_win = scaled_win;
		}

		/*
		 * If our sequence and/or ack numbers have advanced then record the new state.
		 */
		if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
			cm->protocol_state.tcp.end = end;
		}

		max_end = sack + scaled_win;
		if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
			counter_cm->protocol_state.tcp.max_end = max_end;
		}
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Update DSCP
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		uint16_t tcp_csum;
		uint32_t sum;

		iph->saddr = cm->xlate_src_ip;
		tcph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero TCP checksum? If we do then we need
		 * to update it.  The precomputed per-connection adjustment is
		 * added and the carry folded back in (RFC 1624-style
		 * incremental update).
		 */
		tcp_csum = tcph->check;
		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
			sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
		} else {
			sum = tcp_csum + cm->xlate_src_csum_adjustment;
		}

		sum = (sum & 0xffff) + (sum >> 16);
		tcph->check = (uint16_t)sum;
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		uint16_t tcp_csum;
		uint32_t sum;

		iph->daddr = cm->xlate_dest_ip;
		tcph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero TCP checksum? If we do then we need
		 * to update it.
		 */
		tcp_csum = tcph->check;
		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
			sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
		} else {
			sum = tcp_csum + cm->xlate_dest_csum_adjustment;
		}

		sum = (sum & 0xffff) + (sum >> 16);
		tcph->check = (uint16_t)sum;
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * NOTE(review): three element copies per MAC - assumes
			 * h_dest/h_source and xmit_*_mac are 16-bit halfword
			 * arrays (3 x 16 bits = 6-byte MAC); confirm against
			 * the sfe_ipv4_eth_hdr definition.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock_bh(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
2011
/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack. What we do need to do is look for any errors
 * about connections we are handling in the fast path. If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 *
 * Always returns 0 - the packet is never forwarded by this function.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct sfe_ipv4_ip_hdr *icmp_iph;	/* IP header embedded in the ICMP error payload */
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	uint32_t *icmp_trans_h;
	struct sfe_ipv4_udp_hdr *icmp_udph;
	struct sfe_ipv4_tcp_hdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;
	uint32_t pull_len = sizeof(struct icmphdr) + ihl;	/* running total of bytes we must have linear */

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 */
	len -= ihl;
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 */
	len -= sizeof(struct icmphdr);
	pull_len += sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
	if (!pskb_may_pull(skb, pull_len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	len -= icmp_ihl;
	icmp_trans_h = ((uint32_t *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	spin_lock_bh(&si->lock);

	/*
	 * Look for a connection match. Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it. It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state.  The flush (which syncs state back to the connection manager)
	 * happens after we drop the lock.
	 */
	c = cm->connection;
	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
	return 0;
}
2208
/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Validates the IPv4 header and dispatches to the per-protocol handler
 * (UDP, TCP or ICMP).  Anything else is counted and left for the normal
 * Linux stack.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool flush_on_find;	/* true: look up the connection only to flush it */
	bool ip_options;
	struct sfe_ipv4_ip_hdr *iph;
	uint32_t protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 */
	iph = (struct sfe_ipv4_ip_hdr *)skb->data;
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?  Those can never be forwarded
	 * in the fast path (no transport header to match on).
	 */
	frag_off = ntohs(iph->frag_off);
	if (unlikely(frag_off & IP_OFFSET)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("non-initial fragment\n");
		return 0;
	}

	/*
	 * If we have a (first) fragment then mark it to cause any connection to flush.
	 */
	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;

	/*
	 * Do we have any IP options? That's definite a slow path! If we do have IP
	 * options we need to recheck our header size.
	 */
	ihl = iph->ihl << 2;
	ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
	if (unlikely(ip_options)) {
		if (unlikely(len < ihl)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
			return 0;
		}

		/* IP options also force the flush-on-find slow path. */
		flush_on_find = true;
	}

	protocol = iph->protocol;
	if (IPPROTO_UDP == protocol) {
		return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == protocol) {
		return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMP == protocol) {
		return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	spin_lock_bh(&si->lock);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
	return 0;
}
2342
Nicolas Costa436926b2014-01-14 10:36:22 -06002343static void
2344sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002345 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002346{
2347 struct sfe_ipv4_connection_match *orig_cm;
2348 struct sfe_ipv4_connection_match *repl_cm;
2349 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2350 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2351
2352 orig_cm = c->original_match;
2353 repl_cm = c->reply_match;
2354 orig_tcp = &orig_cm->protocol_state.tcp;
2355 repl_tcp = &repl_cm->protocol_state.tcp;
2356
2357 /* update orig */
2358 if (orig_tcp->max_win < sic->src_td_max_window) {
2359 orig_tcp->max_win = sic->src_td_max_window;
2360 }
2361 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2362 orig_tcp->end = sic->src_td_end;
2363 }
2364 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2365 orig_tcp->max_end = sic->src_td_max_end;
2366 }
2367
2368 /* update reply */
2369 if (repl_tcp->max_win < sic->dest_td_max_window) {
2370 repl_tcp->max_win = sic->dest_td_max_window;
2371 }
2372 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2373 repl_tcp->end = sic->dest_td_end;
2374 }
2375 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2376 repl_tcp->max_end = sic->dest_td_max_end;
2377 }
2378
2379 /* update match flags */
2380 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2381 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002382 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002383 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2384 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2385 }
2386}
2387
2388static void
2389sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002390 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002391{
2392 switch (sic->protocol) {
2393 case IPPROTO_TCP:
2394 sfe_ipv4_update_tcp_state(c, sic);
2395 break;
2396 }
2397}
2398
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002399void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002400{
2401 struct sfe_ipv4_connection *c;
2402 struct sfe_ipv4 *si = &__si;
2403
2404 spin_lock_bh(&si->lock);
2405
2406 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2407 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002408 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002409 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002410 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002411 sic->dest_port);
2412 if (c != NULL) {
2413 sfe_ipv4_update_protocol_state(c, sic);
2414 }
2415
2416 spin_unlock_bh(&si->lock);
2417}
2418
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002419/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002420 * sfe_ipv4_create_rule()
2421 * Create a forwarding rule.
2422 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002423int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002424{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002425 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002426 struct sfe_ipv4_connection *c;
2427 struct sfe_ipv4_connection_match *original_cm;
2428 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002429 struct net_device *dest_dev;
2430 struct net_device *src_dev;
2431
2432 dest_dev = sic->dest_dev;
2433 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002434
Matthew McClintock389b42a2014-09-24 14:05:51 -05002435 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2436 (src_dev->reg_state != NETREG_REGISTERED))) {
2437 return -EINVAL;
2438 }
2439
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002440 spin_lock_bh(&si->lock);
2441 si->connection_create_requests++;
2442
2443 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002444 * Check to see if there is already a flow that matches the rule we're
2445 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002446 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002447 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2448 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002449 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002450 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002451 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002452 sic->dest_port);
2453 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002454 si->connection_create_collisions++;
2455
2456 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002457 * If we already have the flow then it's likely that this
2458 * request to create the connection rule contains more
2459 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002460 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002461 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002462 spin_unlock_bh(&si->lock);
2463
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002464 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002465 " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002466 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002467 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2468 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002469 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002470 }
2471
2472 /*
2473 * Allocate the various connection tracking objects.
2474 */
2475 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2476 if (unlikely(!c)) {
2477 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002478 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002479 }
2480
2481 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2482 if (unlikely(!original_cm)) {
2483 spin_unlock_bh(&si->lock);
2484 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002485 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002486 }
2487
2488 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2489 if (unlikely(!reply_cm)) {
2490 spin_unlock_bh(&si->lock);
2491 kfree(original_cm);
2492 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002493 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002494 }
2495
2496 /*
2497 * Fill in the "original" direction connection matching object.
2498 * Note that the transmit MAC address is "dest_mac_xlate" because
2499 * we always know both ends of a connection by their translated
2500 * addresses and not their public addresses.
2501 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002502 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002503 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002504 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002505 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002506 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002507 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002508 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002509 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002510 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002511 original_cm->xlate_dest_port = sic->dest_port_xlate;
2512 original_cm->rx_packet_count = 0;
2513 original_cm->rx_packet_count64 = 0;
2514 original_cm->rx_byte_count = 0;
2515 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002516 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002517 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002518 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002519 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2520 original_cm->connection = c;
2521 original_cm->counter_match = reply_cm;
2522 original_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002523 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2524 original_cm->priority = sic->src_priority;
2525 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2526 }
2527 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2528 original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
2529 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2530 }
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002531#ifdef CONFIG_NF_FLOW_COOKIE
2532 original_cm->flow_cookie = 0;
2533#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002534#ifdef CONFIG_XFRM
2535 original_cm->flow_accel = sic->original_accel;
2536#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002537 original_cm->active_next = NULL;
2538 original_cm->active_prev = NULL;
2539 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002540
2541 /*
2542 * For PPP links we don't write an L2 header. For everything else we do.
2543 */
2544 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2545 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2546
2547 /*
2548 * If our dev writes Ethernet headers then we can write a really fast
2549 * version.
2550 */
2551 if (dest_dev->header_ops) {
2552 if (dest_dev->header_ops->create == eth_header) {
2553 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2554 }
2555 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002556 }
2557
2558 /*
2559 * Fill in the "reply" direction connection matching object.
2560 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002561 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002562 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002563 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002564 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002565 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002566 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002567 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002568 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002569 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002570 reply_cm->xlate_dest_port = sic->src_port;
2571 reply_cm->rx_packet_count = 0;
2572 reply_cm->rx_packet_count64 = 0;
2573 reply_cm->rx_byte_count = 0;
2574 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002575 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002576 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002577 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002578 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2579 reply_cm->connection = c;
2580 reply_cm->counter_match = original_cm;
2581 reply_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002582 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2583 reply_cm->priority = sic->dest_priority;
2584 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2585 }
2586 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2587 reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
2588 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2589 }
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002590#ifdef CONFIG_NF_FLOW_COOKIE
2591 reply_cm->flow_cookie = 0;
2592#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002593#ifdef CONFIG_XFRM
2594 reply_cm->flow_accel = sic->reply_accel;
2595#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002596 reply_cm->active_next = NULL;
2597 reply_cm->active_prev = NULL;
2598 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002599
2600 /*
2601 * For PPP links we don't write an L2 header. For everything else we do.
2602 */
2603 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2604 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2605
2606 /*
2607 * If our dev writes Ethernet headers then we can write a really fast
2608 * version.
2609 */
2610 if (src_dev->header_ops) {
2611 if (src_dev->header_ops->create == eth_header) {
2612 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2613 }
2614 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002615 }
2616
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002617
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002618 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002619 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2620 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2621 }
2622
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002623 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002624 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2625 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2626 }
2627
2628 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002629 c->src_ip = sic->src_ip.ip;
2630 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002631 c->src_port = sic->src_port;
2632 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002633 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002634 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002635 c->dest_ip = sic->dest_ip.ip;
2636 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002637 c->dest_port = sic->dest_port;
2638 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002639 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002640 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002641 c->mark = sic->mark;
Xiaoping Fan34586472015-07-03 02:20:35 -07002642 c->debug_read_seq = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002643 c->last_sync_jiffies = get_jiffies_64();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002644
2645 /*
2646 * Take hold of our source and dest devices for the duration of the connection.
2647 */
2648 dev_hold(c->original_dev);
2649 dev_hold(c->reply_dev);
2650
2651 /*
2652 * Initialize the protocol-specific information that we track.
2653 */
2654 switch (sic->protocol) {
2655 case IPPROTO_TCP:
2656 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2657 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2658 original_cm->protocol_state.tcp.end = sic->src_td_end;
2659 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2660 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2661 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2662 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2663 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002664 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002665 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2666 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2667 }
2668 break;
2669 }
2670
2671 sfe_ipv4_connection_match_compute_translations(original_cm);
2672 sfe_ipv4_connection_match_compute_translations(reply_cm);
2673 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2674
2675 spin_unlock_bh(&si->lock);
2676
2677 /*
2678 * We have everything we need!
2679 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002680 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002681 " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2682 " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002683 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002684 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002685 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002686 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002687 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002688
2689 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002690}
2691
2692/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002693 * sfe_ipv4_destroy_rule()
2694 * Destroy a forwarding rule.
2695 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002696void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002697{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002698 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002699 struct sfe_ipv4_connection *c;
2700
2701 spin_lock_bh(&si->lock);
2702 si->connection_destroy_requests++;
2703
2704 /*
2705 * Check to see if we have a flow that matches the rule we're trying
2706 * to destroy. If there isn't then we can't destroy it.
2707 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002708 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2709 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002710 if (!c) {
2711 si->connection_destroy_misses++;
2712 spin_unlock_bh(&si->lock);
2713
2714 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002715 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2716 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002717 return;
2718 }
2719
2720 /*
2721 * Remove our connection details from the hash tables.
2722 */
2723 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2724 spin_unlock_bh(&si->lock);
2725
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002726 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002727
2728 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002729 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2730 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002731}
2732
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * The callback is published with rcu_assign_pointer() so that readers
 * using rcu_dereference() (e.g. the periodic sync timer) observe a fully
 * initialized pointer.  Registering NULL disables sync processing, since
 * the timer handler skips its work when no callback is set.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	/* The lock serializes writers; readers rely on RCU only. */
	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2745
2746/*
2747 * sfe_ipv4_get_debug_dev()
2748 */
2749static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2750 struct device_attribute *attr,
2751 char *buf)
2752{
2753 struct sfe_ipv4 *si = &__si;
2754 ssize_t count;
2755 int num;
2756
2757 spin_lock_bh(&si->lock);
2758 num = si->debug_dev;
2759 spin_unlock_bh(&si->lock);
2760
2761 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2762 return count;
2763}
2764
2765/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002766 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002767 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002768static const struct device_attribute sfe_ipv4_debug_dev_attr =
2769 __ATTR(debug_dev, S_IWUGO | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2770
2771/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002772 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002773 * Destroy all connections that match a particular device.
2774 *
2775 * If we pass dev as NULL then this destroys all connections.
2776 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002777void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002778{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002779 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002780 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002781
Xiaoping Fan34586472015-07-03 02:20:35 -07002782another_round:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002783 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002784
Xiaoping Fan34586472015-07-03 02:20:35 -07002785 for (c = si->all_connections_head; c; c = c->all_connections_next) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002786 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002787 * Does this connection relate to the device we are destroying?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002788 */
2789 if (!dev
2790 || (dev == c->original_dev)
2791 || (dev == c->reply_dev)) {
Xiaoping Fan34586472015-07-03 02:20:35 -07002792 break;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002793 }
Xiaoping Fan34586472015-07-03 02:20:35 -07002794 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002795
Xiaoping Fan34586472015-07-03 02:20:35 -07002796 if (c) {
2797 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002798 }
2799
2800 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002801
2802 if (c) {
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002803 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Xiaoping Fan34586472015-07-03 02:20:35 -07002804 goto another_round;
2805 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002806}
2807
/*
 * sfe_ipv4_periodic_sync()
 *	Timer handler: push stats for a slice of the active connections to
 *	the registered sync callback, then re-arm the timer.
 *
 * 'arg' is the struct sfe_ipv4 instance cast through unsigned long (the
 * pre-4.15 timer callback convention).  If no sync callback is registered
 * the handler does nothing except re-arm itself.
 */
static void sfe_ipv4_periodic_sync(unsigned long arg)
{
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
	uint64_t now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	/* The callback pointer is published via rcu_assign_pointer(). */
	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * Roughly 1/64th of the total (rounded up), so a full sweep of all
	 * connections takes about 64 timer ticks.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 * NOTE(review): dropping the lock means the active list can
		 * change under us; each iteration re-reads active_head.
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm for ~1/100th of a second from now (rounded up). */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2906
/*
 * Size of the scratch buffer used to format one XML fragment of the
 * debug character device output before it is copied to user space.
 */
#define CHAR_DEV_MSG_SIZE 768
2908
2909/*
2910 * sfe_ipv4_debug_dev_read_start()
2911 * Generate part of the XML output.
2912 */
2913static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2914 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2915{
2916 int bytes_read;
2917
Xiaoping Fan34586472015-07-03 02:20:35 -07002918 si->debug_read_seq++;
2919
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002920 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2921 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2922 return false;
2923 }
2924
2925 *length -= bytes_read;
2926 *total_read += bytes_read;
2927
2928 ws->state++;
2929 return true;
2930}
2931
2932/*
2933 * sfe_ipv4_debug_dev_read_connections_start()
2934 * Generate part of the XML output.
2935 */
2936static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2937 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2938{
2939 int bytes_read;
2940
2941 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2942 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2943 return false;
2944 }
2945
2946 *length -= bytes_read;
2947 *total_read += bytes_read;
2948
2949 ws->state++;
2950 return true;
2951}
2952
2953/*
2954 * sfe_ipv4_debug_dev_read_connections_connection()
2955 * Generate part of the XML output.
2956 */
2957static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2958 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2959{
2960 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002961 struct sfe_ipv4_connection_match *original_cm;
2962 struct sfe_ipv4_connection_match *reply_cm;
2963 int bytes_read;
2964 int protocol;
2965 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002966 __be32 src_ip;
2967 __be32 src_ip_xlate;
2968 __be16 src_port;
2969 __be16 src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002970 uint64_t src_rx_packets;
2971 uint64_t src_rx_bytes;
2972 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01002973 __be32 dest_ip;
2974 __be32 dest_ip_xlate;
2975 __be16 dest_port;
2976 __be16 dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002977 uint64_t dest_rx_packets;
2978 uint64_t dest_rx_bytes;
2979 uint64_t last_sync_jiffies;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002980 uint32_t mark, src_priority, dest_priority, src_dscp, dest_dscp;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002981#ifdef CONFIG_NF_FLOW_COOKIE
2982 int src_flow_cookie, dst_flow_cookie;
2983#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002984
2985 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002986
2987 for (c = si->all_connections_head; c; c = c->all_connections_next) {
2988 if (c->debug_read_seq < si->debug_read_seq) {
2989 c->debug_read_seq = si->debug_read_seq;
2990 break;
2991 }
2992 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002993
2994 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002995 * If there were no connections then move to the next state.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002996 */
2997 if (!c) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002998 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002999 ws->state++;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003000 return true;
3001 }
3002
3003 original_cm = c->original_match;
3004 reply_cm = c->reply_match;
3005
3006 protocol = c->protocol;
3007 src_dev = c->original_dev;
3008 src_ip = c->src_ip;
3009 src_ip_xlate = c->src_ip_xlate;
3010 src_port = c->src_port;
3011 src_port_xlate = c->src_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003012 src_priority = original_cm->priority;
3013 src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003014
3015 sfe_ipv4_connection_match_update_summary_stats(original_cm);
3016 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
3017
3018 src_rx_packets = original_cm->rx_packet_count64;
3019 src_rx_bytes = original_cm->rx_byte_count64;
3020 dest_dev = c->reply_dev;
3021 dest_ip = c->dest_ip;
3022 dest_ip_xlate = c->dest_ip_xlate;
3023 dest_port = c->dest_port;
3024 dest_port_xlate = c->dest_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003025 dest_priority = reply_cm->priority;
3026 dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003027 dest_rx_packets = reply_cm->rx_packet_count64;
3028 dest_rx_bytes = reply_cm->rx_byte_count64;
3029 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003030 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003031#ifdef CONFIG_NF_FLOW_COOKIE
3032 src_flow_cookie = original_cm->flow_cookie;
3033 dst_flow_cookie = reply_cm->flow_cookie;
3034#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003035 spin_unlock_bh(&si->lock);
3036
3037 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3038 "protocol=\"%u\" "
3039 "src_dev=\"%s\" "
3040 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3041 "src_port=\"%u\" src_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003042 "src_priority=\"%u\" src_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003043 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3044 "dest_dev=\"%s\" "
3045 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3046 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003047 "dest_priority=\"%u\" dest_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003048 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003049#ifdef CONFIG_NF_FLOW_COOKIE
3050 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3051#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003052 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003053 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003054 protocol,
3055 src_dev->name,
3056 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003057 ntohs(src_port), ntohs(src_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003058 src_priority, src_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003059 src_rx_packets, src_rx_bytes,
3060 dest_dev->name,
3061 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003062 ntohs(dest_port), ntohs(dest_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003063 dest_priority, dest_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003064 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003065#ifdef CONFIG_NF_FLOW_COOKIE
3066 src_flow_cookie, dst_flow_cookie,
3067#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003068 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003069
3070 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3071 return false;
3072 }
3073
3074 *length -= bytes_read;
3075 *total_read += bytes_read;
3076
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003077 return true;
3078}
3079
3080/*
3081 * sfe_ipv4_debug_dev_read_connections_end()
3082 * Generate part of the XML output.
3083 */
3084static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3085 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3086{
3087 int bytes_read;
3088
3089 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3090 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3091 return false;
3092 }
3093
3094 *length -= bytes_read;
3095 *total_read += bytes_read;
3096
3097 ws->state++;
3098 return true;
3099}
3100
3101/*
3102 * sfe_ipv4_debug_dev_read_exceptions_start()
3103 * Generate part of the XML output.
3104 */
3105static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3106 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3107{
3108 int bytes_read;
3109
3110 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3111 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3112 return false;
3113 }
3114
3115 *length -= bytes_read;
3116 *total_read += bytes_read;
3117
3118 ws->state++;
3119 return true;
3120}
3121
3122/*
3123 * sfe_ipv4_debug_dev_read_exceptions_exception()
3124 * Generate part of the XML output.
3125 */
3126static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3127 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3128{
3129 uint64_t ct;
3130
3131 spin_lock_bh(&si->lock);
3132 ct = si->exception_events64[ws->iter_exception];
3133 spin_unlock_bh(&si->lock);
3134
3135 if (ct) {
3136 int bytes_read;
3137
3138 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3139 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3140 sfe_ipv4_exception_events_string[ws->iter_exception],
3141 ct);
3142 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3143 return false;
3144 }
3145
3146 *length -= bytes_read;
3147 *total_read += bytes_read;
3148 }
3149
3150 ws->iter_exception++;
3151 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3152 ws->iter_exception = 0;
3153 ws->state++;
3154 }
3155
3156 return true;
3157}
3158
3159/*
3160 * sfe_ipv4_debug_dev_read_exceptions_end()
3161 * Generate part of the XML output.
3162 */
3163static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3164 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3165{
3166 int bytes_read;
3167
3168 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3169 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3170 return false;
3171 }
3172
3173 *length -= bytes_read;
3174 *total_read += bytes_read;
3175
3176 ws->state++;
3177 return true;
3178}
3179
3180/*
3181 * sfe_ipv4_debug_dev_read_stats()
3182 * Generate part of the XML output.
3183 */
3184static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3185 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3186{
3187 int bytes_read;
3188 unsigned int num_connections;
3189 uint64_t packets_forwarded;
3190 uint64_t packets_not_forwarded;
3191 uint64_t connection_create_requests;
3192 uint64_t connection_create_collisions;
3193 uint64_t connection_destroy_requests;
3194 uint64_t connection_destroy_misses;
3195 uint64_t connection_flushes;
3196 uint64_t connection_match_hash_hits;
3197 uint64_t connection_match_hash_reorders;
3198
3199 spin_lock_bh(&si->lock);
3200 sfe_ipv4_update_summary_stats(si);
3201
3202 num_connections = si->num_connections;
3203 packets_forwarded = si->packets_forwarded64;
3204 packets_not_forwarded = si->packets_not_forwarded64;
3205 connection_create_requests = si->connection_create_requests64;
3206 connection_create_collisions = si->connection_create_collisions64;
3207 connection_destroy_requests = si->connection_destroy_requests64;
3208 connection_destroy_misses = si->connection_destroy_misses64;
3209 connection_flushes = si->connection_flushes64;
3210 connection_match_hash_hits = si->connection_match_hash_hits64;
3211 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3212 spin_unlock_bh(&si->lock);
3213
3214 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3215 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003216 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3217 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003218 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3219 "flushes=\"%llu\" "
3220 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3221 num_connections,
3222 packets_forwarded,
3223 packets_not_forwarded,
3224 connection_create_requests,
3225 connection_create_collisions,
3226 connection_destroy_requests,
3227 connection_destroy_misses,
3228 connection_flushes,
3229 connection_match_hash_hits,
3230 connection_match_hash_reorders);
3231 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3232 return false;
3233 }
3234
3235 *length -= bytes_read;
3236 *total_read += bytes_read;
3237
3238 ws->state++;
3239 return true;
3240}
3241
3242/*
3243 * sfe_ipv4_debug_dev_read_end()
3244 * Generate part of the XML output.
3245 */
3246static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3247 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3248{
3249 int bytes_read;
3250
3251 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3252 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3253 return false;
3254 }
3255
3256 *length -= bytes_read;
3257 *total_read += bytes_read;
3258
3259 ws->state++;
3260 return true;
3261}
3262
3263/*
3264 * Array of write functions that write various XML elements that correspond to
3265 * our XML output state machine.
3266 */
3267sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
3268 sfe_ipv4_debug_dev_read_start,
3269 sfe_ipv4_debug_dev_read_connections_start,
3270 sfe_ipv4_debug_dev_read_connections_connection,
3271 sfe_ipv4_debug_dev_read_connections_end,
3272 sfe_ipv4_debug_dev_read_exceptions_start,
3273 sfe_ipv4_debug_dev_read_exceptions_exception,
3274 sfe_ipv4_debug_dev_read_exceptions_end,
3275 sfe_ipv4_debug_dev_read_stats,
3276 sfe_ipv4_debug_dev_read_end,
3277};
3278
3279/*
3280 * sfe_ipv4_debug_dev_read()
3281 * Send info to userspace upon read request from user
3282 */
3283static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3284{
3285 char msg[CHAR_DEV_MSG_SIZE];
3286 int total_read = 0;
3287 struct sfe_ipv4_debug_xml_write_state *ws;
3288 struct sfe_ipv4 *si = &__si;
3289
3290 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3291 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3292 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3293 continue;
3294 }
3295 }
3296
3297 return total_read;
3298}
3299
3300/*
3301 * sfe_ipv4_debug_dev_write()
Nicolas Costabafb3af2014-01-29 16:39:39 -06003302 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003303 */
3304static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3305{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003306 struct sfe_ipv4 *si = &__si;
3307
3308 spin_lock_bh(&si->lock);
3309 sfe_ipv4_update_summary_stats(si);
3310
Matthew McClintock54167ab2014-01-14 21:06:28 -06003311 si->packets_forwarded64 = 0;
3312 si->packets_not_forwarded64 = 0;
3313 si->connection_create_requests64 = 0;
3314 si->connection_create_collisions64 = 0;
3315 si->connection_destroy_requests64 = 0;
3316 si->connection_destroy_misses64 = 0;
3317 si->connection_flushes64 = 0;
3318 si->connection_match_hash_hits64 = 0;
3319 si->connection_match_hash_reorders64 = 0;
3320 spin_unlock_bh(&si->lock);
3321
3322 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003323}
3324
3325/*
3326 * sfe_ipv4_debug_dev_open()
3327 */
3328static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3329{
3330 struct sfe_ipv4_debug_xml_write_state *ws;
3331
3332 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3333 if (!ws) {
3334 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3335 if (!ws) {
3336 return -ENOMEM;
3337 }
3338
3339 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3340 file->private_data = ws;
3341 }
3342
3343 return 0;
3344}
3345
3346/*
3347 * sfe_ipv4_debug_dev_release()
3348 */
3349static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3350{
3351 struct sfe_ipv4_debug_xml_write_state *ws;
3352
3353 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3354 if (ws) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003355 /*
3356 * We've finished with our output so free the write state.
3357 */
3358 kfree(ws);
3359 }
3360
3361 return 0;
3362}
3363
3364/*
3365 * File operations used in the debug char device
3366 */
3367static struct file_operations sfe_ipv4_debug_dev_fops = {
3368 .read = sfe_ipv4_debug_dev_read,
3369 .write = sfe_ipv4_debug_dev_write,
3370 .open = sfe_ipv4_debug_dev_open,
3371 .release = sfe_ipv4_debug_dev_release
3372};
3373
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003374#ifdef CONFIG_NF_FLOW_COOKIE
3375/*
3376 * sfe_register_flow_cookie_cb
3377 * register a function in SFE to let SFE use this function to configure flow cookie for a flow
3378 *
3379 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
3380 * can use this function to configure flow cookie for a flow.
3381 * return: 0, success; !=0, fail
3382 */
3383int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
3384{
3385 struct sfe_ipv4 *si = &__si;
3386
3387 BUG_ON(!cb);
3388
3389 if (si->flow_cookie_set_func) {
3390 return -1;
3391 }
3392
3393 rcu_assign_pointer(si->flow_cookie_set_func, cb);
3394 return 0;
3395}
3396
3397/*
3398 * sfe_unregister_flow_cookie_cb
3399 * unregister function which is used to configure flow cookie for a flow
3400 *
3401 * return: 0, success; !=0, fail
3402 */
3403int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
3404{
3405 struct sfe_ipv4 *si = &__si;
3406
3407 RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
3408 return 0;
3409}
Xiaoping Fan640faf42015-08-28 15:50:55 -07003410
3411/*
3412 * sfe_ipv4_get_flow_cookie()
3413 */
3414static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev,
3415 struct device_attribute *attr,
3416 char *buf)
3417{
3418 struct sfe_ipv4 *si = &__si;
Xiaoping Fan01c67cc2015-11-09 11:31:57 -08003419 return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003420}
3421
3422/*
3423 * sfe_ipv4_set_flow_cookie()
3424 */
3425static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev,
3426 struct device_attribute *attr,
3427 const char *buf, size_t size)
3428{
3429 struct sfe_ipv4 *si = &__si;
3430 strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable);
3431
3432 return size;
3433}
3434
3435/*
3436 * sysfs attributes.
3437 */
3438static const struct device_attribute sfe_ipv4_flow_cookie_attr =
3439 __ATTR(flow_cookie_enable, S_IWUGO | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003440#endif /*CONFIG_NF_FLOW_COOKIE*/
3441
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003442/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003443 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003444 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003445static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003446{
3447 struct sfe_ipv4 *si = &__si;
3448 int result = -1;
3449
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003450 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003451
3452 /*
3453 * Create sys/sfe_ipv4
3454 */
3455 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3456 if (!si->sys_sfe_ipv4) {
3457 DEBUG_ERROR("failed to register sfe_ipv4\n");
3458 goto exit1;
3459 }
3460
3461 /*
3462 * Create files, one for each parameter supported by this module.
3463 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003464 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3465 if (result) {
3466 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003467 goto exit2;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003468 }
3469
Xiaoping Fan640faf42015-08-28 15:50:55 -07003470#ifdef CONFIG_NF_FLOW_COOKIE
3471 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3472 if (result) {
3473 DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
3474 goto exit3;
3475 }
3476#endif /* CONFIG_NF_FLOW_COOKIE */
3477
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003478 /*
3479 * Register our debug char device.
3480 */
3481 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3482 if (result < 0) {
3483 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003484 goto exit4;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003485 }
3486
3487 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003488
3489 /*
3490 * Create a timer to handle periodic statistics.
3491 */
3492 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003493 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003494
Dave Hudson87973cd2013-10-22 16:00:04 +01003495 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003496
Dave Hudson87973cd2013-10-22 16:00:04 +01003497 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003498
Xiaoping Fan640faf42015-08-28 15:50:55 -07003499exit4:
3500#ifdef CONFIG_NF_FLOW_COOKIE
3501 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3502
3503exit3:
3504#endif /* CONFIG_NF_FLOW_COOKIE */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003505 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3506
Xiaoping Fan640faf42015-08-28 15:50:55 -07003507exit2:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003508 kobject_put(si->sys_sfe_ipv4);
3509
3510exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003511 return result;
3512}
3513
3514/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003515 * sfe_ipv4_exit()
3516 */
3517static void __exit sfe_ipv4_exit(void)
3518{
Dave Hudson87973cd2013-10-22 16:00:04 +01003519 struct sfe_ipv4 *si = &__si;
3520
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003521 DEBUG_INFO("SFE IPv4 exit\n");
Dave Hudson87973cd2013-10-22 16:00:04 +01003522
3523 /*
3524 * Destroy all connections.
3525 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003526 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Dave Hudson87973cd2013-10-22 16:00:04 +01003527
Dave Hudson87973cd2013-10-22 16:00:04 +01003528 del_timer_sync(&si->timer);
3529
Dave Hudson87973cd2013-10-22 16:00:04 +01003530 unregister_chrdev(si->debug_dev, "sfe_ipv4");
3531
Xiaoping Fan640faf42015-08-28 15:50:55 -07003532#ifdef CONFIG_NF_FLOW_COOKIE
3533 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3534#endif /* CONFIG_NF_FLOW_COOKIE */
Dave Hudson87973cd2013-10-22 16:00:04 +01003535 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3536
Dave Hudson87973cd2013-10-22 16:00:04 +01003537 kobject_put(si->sys_sfe_ipv4);
3538
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003539}
3540
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Public entry points exported to other kernel modules (e.g. the SFE
 * connection manager) that drive rule creation, destruction and sync.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003559