blob: 554dd08a62b14c21f31b9933cf631627fd58c510 [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Murat Sezginc7dd8172019-02-27 15:23:50 -08005 * Copyright (c) 2013-2016, 2019, The Linux Foundation. All rights reserved.
Xiaoping Fana42c68b2015-08-07 18:00:39 -07006 * Permission to use, copy, modify, and/or distribute this software for
7 * any purpose with or without fee is hereby granted, provided that the
8 * above copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010016 */
Matthew McClintocka3221942014-01-16 11:44:26 -060017
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010018#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060019#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020#include <linux/skbuff.h>
21#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010022#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060023#include <linux/etherdevice.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010024
Dave Hudsondcd08fb2013-11-22 09:25:16 -060025#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070026#include "sfe_cm.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010027
28/*
Dave Hudsona8197e72013-12-17 23:46:22 +000029 * By default Linux IP header and transport layer header structures are
30 * unpacked, assuming that such headers should be 32-bit aligned.
31 * Unfortunately some wireless adaptors can't cope with this requirement and
32 * some CPUs can't handle misaligned accesses. For those platforms we
33 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
34 * When we do this the compiler will generate slightly worse code than for the
35 * aligned case (on most platforms) but will be much quicker than fixing
36 * things up in an unaligned trap handler.
37 */
38#define SFE_IPV4_UNALIGNED_IP_HEADER 1
39#if SFE_IPV4_UNALIGNED_IP_HEADER
40#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
41#else
42#define SFE_IPV4_UNALIGNED_STRUCT
43#endif
44
45/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060046 * An Ethernet header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000047 * help with performance on some platforms (see the definition of
48 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010049 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -060050struct sfe_ipv4_eth_hdr {
51 __be16 h_dest[ETH_ALEN / 2];
52 __be16 h_source[ETH_ALEN / 2];
53 __be16 h_proto;
54} SFE_IPV4_UNALIGNED_STRUCT;
55
/*
 * Mask/shift used when remarking the DSCP field of the IPv4 TOS byte.
 * NOTE(review): the mask covers the low 2 bits of the TOS byte, which
 * presumably preserves the ECN bits while the DSCP value is written at
 * SFE_IPV4_DSCP_SHIFT -- the rewrite site is not visible in this chunk,
 * so confirm against the TOS update code.
 */
#define SFE_IPV4_DSCP_MASK 0x3
#define SFE_IPV4_DSCP_SHIFT 2
58
Matthew McClintockdb5ac512014-01-16 17:01:40 -060059/*
60 * An IPv4 header, but with an optional "packed" attribute to
61 * help with performance on some platforms (see the definition of
62 * SFE_IPV4_UNALIGNED_STRUCT)
63 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,			/* Header length in 32-bit words */
	     version:4;			/* IP version (4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service (DSCP + ECN) */
	__be16 tot_len;			/* Total datagram length in bytes */
	__be16 id;			/* Identification for fragment reassembly */
	__be16 frag_off;		/* Fragment flags and offset */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol number */
	__sum16 check;			/* Header checksum */
	__be32 saddr;			/* Source address */
	__be32 daddr;			/* Destination address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010088
89/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060090 * A UDP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000091 * help with performance on some platforms (see the definition of
92 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010093 */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* UDP header plus payload length */
	__sum16 check;			/* UDP checksum (may be zero for IPv4) */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100100
101/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600102 * A TCP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +0000103 * help with performance on some platforms (see the definition of
104 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100105 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgment number */
	/*
	 * Data offset, reserved bits and flags.  The two bitfield layouts
	 * below must mirror each other exactly so the on-wire bit positions
	 * are identical on both endiannesses.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100140
141/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100142 * Specifies the lower bound on ACK numbers carried in the TCP header
143 */
144#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
145
146/*
147 * IPv4 TCP connection match additional data.
148 */
149struct sfe_ipv4_tcp_connection_match {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700150 u8 win_scale; /* Window scale */
151 u32 max_win; /* Maximum window size seen */
152 u32 end; /* Sequence number of the next byte to send (seq + segment length) */
153 u32 max_end; /* Sequence number of the last byte to ack */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100154};
155
156/*
157 * Bit flags for IPv4 connection matching entry.
158 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
					/* Write an L2 header on output.
					 * NOTE(review): the original comment here duplicated the
					 * fast-path text above; presumably this is the generic
					 * (non-fast) L2 header write -- confirm against usage.
					 */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
					/* remark priority of SKB */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
					/* remark DSCP of packet */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100173
174/*
175 * IPv4 connection matching structure.
176 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next entry in the hash chain */
	struct sfe_ipv4_connection_match *prev;
					/* Previous entry in the hash chain */
	struct sfe_ipv4_connection *connection;
					/* Owning connection */
	struct sfe_ipv4_connection_match *counter_match;
					/* Matches the flow in the opposite direction as the one in *connection */
	struct sfe_ipv4_connection_match *active_next;
					/* Next entry on the recently-active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Previous entry on the recently-active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	u8 match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	u32 flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	u32 flow_cookie;		/* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	u32 flow_accel;			/* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * rx_packet_count64/rx_byte_count64 after a sync period.
	 */
	u32 rx_packet_count;
	u32 rx_byte_count;

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;	/* Port/connection ident after source translation */
	u16 xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	u16 xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;	/* Port/connection ident after destination translation */
	u16 xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	u16 xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * QoS information
	 */
	u32 priority;			/* Priority applied when PRIORITY_REMARK flag is set */
	u32 dscp;			/* DSCP applied when DSCP_REMARK flag is set */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	u16 xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	u16 xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	u64 rx_packet_count64;
	u64 rx_byte_count64;
};
264
265/*
266 * Per-connection data structure.
267 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Src IP addr pre-translation */
	__be32 src_ip_xlate;		/* Src IP addr post-translation */
	__be32 dest_ip;			/* Dest IP addr pre-translation */
	__be32 dest_ip_xlate;		/* Dest IP addr post-translation */
	__be16 src_port;		/* Src port pre-translation */
	__be16 src_port_xlate;		/* Src port post-translation */
	__be16 dest_port;		/* Dest port pre-translation */
	__be16 dest_port_xlate;		/* Dest port post-translation */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	u64 last_sync_jiffies;		/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	u32 mark;			/* mark for outgoing packet */
	u32 debug_read_seq;		/* sequence number for debug dump */
};
297
298/*
299 * IPv4 connections and hash table size information.
300 */
301#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
302#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
303#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
304
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800305#ifdef CONFIG_NF_FLOW_COOKIE
306#define SFE_FLOW_COOKIE_SIZE 2048
307#define SFE_FLOW_COOKIE_MASK 0x7ff
308
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
					/* Connection match associated with this cookie slot */
	unsigned long last_clean_time;	/* Timestamp of the last slot cleanup -- presumably jiffies; confirm against usage */
};
313#endif
314
/*
 * Reasons why a packet cannot be accelerated and is handed back to the
 * Linux stack.  Per-event counters are kept in struct sfe_ipv4
 * (exception_events/exception_events64), and the printable names live in
 * sfe_ipv4_exception_events_string -- keep all three in the same order.
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
354
/*
 * Printable names for enum sfe_ipv4_exception_events.
 * Entries MUST stay in the same order as the enum declaration.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
393
394/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600395 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100396 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table*/
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware*/
	int flow_cookie_enable;
					/* Enable/disable flow cookie at runtime */
#endif

	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * connection_xxx64 after a sync period.
	 */
	u32 connection_create_requests;
					/* Number of IPv4 connection create requests */
	u32 connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u32 connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	u32 connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u32 connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	u32 connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	u32 connection_flushes;		/* Number of IPv4 connection flushes */
	u32 packets_forwarded;		/* Number of IPv4 packets forwarded */
	u32 packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics.
	 */
	u64 connection_create_requests64;
					/* Number of IPv4 connection create requests */
	u64 connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u64 connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	u64 connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u64 connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	u64 connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	u64 connection_flushes64;	/* Number of IPv4 connection flushes */
	u64 packets_forwarded64;	/* Number of IPv4 packets forwarded */
	u64 packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
	u32 debug_read_seq;		/* sequence number for debug dump */
};
473
474/*
475 * Enumeration of the XML output.
476 */
/*
 * States of the XML debug-dump state machine; the dump walks these in
 * declaration order from START to DONE.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
489
490/*
491 * XML write state.
492 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;		/* Next exception iterator */
};

/*
 * Signature of one per-state writer for the XML debug dump.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/* Single module-wide engine instance. */
static struct sfe_ipv4 __si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100503
504/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100505 * sfe_ipv4_gen_ip_csum()
506 * Generate the IP checksum for an IPv4 header.
507 *
508 * Note that this function assumes that we have only 20 bytes of IP header.
509 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700510static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100511{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700512 u32 sum;
513 u16 *i = (u16 *)iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100514
515 iph->check = 0;
516
517 /*
518 * Generate the sum.
519 */
520 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
521
522 /*
523 * Fold it to ones-complement form.
524 */
525 sum = (sum & 0xffff) + (sum >> 16);
526 sum = (sum & 0xffff) + (sum >> 16);
527
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700528 return (u16)sum ^ 0xffff;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100529}
530
531/*
532 * sfe_ipv4_get_connection_match_hash()
533 * Generate the hash used in connection match lookups.
534 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700535static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100536 __be32 src_ip, __be16 src_port,
537 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100538{
539 size_t dev_addr = (size_t)dev;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700540 u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100541 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
542}
543
544/*
545 * sfe_ipv4_find_sfe_ipv4_connection_match()
546 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
547 *
548 * On entry we must be holding the lock that protects the hash table.
549 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bail.
	 */
	if (unlikely(!cm)) {
		return NULL;
	}

	/*
	 * Hopefully the first entry is the one we want.  The ports are compared
	 * first as they are the most likely fields to differ between flows.
	 */
	if ((cm->match_src_port == src_port)
	    && (cm->match_dest_port == dest_port)
	    && (cm->match_src_ip == src_ip)
	    && (cm->match_dest_ip == dest_ip)
	    && (cm->match_protocol == protocol)
	    && (cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * Unfortunately we didn't find it at head, so we search it in chain and
	 * move matching entry to the top of the hash chain. We presume that this
	 * will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
		 || cm->match_dest_port != dest_port
		 || cm->match_src_ip != src_ip
		 || cm->match_dest_ip != dest_ip
		 || cm->match_protocol != protocol
		 || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return NULL;
	}

	/*
	 * We found a match so move it to the head of the chain (move-to-front).
	 * Unlink cm from its current position; cm->prev is non-NULL here
	 * because cm is never the head at this point.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
619
620/*
621 * sfe_ipv4_connection_match_update_summary_stats()
622 * Update the summary stats for a connection match entry.
623 */
624static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
625{
626 cm->rx_packet_count64 += cm->rx_packet_count;
627 cm->rx_packet_count = 0;
628 cm->rx_byte_count64 += cm->rx_byte_count;
629 cm->rx_byte_count = 0;
630}
631
632/*
633 * sfe_ipv4_connection_match_compute_translations()
634 * Compute port and address translations for a connection match entry.
635 */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations. If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly. The algorithm is from RFC1624.
		 * NOTE(review): the arithmetic below operates directly on __be32/__be16
		 * values; the halves are summed symmetrically so byte order cancels out
		 * when the adjustment is later applied to the (also big-endian) checksum.
		 */
		u16 src_ip_hi = cm->match_src_ip >> 16;
		u16 src_ip_lo = cm->match_src_ip & 0xffff;
		u32 xlate_src_ip = ~cm->xlate_src_ip;
		u16 xlate_src_ip_hi = xlate_src_ip >> 16;
		u16 xlate_src_ip_lo = xlate_src_ip & 0xffff;
		u16 xlate_src_port = ~cm->xlate_src_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (u16)adj;

	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly. The algorithm is from RFC1624.
		 */
		u16 dest_ip_hi = cm->match_dest_ip >> 16;
		u16 dest_ip_lo = cm->match_dest_ip & 0xffff;
		u32 xlate_dest_ip = ~cm->xlate_dest_ip;
		u16 xlate_dest_ip_hi = xlate_dest_ip >> 16;
		u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		u16 xlate_dest_port = ~cm->xlate_dest_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (u16)adj;
	}

	/*
	 * Also precompute "partial" (pseudo-header) checksum adjustments that
	 * cover only the IP address change, not the port change.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		u32 adj = ~cm->match_src_ip + cm->xlate_src_ip;
		/* Carry wrapped around: emulate ones-complement addition. */
		if (adj < cm->xlate_src_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_partial_csum_adjustment = (u16)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
		/* Carry wrapped around: emulate ones-complement addition. */
		if (adj < cm->xlate_dest_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_partial_csum_adjustment = (u16)adj;
	}

}
721
722/*
723 * sfe_ipv4_update_summary_stats()
724 * Update the summary stats.
725 */
726static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
727{
728 int i;
729
730 si->connection_create_requests64 += si->connection_create_requests;
731 si->connection_create_requests = 0;
732 si->connection_create_collisions64 += si->connection_create_collisions;
733 si->connection_create_collisions = 0;
734 si->connection_destroy_requests64 += si->connection_destroy_requests;
735 si->connection_destroy_requests = 0;
736 si->connection_destroy_misses64 += si->connection_destroy_misses;
737 si->connection_destroy_misses = 0;
738 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
739 si->connection_match_hash_hits = 0;
740 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
741 si->connection_match_hash_reorders = 0;
742 si->connection_flushes64 += si->connection_flushes;
743 si->connection_flushes = 0;
744 si->packets_forwarded64 += si->packets_forwarded;
745 si->packets_forwarded = 0;
746 si->packets_not_forwarded64 += si->packets_not_forwarded;
747 si->packets_not_forwarded = 0;
748
749 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
750 si->exception_events64[i] += si->exception_events[i];
751 si->exception_events[i] = 0;
752 }
753}
754
/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 *
 * The match object is pushed at the head of its hash chain (the chains are
 * doubly linked, so removal later is O(1)).  When flow cookies are compiled
 * in and enabled, a free hardware flow-cookie slot is also claimed for this
 * flow so later lookups can bypass the software hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si,
							     struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);

	/*
	 * Standard head insertion into the doubly linked chain for this bucket.
	 */
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	if (!si->flow_cookie_enable)
		return;

	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 *
	 * Note: conn_match_idx is reused here as the cookie-table slot index;
	 * slot 0 is skipped (it appears to mean "no cookie" — see the remove
	 * path, which programs 0 to clear a cookie).  A slot is only reclaimed
	 * after it has been clean for at least HZ jiffies, presumably to let
	 * in-flight packets carrying the old cookie drain — confirm with the
	 * hardware driver.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			/*
			 * The set callback is published via RCU, so dereference
			 * it inside an RCU read-side critical section.
			 */
			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				/* A zero return from the callback means the hardware accepted the cookie. */
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}
811
/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 *
 * Undoes everything the insert path set up: releases any hardware flow
 * cookie held by this match, unlinks it from its hash chain, and takes it
 * off the active list if it is on one.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	if (si->flow_cookie_enable) {
		/*
		 * Tell hardware that we no longer need a flow cookie in packet of this flow
		 */
		unsigned int conn_match_idx;

		/* Linear scan; slot 0 is never used by the insert path. */
		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

			if (cm == entry->match) {
				flow_cookie_set_func_t func;

				rcu_read_lock();
				func = rcu_dereference(si->flow_cookie_set_func);
				if (func) {
					/* Programming cookie value 0 clears the hardware mapping. */
					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					     cm->match_dest_ip, cm->match_dest_port, 0);
				}
				rcu_read_unlock();

				/*
				 * Stamp the slot so the insert path won't reuse it
				 * for at least HZ jiffies.
				 */
				cm->flow_cookie = 0;
				entry->match = NULL;
				entry->last_clean_time = jiffies;
				break;
			}
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 * The hash index only needs recomputing when we were the chain head.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}
884
885/*
886 * sfe_ipv4_get_connection_hash()
887 * Generate the hash used in connection lookups.
888 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700889static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
Dave Hudson87973cd2013-10-22 16:00:04 +0100890 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100891{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700892 u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100893 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
894}
895
896/*
897 * sfe_ipv4_find_sfe_ipv4_connection()
898 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
899 *
900 * On entry we must be holding the lock that protects the hash table.
901 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700902static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100903 __be32 src_ip, __be16 src_port,
904 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100905{
906 struct sfe_ipv4_connection *c;
907 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
908 c = si->conn_hash[conn_idx];
909
910 /*
911 * If we don't have anything in this chain then bale.
912 */
913 if (unlikely(!c)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700914 return NULL;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100915 }
916
917 /*
918 * Hopefully the first entry is the one we want.
919 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700920 if ((c->src_port == src_port)
921 && (c->dest_port == dest_port)
922 && (c->src_ip == src_ip)
923 && (c->dest_ip == dest_ip)
924 && (c->protocol == protocol)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100925 return c;
926 }
927
928 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700929 * Unfortunately we didn't find it at head, so we search it in chain.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100930 */
931 do {
932 c = c->next;
933 } while (c && (c->src_port != src_port
934 || c->dest_port != dest_port
935 || c->src_ip != src_ip
936 || c->dest_ip != dest_ip
937 || c->protocol != protocol));
938
939 /*
940 * Will need connection entry for next create/destroy metadata,
941 * So no need to re-order entry for these requests
942 */
943 return c;
944}
945
946/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600947 * sfe_ipv4_mark_rule()
948 * Updates the mark for a current offloaded connection
949 *
950 * Will take hash lock upon entry
951 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700952void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600953{
954 struct sfe_ipv4 *si = &__si;
955 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600956
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700957 spin_lock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600958 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700959 mark->src_ip.ip, mark->src_port,
960 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600961 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600962 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600963 c->mark = mark->mark;
964 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700965 spin_unlock_bh(&si->lock);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700966
967 if (c) {
968 DEBUG_TRACE("Matching connection found for mark, "
969 "setting from %08x to %08x\n",
970 c->mark, mark->mark);
971 }
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600972}
973
/*
 * sfe_ipv4_insert_sfe_ipv4_connection()
 *	Insert a connection into the hash.
 *
 * The connection is linked into three places: the head of its 5-tuple hash
 * chain, the tail of the global "all connections" list, and (via its two
 * match objects) the connection-match hash used by the fast path.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_connection **hash_head;
	struct sfe_ipv4_connection *prev_head;
	unsigned int conn_idx;

	/*
	 * Insert entry into the connection hash.
	 * Head insertion into the doubly linked chain for this bucket.
	 */
	conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
						c->dest_ip, c->dest_port);
	hash_head = &si->conn_hash[conn_idx];
	prev_head = *hash_head;
	c->prev = NULL;
	if (prev_head) {
		prev_head->prev = c;
	}

	c->next = prev_head;
	*hash_head = c;

	/*
	 * Insert entry into the "all connections" list.
	 * Appended at the tail so the list stays in creation order.
	 */
	if (si->all_connections_tail) {
		c->all_connections_prev = si->all_connections_tail;
		si->all_connections_tail->all_connections_next = c;
	} else {
		c->all_connections_prev = NULL;
		si->all_connections_head = c;
	}

	si->all_connections_tail = c;
	c->all_connections_next = NULL;
	si->num_connections++;

	/*
	 * Insert the connection match objects too.
	 * One match per direction: original and reply.
	 */
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
}
1022
/*
 * sfe_ipv4_remove_sfe_ipv4_connection()
 *	Remove a sfe_ipv4_connection object from the hash.
 *
 * Mirror of the insert path: drops both direction match objects first
 * (reply then original), then unlinks the connection from its hash chain
 * and from the global "all connections" list.  Does NOT free anything —
 * freeing is the flush path's job.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Remove the connection match objects.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);

	/*
	 * Unlink the connection.
	 * The bucket index is only needed when we were the chain head.
	 */
	if (c->prev) {
		c->prev->next = c->next;
	} else {
		unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
								     c->dest_ip, c->dest_port);
		si->conn_hash[conn_idx] = c->next;
	}

	if (c->next) {
		c->next->prev = c->prev;
	}

	/*
	 * Unlink connection from all_connections list
	 */
	if (c->all_connections_prev) {
		c->all_connections_prev->all_connections_next = c->all_connections_next;
	} else {
		si->all_connections_head = c->all_connections_next;
	}

	if (c->all_connections_next) {
		c->all_connections_next->all_connections_prev = c->all_connections_prev;
	} else {
		si->all_connections_tail = c->all_connections_prev;
	}

	si->num_connections--;
}
1069
/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *	Generate a sync message for a connection.
 *
 * (The old header named this sfe_ipv4_sync_sfe_ipv4_connection — fixed.)
 *
 * Fills *sis with a snapshot of the connection: 5-tuple plus translated
 * addresses/ports, per-direction TCP tracking state, new (delta) and total
 * packet/byte counters, the devices involved, the sync reason, and the
 * jiffies elapsed since the previous sync.  The per-match interval counters
 * are folded into their 64-bit totals as a side effect, and the
 * connection's last-sync timestamp is advanced to now_jiffies.
 *
 * On entry to this function we expect that the lock for the connection is either
 * already held or isn't required.
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
						  u64 now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->src_ip_xlate.ip = c->src_ip_xlate;
	sis->dest_ip.ip = c->dest_ip;
	sis->dest_ip_xlate.ip = c->dest_ip_xlate;
	sis->src_port = c->src_port;
	sis->src_port_xlate = c->src_port_xlate;
	sis->dest_port = c->dest_port;
	sis->dest_port_xlate = c->dest_port_xlate;

	/*
	 * TCP window/sequence tracking state, per direction.  These fields
	 * are read unconditionally; for non-TCP flows they carry whatever
	 * the union member holds — presumably ignored by consumers (verify
	 * against the sync callback users).
	 */
	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	/*
	 * "New" counters are the deltas accumulated since the last sync;
	 * they must be captured BEFORE the summary-stats update below
	 * zeroes them.
	 */
	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	sis->reason = reason;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}
1131
/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * The connection must already have been unhashed by the caller; this
 * function delivers a final sync to any registered callback, then drops
 * the device references and frees the connection and its match objects.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again. In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si,
					       struct sfe_ipv4_connection *c,
					       sfe_sync_reason_t reason)
{
	struct sfe_connection_sync sis;
	u64 now_jiffies;
	sfe_sync_rule_callback_t sync_rule_callback;

	/*
	 * The callback pointer is published via RCU; hold the read-side
	 * section across its use so the callback can't be torn down under us.
	 * The spinlock is only needed for the stats bump and the dereference.
	 */
	rcu_read_lock();
	spin_lock_bh(&si->lock);
	si->connection_flushes++;
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	spin_unlock_bh(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a sync message and then sync.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1176
1177/*
1178 * sfe_ipv4_recv_udp()
1179 * Handle UDP packet receives and forwarding.
1180 */
1181static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001182 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001183{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001184 struct sfe_ipv4_udp_hdr *udph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001185 __be32 src_ip;
1186 __be32 dest_ip;
1187 __be16 src_port;
1188 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001189 struct sfe_ipv4_connection_match *cm;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001190 u8 ttl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001191 struct net_device *xmit_dev;
1192
1193 /*
1194 * Is our packet too short to contain a valid UDP header?
1195 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001196 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001197 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001198 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1199 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001200 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001201
1202 DEBUG_TRACE("packet too short for UDP header\n");
1203 return 0;
1204 }
1205
1206 /*
1207 * Read the IP address and port information. Read the IP header data first
1208 * because we've almost certainly got that in the cache. We may not yet have
1209 * the UDP header cached though so allow more time for any prefetching.
1210 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001211 src_ip = iph->saddr;
1212 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001213
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001214 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001215 src_port = udph->source;
1216 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001217
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001218 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001219
1220 /*
1221 * Look for a connection match.
1222 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001223#ifdef CONFIG_NF_FLOW_COOKIE
1224 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1225 if (unlikely(!cm)) {
1226 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1227 }
1228#else
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001229 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001230#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001231 if (unlikely(!cm)) {
1232 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1233 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001234 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001235
1236 DEBUG_TRACE("no connection found\n");
1237 return 0;
1238 }
1239
1240 /*
1241 * If our packet has beern marked as "flush on find" we can't actually
1242 * forward it in the fast path, but now that we've found an associated
1243 * connection we can flush that out before we process the packet.
1244 */
1245 if (unlikely(flush_on_find)) {
1246 struct sfe_ipv4_connection *c = cm->connection;
1247 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1248 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1249 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001250 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001251
1252 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001253 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001254 return 0;
1255 }
1256
Zhi Chen8748eb32015-06-18 12:58:48 -07001257#ifdef CONFIG_XFRM
1258 /*
1259 * We can't accelerate the flow on this direction, just let it go
1260 * through the slow path.
1261 */
1262 if (unlikely(!cm->flow_accel)) {
1263 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001264 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001265 return 0;
1266 }
1267#endif
1268
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001269 /*
1270 * Does our TTL allow forwarding?
1271 */
1272 ttl = iph->ttl;
1273 if (unlikely(ttl < 2)) {
1274 struct sfe_ipv4_connection *c = cm->connection;
1275 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1276 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1277 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001278 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001279
1280 DEBUG_TRACE("ttl too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001281 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001282 return 0;
1283 }
1284
1285 /*
1286 * If our packet is larger than the MTU of the transmit interface then
1287 * we can't forward it easily.
1288 */
1289 if (unlikely(len > cm->xmit_dev_mtu)) {
1290 struct sfe_ipv4_connection *c = cm->connection;
1291 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1292 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1293 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001294 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001295
1296 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001297 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001298 return 0;
1299 }
1300
1301 /*
1302 * From this point on we're good to modify the packet.
1303 */
1304
1305 /*
Murat Sezginc7dd8172019-02-27 15:23:50 -08001306 * Check if skb was cloned. If it was, unshare it. Because
1307 * the data area is going to be written in this path and we don't want to
1308 * change the cloned skb's data section.
1309 */
1310 if (unlikely(skb_cloned(skb))) {
1311 DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
1312 skb = skb_unshare(skb, GFP_ATOMIC);
1313 if (!skb) {
1314 DEBUG_WARN("Failed to unshare the cloned skb\n");
1315 return 0;
1316 }
1317
1318 /*
1319 * Update the iph and udph pointers with the unshared skb's data area.
1320 */
1321 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
1322 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
1323 }
1324
1325 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001326 * Update DSCP
1327 */
1328 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1329 iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
1330 }
1331
1332 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001333 * Decrement our TTL.
1334 */
1335 iph->ttl = ttl - 1;
1336
1337 /*
1338 * Do we have to perform translations of the source address/port?
1339 */
1340 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001341 u16 udp_csum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001342
Dave Hudson87973cd2013-10-22 16:00:04 +01001343 iph->saddr = cm->xlate_src_ip;
1344 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001345
1346 /*
1347 * Do we have a non-zero UDP checksum? If we do then we need
1348 * to update it.
1349 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001350 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001351 if (likely(udp_csum)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001352 u32 sum;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001353
1354 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1355 sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
1356 } else {
1357 sum = udp_csum + cm->xlate_src_csum_adjustment;
1358 }
1359
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001360 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001361 udph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001362 }
1363 }
1364
1365 /*
1366 * Do we have to perform translations of the destination address/port?
1367 */
1368 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001369 u16 udp_csum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001370
Dave Hudson87973cd2013-10-22 16:00:04 +01001371 iph->daddr = cm->xlate_dest_ip;
1372 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001373
1374 /*
1375 * Do we have a non-zero UDP checksum? If we do then we need
1376 * to update it.
1377 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001378 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001379 if (likely(udp_csum)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001380 u32 sum;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001381
1382 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1383 sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
1384 } else {
1385 sum = udp_csum + cm->xlate_dest_csum_adjustment;
1386 }
1387
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001388 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001389 udph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001390 }
1391 }
1392
1393 /*
1394 * Replace the IP checksum.
1395 */
1396 iph->check = sfe_ipv4_gen_ip_csum(iph);
1397
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001398 /*
1399 * Update traffic stats.
1400 */
1401 cm->rx_packet_count++;
1402 cm->rx_byte_count += len;
1403
1404 /*
1405 * If we're not already on the active list then insert ourselves at the tail
1406 * of the current list.
1407 */
1408 if (unlikely(!cm->active)) {
1409 cm->active = true;
1410 cm->active_prev = si->active_tail;
1411 if (likely(si->active_tail)) {
1412 si->active_tail->active_next = cm;
1413 } else {
1414 si->active_head = cm;
1415 }
1416 si->active_tail = cm;
1417 }
1418
1419 xmit_dev = cm->xmit_dev;
1420 skb->dev = xmit_dev;
1421
1422 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001423 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001424 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001425 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1426 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001427 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1428 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001429 } else {
1430 /*
1431 * For the simple case we write this really fast.
1432 */
1433 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1434 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001435 eth->h_dest[0] = cm->xmit_dest_mac[0];
1436 eth->h_dest[1] = cm->xmit_dest_mac[1];
1437 eth->h_dest[2] = cm->xmit_dest_mac[2];
1438 eth->h_source[0] = cm->xmit_src_mac[0];
1439 eth->h_source[1] = cm->xmit_src_mac[1];
1440 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001441 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001442 }
1443
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001444 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001445 * Update priority of skb.
1446 */
1447 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
1448 skb->priority = cm->priority;
1449 }
1450
1451 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001452 * Mark outgoing packet.
1453 */
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001454 skb->mark = cm->connection->mark;
1455 if (skb->mark) {
1456 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1457 }
1458
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001459 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001460 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001461
1462 /*
1463 * We're going to check for GSO flags when we transmit the packet so
1464 * start fetching the necessary cache line now.
1465 */
1466 prefetch(skb_shinfo(skb));
1467
1468 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001469 * Mark that this packet has been fast forwarded.
1470 */
1471 skb->fast_forwarded = 1;
1472
1473 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001474 * Send the packet on its way.
1475 */
1476 dev_queue_xmit(skb);
1477
1478 return 1;
1479}
1480
1481/*
1482 * sfe_ipv4_process_tcp_option_sack()
1483 * Parse TCP SACK option and update ack according
1484 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001485static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs,
1486 u32 *ack)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001487{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001488 u32 length = sizeof(struct sfe_ipv4_tcp_hdr);
1489 u8 *ptr = (u8 *)th + length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001490
1491 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001492 * Ignore processing if TCP packet has only TIMESTAMP option.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001493 */
1494 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1495 && likely(ptr[0] == TCPOPT_NOP)
1496 && likely(ptr[1] == TCPOPT_NOP)
1497 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1498 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1499 return true;
1500 }
1501
1502 /*
1503 * TCP options. Parse SACK option.
1504 */
1505 while (length < data_offs) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001506 u8 size;
1507 u8 kind;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001508
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001509 ptr = (u8 *)th + length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001510 kind = *ptr;
1511
1512 /*
1513 * NOP, for padding
1514 * Not in the switch because to fast escape and to not calculate size
1515 */
1516 if (kind == TCPOPT_NOP) {
1517 length++;
1518 continue;
1519 }
1520
1521 if (kind == TCPOPT_SACK) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001522 u32 sack = 0;
1523 u8 re = 1 + 1;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001524
1525 size = *(ptr + 1);
1526 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1527 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1528 || (size > (data_offs - length))) {
1529 return false;
1530 }
1531
1532 re += 4;
1533 while (re < size) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001534 u32 sack_re;
1535 u8 *sptr = ptr + re;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001536 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1537 if (sack_re > sack) {
1538 sack = sack_re;
1539 }
1540 re += TCPOLEN_SACK_PERBLOCK;
1541 }
1542 if (sack > *ack) {
1543 *ack = sack;
1544 }
1545 length += size;
1546 continue;
1547 }
1548 if (kind == TCPOPT_EOL) {
1549 return true;
1550 }
1551 size = *(ptr + 1);
1552 if (size < 2) {
1553 return false;
1554 }
1555 length += size;
1556 }
1557
1558 return true;
1559}
1560
1561/*
1562 * sfe_ipv4_recv_tcp()
1563 * Handle TCP packet receives and forwarding.
1564 */
1565static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001566 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001567{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001568 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001569 __be32 src_ip;
1570 __be32 dest_ip;
1571 __be16 src_port;
1572 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001573 struct sfe_ipv4_connection_match *cm;
1574 struct sfe_ipv4_connection_match *counter_cm;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001575 u8 ttl;
1576 u32 flags;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001577 struct net_device *xmit_dev;
1578
1579 /*
1580 * Is our packet too short to contain a valid UDP header?
1581 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001582 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001583 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001584 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1585 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001586 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001587
1588 DEBUG_TRACE("packet too short for TCP header\n");
1589 return 0;
1590 }
1591
1592 /*
1593 * Read the IP address and port information. Read the IP header data first
1594 * because we've almost certainly got that in the cache. We may not yet have
1595 * the TCP header cached though so allow more time for any prefetching.
1596 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001597 src_ip = iph->saddr;
1598 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001599
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001600 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001601 src_port = tcph->source;
1602 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001603 flags = tcp_flag_word(tcph);
1604
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001605 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001606
1607 /*
1608 * Look for a connection match.
1609 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001610#ifdef CONFIG_NF_FLOW_COOKIE
1611 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1612 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001613 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001614 }
1615#else
Matthew McClintock37858802015-02-03 12:12:02 -06001616 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001617#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001618 if (unlikely(!cm)) {
1619 /*
1620 * We didn't get a connection but as TCP is connection-oriented that
1621 * may be because this is a non-fast connection (not running established).
1622 * For diagnostic purposes we differentiate this here.
1623 */
1624 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1625 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1626 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001627 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001628
1629 DEBUG_TRACE("no connection found - fast flags\n");
1630 return 0;
1631 }
1632 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1633 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001634 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001635
1636 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1637 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1638 return 0;
1639 }
1640
1641 /*
1642 * If our packet has beern marked as "flush on find" we can't actually
1643 * forward it in the fast path, but now that we've found an associated
1644 * connection we can flush that out before we process the packet.
1645 */
1646 if (unlikely(flush_on_find)) {
1647 struct sfe_ipv4_connection *c = cm->connection;
1648 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1649 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1650 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001651 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001652
1653 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001654 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001655 return 0;
1656 }
1657
Zhi Chen8748eb32015-06-18 12:58:48 -07001658#ifdef CONFIG_XFRM
1659 /*
1660 * We can't accelerate the flow on this direction, just let it go
1661 * through the slow path.
1662 */
1663 if (unlikely(!cm->flow_accel)) {
1664 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001665 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001666 return 0;
1667 }
1668#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001669 /*
1670 * Does our TTL allow forwarding?
1671 */
1672 ttl = iph->ttl;
1673 if (unlikely(ttl < 2)) {
1674 struct sfe_ipv4_connection *c = cm->connection;
1675 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1676 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1677 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001678 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001679
1680 DEBUG_TRACE("ttl too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001681 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001682 return 0;
1683 }
1684
1685 /*
1686 * If our packet is larger than the MTU of the transmit interface then
1687 * we can't forward it easily.
1688 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001689 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001690 struct sfe_ipv4_connection *c = cm->connection;
1691 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1692 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1693 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001694 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001695
1696 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001697 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001698 return 0;
1699 }
1700
1701 /*
1702 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1703 * set is not a fast path packet.
1704 */
1705 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1706 struct sfe_ipv4_connection *c = cm->connection;
1707 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1708 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1709 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001710 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001711
1712 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1713 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001714 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001715 return 0;
1716 }
1717
1718 counter_cm = cm->counter_match;
1719
1720 /*
1721 * Are we doing sequence number checking?
1722 */
1723 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001724 u32 seq;
1725 u32 ack;
1726 u32 sack;
1727 u32 data_offs;
1728 u32 end;
1729 u32 left_edge;
1730 u32 scaled_win;
1731 u32 max_end;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001732
1733 /*
1734 * Is our sequence fully past the right hand edge of the window?
1735 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001736 seq = ntohl(tcph->seq);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001737 if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001738 struct sfe_ipv4_connection *c = cm->connection;
1739 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1740 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1741 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001742 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001743
1744 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1745 seq, cm->protocol_state.tcp.max_end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001746 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001747 return 0;
1748 }
1749
1750 /*
1751 * Check that our TCP data offset isn't too short.
1752 */
1753 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001754 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001755 struct sfe_ipv4_connection *c = cm->connection;
1756 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1757 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1758 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001759 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001760
1761 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001762 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001763 return 0;
1764 }
1765
1766 /*
1767 * Update ACK according to any SACK option.
1768 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001769 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001770 sack = ack;
1771 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1772 struct sfe_ipv4_connection *c = cm->connection;
1773 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1774 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1775 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001776 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001777
1778 DEBUG_TRACE("TCP option SACK size is wrong\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001779 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001780 return 0;
1781 }
1782
1783 /*
1784 * Check that our TCP data offset isn't past the end of the packet.
1785 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001786 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001787 if (unlikely(len < data_offs)) {
1788 struct sfe_ipv4_connection *c = cm->connection;
1789 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1790 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1791 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001792 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001793
1794 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1795 data_offs, len);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001796 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001797 return 0;
1798 }
1799
1800 end = seq + len - data_offs;
1801
1802 /*
1803 * Is our sequence fully before the left hand edge of the window?
1804 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001805 if (unlikely((s32)(end - (cm->protocol_state.tcp.end
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001806 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1807 struct sfe_ipv4_connection *c = cm->connection;
1808 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1809 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1810 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001811 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001812
1813 DEBUG_TRACE("seq: %u before left edge: %u\n",
1814 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001815 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001816 return 0;
1817 }
1818
1819 /*
1820 * Are we acking data that is to the right of what has been sent?
1821 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001822 if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001823 struct sfe_ipv4_connection *c = cm->connection;
1824 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1825 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1826 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001827 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001828
1829 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1830 sack, counter_cm->protocol_state.tcp.end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001831 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001832 return 0;
1833 }
1834
1835 /*
1836 * Is our ack too far before the left hand edge of the window?
1837 */
1838 left_edge = counter_cm->protocol_state.tcp.end
1839 - cm->protocol_state.tcp.max_win
1840 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1841 - 1;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001842 if (unlikely((s32)(sack - left_edge) < 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001843 struct sfe_ipv4_connection *c = cm->connection;
1844 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1845 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1846 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001847 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001848
1849 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001850 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001851 return 0;
1852 }
1853
1854 /*
1855 * Have we just seen the largest window size yet for this connection? If yes
1856 * then we need to record the new value.
1857 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001858 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001859 scaled_win += (sack - ack);
1860 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1861 cm->protocol_state.tcp.max_win = scaled_win;
1862 }
1863
1864 /*
1865 * If our sequence and/or ack numbers have advanced then record the new state.
1866 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001867 if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001868 cm->protocol_state.tcp.end = end;
1869 }
1870
1871 max_end = sack + scaled_win;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001872 if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001873 counter_cm->protocol_state.tcp.max_end = max_end;
1874 }
1875 }
1876
1877 /*
1878 * From this point on we're good to modify the packet.
1879 */
1880
1881 /*
Murat Sezginc7dd8172019-02-27 15:23:50 -08001882 * Check if skb was cloned. If it was, unshare it. Because
1883 * the data area is going to be written in this path and we don't want to
1884 * change the cloned skb's data section.
1885 */
1886 if (unlikely(skb_cloned(skb))) {
1887 DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
1888 skb = skb_unshare(skb, GFP_ATOMIC);
1889 if (!skb) {
1890 DEBUG_WARN("Failed to unshare the cloned skb\n");
1891 return 0;
1892 }
1893
1894 /*
1895 * Update the iph and tcph pointers with the unshared skb's data area.
1896 */
1897 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
1898 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
1899 }
1900
1901 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001902 * Update DSCP
1903 */
1904 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1905 iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
1906 }
1907
1908 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001909 * Decrement our TTL.
1910 */
1911 iph->ttl = ttl - 1;
1912
1913 /*
1914 * Do we have to perform translations of the source address/port?
1915 */
1916 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001917 u16 tcp_csum;
1918 u32 sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001919
Dave Hudson87973cd2013-10-22 16:00:04 +01001920 iph->saddr = cm->xlate_src_ip;
1921 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001922
1923 /*
1924 * Do we have a non-zero UDP checksum? If we do then we need
1925 * to update it.
1926 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001927 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001928 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1929 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1930 } else {
1931 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1932 }
1933
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001934 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001935 tcph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001936 }
1937
1938 /*
1939 * Do we have to perform translations of the destination address/port?
1940 */
1941 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001942 u16 tcp_csum;
1943 u32 sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001944
Dave Hudson87973cd2013-10-22 16:00:04 +01001945 iph->daddr = cm->xlate_dest_ip;
1946 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001947
1948 /*
1949 * Do we have a non-zero UDP checksum? If we do then we need
1950 * to update it.
1951 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001952 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001953 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1954 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1955 } else {
1956 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1957 }
1958
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001959 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001960 tcph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001961 }
1962
1963 /*
1964 * Replace the IP checksum.
1965 */
1966 iph->check = sfe_ipv4_gen_ip_csum(iph);
1967
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001968 /*
1969 * Update traffic stats.
1970 */
1971 cm->rx_packet_count++;
1972 cm->rx_byte_count += len;
1973
1974 /*
1975 * If we're not already on the active list then insert ourselves at the tail
1976 * of the current list.
1977 */
1978 if (unlikely(!cm->active)) {
1979 cm->active = true;
1980 cm->active_prev = si->active_tail;
1981 if (likely(si->active_tail)) {
1982 si->active_tail->active_next = cm;
1983 } else {
1984 si->active_head = cm;
1985 }
1986 si->active_tail = cm;
1987 }
1988
1989 xmit_dev = cm->xmit_dev;
1990 skb->dev = xmit_dev;
1991
1992 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001993 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001994 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001995 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1996 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001997 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1998 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001999 } else {
2000 /*
2001 * For the simple case we write this really fast.
2002 */
2003 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
2004 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06002005 eth->h_dest[0] = cm->xmit_dest_mac[0];
2006 eth->h_dest[1] = cm->xmit_dest_mac[1];
2007 eth->h_dest[2] = cm->xmit_dest_mac[2];
2008 eth->h_source[0] = cm->xmit_src_mac[0];
2009 eth->h_source[1] = cm->xmit_src_mac[1];
2010 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002011 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002012 }
2013
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002014 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07002015 * Update priority of skb.
2016 */
2017 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
2018 skb->priority = cm->priority;
2019 }
2020
2021 /*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002022 * Mark outgoing packet
2023 */
2024 skb->mark = cm->connection->mark;
2025 if (skb->mark) {
2026 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
2027 }
2028
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002029 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002030 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002031
2032 /*
2033 * We're going to check for GSO flags when we transmit the packet so
2034 * start fetching the necessary cache line now.
2035 */
2036 prefetch(skb_shinfo(skb));
2037
2038 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06002039 * Mark that this packet has been fast forwarded.
2040 */
2041 skb->fast_forwarded = 1;
2042
2043 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002044 * Send the packet on its way.
2045 */
2046 dev_queue_xmit(skb);
2047
2048 return 1;
2049}
2050
2051/*
2052 * sfe_ipv4_recv_icmp()
2053 * Handle ICMP packet receives.
2054 *
2055 * ICMP packets aren't handled as a "fast path" and always have us process them
2056 * through the default Linux stack. What we do need to do is look for any errors
2057 * about connections we are handling in the fast path. If we find any such
2058 * connections then we want to flush their state so that the ICMP error path
2059 * within Linux has all of the correct state should it need it.
2060 */
2061static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002062 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002063{
2064 struct icmphdr *icmph;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002065 struct sfe_ipv4_ip_hdr *icmp_iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002066 unsigned int icmp_ihl_words;
2067 unsigned int icmp_ihl;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002068 u32 *icmp_trans_h;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002069 struct sfe_ipv4_udp_hdr *icmp_udph;
2070 struct sfe_ipv4_tcp_hdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01002071 __be32 src_ip;
2072 __be32 dest_ip;
2073 __be16 src_port;
2074 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002075 struct sfe_ipv4_connection_match *cm;
2076 struct sfe_ipv4_connection *c;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002077 u32 pull_len = sizeof(struct icmphdr) + ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002078
2079 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002080 * Is our packet too short to contain a valid ICMP header?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002081 */
2082 len -= ihl;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002083 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002084 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002085 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2086 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002087 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002088
2089 DEBUG_TRACE("packet too short for ICMP header\n");
2090 return 0;
2091 }
2092
2093 /*
2094 * We only handle "destination unreachable" and "time exceeded" messages.
2095 */
2096 icmph = (struct icmphdr *)(skb->data + ihl);
2097 if ((icmph->type != ICMP_DEST_UNREACH)
2098 && (icmph->type != ICMP_TIME_EXCEEDED)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002099 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002100 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2101 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002102 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002103
2104 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2105 return 0;
2106 }
2107
2108 /*
2109 * Do we have the full embedded IP header?
2110 */
2111 len -= sizeof(struct icmphdr);
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002112 pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2113 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002114 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002115 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2116 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002117 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002118
2119 DEBUG_TRACE("Embedded IP header not complete\n");
2120 return 0;
2121 }
2122
2123 /*
2124 * Is our embedded IP version wrong?
2125 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002126 icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002127 if (unlikely(icmp_iph->version != 4)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002128 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002129 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2130 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002131 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002132
2133 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2134 return 0;
2135 }
2136
2137 /*
2138 * Do we have the full embedded IP header, including any options?
2139 */
2140 icmp_ihl_words = icmp_iph->ihl;
2141 icmp_ihl = icmp_ihl_words << 2;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002142 pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2143 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002144 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002145 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2146 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002147 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002148
2149 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2150 return 0;
2151 }
2152
Nicolas Costaac2979c2014-01-14 10:35:24 -06002153 len -= icmp_ihl;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002154 icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002155
2156 /*
2157 * Handle the embedded transport layer header.
2158 */
2159 switch (icmp_iph->protocol) {
2160 case IPPROTO_UDP:
2161 /*
2162 * We should have 8 bytes of UDP header - that's enough to identify
2163 * the connection.
2164 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002165 pull_len += 8;
2166 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002167 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002168 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2169 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002170 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002171
2172 DEBUG_TRACE("Incomplete embedded UDP header\n");
2173 return 0;
2174 }
2175
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002176 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002177 src_port = icmp_udph->source;
2178 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002179 break;
2180
2181 case IPPROTO_TCP:
2182 /*
2183 * We should have 8 bytes of TCP header - that's enough to identify
2184 * the connection.
2185 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002186 pull_len += 8;
2187 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002188 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002189 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2190 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002191 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002192
2193 DEBUG_TRACE("Incomplete embedded TCP header\n");
2194 return 0;
2195 }
2196
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002197 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002198 src_port = icmp_tcph->source;
2199 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002200 break;
2201
2202 default:
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002203 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002204 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2205 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002206 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002207
2208 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2209 return 0;
2210 }
2211
Dave Hudson87973cd2013-10-22 16:00:04 +01002212 src_ip = icmp_iph->saddr;
2213 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002214
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002215 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002216
2217 /*
2218 * Look for a connection match. Note that we reverse the source and destination
2219 * here because our embedded message contains a packet that was sent in the
2220 * opposite direction to the one in which we just received it. It will have
2221 * been sent on the interface from which we received it though so that's still
2222 * ok to use.
2223 */
2224 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2225 if (unlikely(!cm)) {
2226 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2227 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002228 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002229
2230 DEBUG_TRACE("no connection found\n");
2231 return 0;
2232 }
2233
2234 /*
2235 * We found a connection so now remove it from the connection list and flush
2236 * its state.
2237 */
2238 c = cm->connection;
2239 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2240 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2241 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002242 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002243
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002244 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002245 return 0;
2246}
2247
/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Entry point for the fast path: validates the IPv4 header and dispatches
 * to the per-protocol handlers (UDP/TCP/ICMP).  Any packet that fails
 * validation is counted as an exception and returned to the slow path.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool flush_on_find;
	bool ip_options;
	struct sfe_ipv4_ip_hdr *iph;
	u32 protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 * Note: iph is only read after the pskb_may_pull() above has
	 * guaranteed the header bytes are in the linear area.
	 */
	iph = (struct sfe_ipv4_ip_hdr *)skb->data;
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?  Those carry no L4 header so we
	 * can never match them against a connection.
	 */
	frag_off = ntohs(iph->frag_off);
	if (unlikely(frag_off & IP_OFFSET)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("non-initial fragment\n");
		return 0;
	}

	/*
	 * If we have a (first) fragment then mark it to cause any connection to flush.
	 */
	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;

	/*
	 * Do we have any IP options? That's definite a slow path! If we do have IP
	 * options we need to recheck our header size.
	 */
	ihl = iph->ihl << 2;
	ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
	if (unlikely(ip_options)) {
		if (unlikely(len < ihl)) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
			return 0;
		}

		flush_on_find = true;
	}

	/*
	 * Dispatch on the transport protocol.  Each handler takes ownership
	 * of the accounting for the packet from here on.
	 */
	protocol = iph->protocol;
	if (IPPROTO_UDP == protocol) {
		return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == protocol) {
		return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMP == protocol) {
		return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	spin_lock_bh(&si->lock);
	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
	return 0;
}
2381
Nicolas Costa436926b2014-01-14 10:36:22 -06002382static void
2383sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002384 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002385{
2386 struct sfe_ipv4_connection_match *orig_cm;
2387 struct sfe_ipv4_connection_match *repl_cm;
2388 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2389 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2390
2391 orig_cm = c->original_match;
2392 repl_cm = c->reply_match;
2393 orig_tcp = &orig_cm->protocol_state.tcp;
2394 repl_tcp = &repl_cm->protocol_state.tcp;
2395
2396 /* update orig */
2397 if (orig_tcp->max_win < sic->src_td_max_window) {
2398 orig_tcp->max_win = sic->src_td_max_window;
2399 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002400 if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002401 orig_tcp->end = sic->src_td_end;
2402 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002403 if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002404 orig_tcp->max_end = sic->src_td_max_end;
2405 }
2406
2407 /* update reply */
2408 if (repl_tcp->max_win < sic->dest_td_max_window) {
2409 repl_tcp->max_win = sic->dest_td_max_window;
2410 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002411 if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002412 repl_tcp->end = sic->dest_td_end;
2413 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002414 if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002415 repl_tcp->max_end = sic->dest_td_max_end;
2416 }
2417
2418 /* update match flags */
2419 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2420 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002421 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002422 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2423 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2424 }
2425}
2426
2427static void
2428sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002429 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002430{
2431 switch (sic->protocol) {
2432 case IPPROTO_TCP:
2433 sfe_ipv4_update_tcp_state(c, sic);
2434 break;
2435 }
2436}
2437
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002438void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002439{
2440 struct sfe_ipv4_connection *c;
2441 struct sfe_ipv4 *si = &__si;
2442
2443 spin_lock_bh(&si->lock);
2444
2445 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2446 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002447 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002448 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002449 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002450 sic->dest_port);
2451 if (c != NULL) {
2452 sfe_ipv4_update_protocol_state(c, sic);
2453 }
2454
2455 spin_unlock_bh(&si->lock);
2456}
2457
/*
 * sfe_ipv4_create_rule()
 *	Create a forwarding rule.
 *
 * Builds a connection object plus one connection-match object per
 * direction ("original" and "reply"), fills in the NAT translations and
 * L2 transmit details, and inserts the whole thing into the engine's
 * tables under si->lock.
 *
 * Returns 0 on success, -EINVAL if either device is not registered,
 * -EADDRINUSE if an equivalent flow already exists (in which case the
 * existing flow's protocol state is refreshed instead), or -ENOMEM on
 * allocation failure.
 */
int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	struct net_device *dest_dev;
	struct net_device *src_dev;

	dest_dev = sic->dest_dev;
	src_dev = sic->src_dev;

	/*
	 * Refuse to accelerate flows over devices that are going (or have
	 * gone) away.
	 */
	if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
		     (src_dev->reg_state != NETREG_REGISTERED))) {
		return -EINVAL;
	}

	spin_lock_bh(&si->lock);
	si->connection_create_requests++;

	/*
	 * Check to see if there is already a flow that matches the rule we're
	 * trying to create. If there is then we can't create a new one.
	 */
	c = sfe_ipv4_find_sfe_ipv4_connection(si,
					      sic->protocol,
					      sic->src_ip.ip,
					      sic->src_port,
					      sic->dest_ip.ip,
					      sic->dest_port);
	if (c != NULL) {
		si->connection_create_collisions++;

		/*
		 * If we already have the flow then it's likely that this
		 * request to create the connection rule contains more
		 * up-to-date information. Check and update accordingly.
		 */
		sfe_ipv4_update_protocol_state(c, sic);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
			    "  s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
			    sic->mark, sic->protocol,
			    sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
			    sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
		return -EADDRINUSE;
	}

	/*
	 * Allocate the various connection tracking objects.
	 * GFP_ATOMIC is required here because we hold si->lock (a BH
	 * spinlock).  Each failure path unlocks and frees whatever was
	 * allocated so far, in reverse order.
	 */
	c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
	if (unlikely(!c)) {
		spin_unlock_bh(&si->lock);
		return -ENOMEM;
	}

	original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!original_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(c);
		return -ENOMEM;
	}

	reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!reply_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(original_cm);
		kfree(c);
		return -ENOMEM;
	}

	/*
	 * Fill in the "original" direction connection matching object.
	 * Note that the transmit MAC address is "dest_mac_xlate" because
	 * we always know both ends of a connection by their translated
	 * addresses and not their public addresses.
	 */
	original_cm->match_dev = src_dev;
	original_cm->match_protocol = sic->protocol;
	original_cm->match_src_ip = sic->src_ip.ip;
	original_cm->match_src_port = sic->src_port;
	original_cm->match_dest_ip = sic->dest_ip.ip;
	original_cm->match_dest_port = sic->dest_port;
	original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
	original_cm->xlate_src_port = sic->src_port_xlate;
	original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
	original_cm->xlate_dest_port = sic->dest_port_xlate;
	original_cm->rx_packet_count = 0;
	original_cm->rx_packet_count64 = 0;
	original_cm->rx_byte_count = 0;
	original_cm->rx_byte_count64 = 0;
	original_cm->xmit_dev = dest_dev;
	original_cm->xmit_dev_mtu = sic->dest_mtu;
	memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
	memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
	original_cm->connection = c;
	original_cm->counter_match = reply_cm;
	original_cm->flags = 0;
	/* Optional QoS remarking, driven by the create-request flags. */
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		original_cm->priority = sic->src_priority;
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	original_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	original_cm->flow_accel = sic->original_accel;
#endif
	original_cm->active_next = NULL;
	original_cm->active_prev = NULL;
	original_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(dest_dev->flags & IFF_POINTOPOINT)) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (dest_dev->header_ops) {
			if (dest_dev->header_ops->create == eth_header) {
				original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}

	/*
	 * Fill in the "reply" direction connection matching object.
	 * The match fields are the translated tuple reversed; the xlate
	 * fields undo the translation on the way back.
	 */
	reply_cm->match_dev = dest_dev;
	reply_cm->match_protocol = sic->protocol;
	reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
	reply_cm->match_src_port = sic->dest_port_xlate;
	reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
	reply_cm->match_dest_port = sic->src_port_xlate;
	reply_cm->xlate_src_ip = sic->dest_ip.ip;
	reply_cm->xlate_src_port = sic->dest_port;
	reply_cm->xlate_dest_ip = sic->src_ip.ip;
	reply_cm->xlate_dest_port = sic->src_port;
	reply_cm->rx_packet_count = 0;
	reply_cm->rx_packet_count64 = 0;
	reply_cm->rx_byte_count = 0;
	reply_cm->rx_byte_count64 = 0;
	reply_cm->xmit_dev = src_dev;
	reply_cm->xmit_dev_mtu = sic->src_mtu;
	memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
	memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
	reply_cm->connection = c;
	reply_cm->counter_match = original_cm;
	reply_cm->flags = 0;
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		reply_cm->priority = sic->dest_priority;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	reply_cm->flow_accel = sic->reply_accel;
#endif
	reply_cm->active_next = NULL;
	reply_cm->active_prev = NULL;
	reply_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(src_dev->flags & IFF_POINTOPOINT)) {
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (src_dev->header_ops) {
			if (src_dev->header_ops->create == eth_header) {
				reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}


	/*
	 * Only set the translation flags when the rule actually translates
	 * something; the fast path skips the rewrite work otherwise.
	 */
	if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
	}

	if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
	}

	c->protocol = sic->protocol;
	c->src_ip = sic->src_ip.ip;
	c->src_ip_xlate = sic->src_ip_xlate.ip;
	c->src_port = sic->src_port;
	c->src_port_xlate = sic->src_port_xlate;
	c->original_dev = src_dev;
	c->original_match = original_cm;
	c->dest_ip = sic->dest_ip.ip;
	c->dest_ip_xlate = sic->dest_ip_xlate.ip;
	c->dest_port = sic->dest_port;
	c->dest_port_xlate = sic->dest_port_xlate;
	c->reply_dev = dest_dev;
	c->reply_match = reply_cm;
	c->mark = sic->mark;
	c->debug_read_seq = 0;
	c->last_sync_jiffies = get_jiffies_64();

	/*
	 * Take hold of our source and dest devices for the duration of the connection.
	 */
	dev_hold(c->original_dev);
	dev_hold(c->reply_dev);

	/*
	 * Initialize the protocol-specific information that we track.
	 * For TCP, max_win of 0 is clamped to 1 so window-scale arithmetic
	 * never sees a zero window limit.
	 */
	switch (sic->protocol) {
	case IPPROTO_TCP:
		original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
		original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
		original_cm->protocol_state.tcp.end = sic->src_td_end;
		original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
		reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
		reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
		reply_cm->protocol_state.tcp.end = sic->dest_td_end;
		reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
		if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
			original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
			reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		}
		break;
	}

	sfe_ipv4_connection_match_compute_translations(original_cm);
	sfe_ipv4_connection_match_compute_translations(reply_cm);
	sfe_ipv4_insert_sfe_ipv4_connection(si, c);

	spin_unlock_bh(&si->lock);

	/*
	 * We have everything we need!
	 */
	DEBUG_INFO("new connection - mark: %08x, p: %d\n"
		   "  s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
		   "  d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
		   sic->mark, sic->protocol,
		   sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
		   &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
		   dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
		   &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));

	return 0;
}
2730
2731/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002732 * sfe_ipv4_destroy_rule()
2733 * Destroy a forwarding rule.
2734 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002735void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002736{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002737 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002738 struct sfe_ipv4_connection *c;
2739
2740 spin_lock_bh(&si->lock);
2741 si->connection_destroy_requests++;
2742
2743 /*
2744 * Check to see if we have a flow that matches the rule we're trying
2745 * to destroy. If there isn't then we can't destroy it.
2746 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002747 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2748 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002749 if (!c) {
2750 si->connection_destroy_misses++;
2751 spin_unlock_bh(&si->lock);
2752
2753 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002754 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2755 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002756 return;
2757 }
2758
2759 /*
2760 * Remove our connection details from the hash tables.
2761 */
2762 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2763 spin_unlock_bh(&si->lock);
2764
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002765 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002766
2767 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002768 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2769 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002770}
2771
2772/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002773 * sfe_ipv4_register_sync_rule_callback()
2774 * Register a callback for rule synchronization.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002775 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002776void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002777{
2778 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002779
2780 spin_lock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002781 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002782 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002783}
2784
2785/*
2786 * sfe_ipv4_get_debug_dev()
2787 */
2788static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2789 struct device_attribute *attr,
2790 char *buf)
2791{
2792 struct sfe_ipv4 *si = &__si;
2793 ssize_t count;
2794 int num;
2795
2796 spin_lock_bh(&si->lock);
2797 num = si->debug_dev;
2798 spin_unlock_bh(&si->lock);
2799
2800 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2801 return count;
2802}
2803
/*
 * sysfs attributes.
 *
 * NOTE(review): the mode includes S_IWUSR but no store handler is
 * provided (NULL) — the attribute appears writable yet writes have no
 * handler.  Confirm whether S_IWUSR is intentional.
 */
static const struct device_attribute sfe_ipv4_debug_dev_attr =
	__ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002809
2810/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002811 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002812 * Destroy all connections that match a particular device.
2813 *
2814 * If we pass dev as NULL then this destroys all connections.
2815 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002816void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002817{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002818 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002819 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002820
Xiaoping Fan34586472015-07-03 02:20:35 -07002821another_round:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002822 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002823
Xiaoping Fan34586472015-07-03 02:20:35 -07002824 for (c = si->all_connections_head; c; c = c->all_connections_next) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002825 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002826 * Does this connection relate to the device we are destroying?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002827 */
2828 if (!dev
2829 || (dev == c->original_dev)
2830 || (dev == c->reply_dev)) {
Xiaoping Fan34586472015-07-03 02:20:35 -07002831 break;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002832 }
Xiaoping Fan34586472015-07-03 02:20:35 -07002833 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002834
Xiaoping Fan34586472015-07-03 02:20:35 -07002835 if (c) {
2836 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002837 }
2838
2839 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002840
2841 if (c) {
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002842 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Xiaoping Fan34586472015-07-03 02:20:35 -07002843 goto another_round;
2844 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002845}
2846
2847/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002848 * sfe_ipv4_periodic_sync()
2849 */
2850static void sfe_ipv4_periodic_sync(unsigned long arg)
2851{
2852 struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002853 u64 now_jiffies;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002854 int quota;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002855 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002856
2857 now_jiffies = get_jiffies_64();
2858
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002859 rcu_read_lock();
2860 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2861 if (!sync_rule_callback) {
2862 rcu_read_unlock();
2863 goto done;
2864 }
2865
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002866 spin_lock_bh(&si->lock);
2867 sfe_ipv4_update_summary_stats(si);
2868
2869 /*
2870 * Get an estimate of the number of connections to parse in this sync.
2871 */
2872 quota = (si->num_connections + 63) / 64;
2873
2874 /*
2875 * Walk the "active" list and sync the connection state.
2876 */
2877 while (quota--) {
2878 struct sfe_ipv4_connection_match *cm;
2879 struct sfe_ipv4_connection_match *counter_cm;
2880 struct sfe_ipv4_connection *c;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002881 struct sfe_connection_sync sis;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002882
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002883 cm = si->active_head;
2884 if (!cm) {
2885 break;
2886 }
2887
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002888 /*
Nicolas Costabafb3af2014-01-29 16:39:39 -06002889 * There's a possibility that our counter match is in the active list too.
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002890 * If it is then remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002891 */
2892 counter_cm = cm->counter_match;
2893 if (counter_cm->active) {
2894 counter_cm->active = false;
2895
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002896 /*
2897 * We must have a connection preceding this counter match
2898 * because that's the one that got us to this point, so we don't have
2899 * to worry about removing the head of the list.
2900 */
2901 counter_cm->active_prev->active_next = counter_cm->active_next;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002902
2903 if (likely(counter_cm->active_next)) {
2904 counter_cm->active_next->active_prev = counter_cm->active_prev;
2905 } else {
2906 si->active_tail = counter_cm->active_prev;
2907 }
2908
2909 counter_cm->active_next = NULL;
2910 counter_cm->active_prev = NULL;
2911 }
2912
2913 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002914 * Now remove the head of the active scan list.
2915 */
2916 cm->active = false;
2917 si->active_head = cm->active_next;
2918 if (likely(cm->active_next)) {
2919 cm->active_next->active_prev = NULL;
2920 } else {
2921 si->active_tail = NULL;
2922 }
2923 cm->active_next = NULL;
2924
2925 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002926 * Sync the connection state.
2927 */
2928 c = cm->connection;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002929 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002930
2931 /*
2932 * We don't want to be holding the lock when we sync!
2933 */
2934 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002935 sync_rule_callback(&sis);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002936 spin_lock_bh(&si->lock);
2937 }
2938
2939 spin_unlock_bh(&si->lock);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002940 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002941
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002942done:
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06002943 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002944}
2945
2946#define CHAR_DEV_MSG_SIZE 768
2947
2948/*
2949 * sfe_ipv4_debug_dev_read_start()
2950 * Generate part of the XML output.
2951 */
2952static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2953 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2954{
2955 int bytes_read;
2956
Xiaoping Fan34586472015-07-03 02:20:35 -07002957 si->debug_read_seq++;
2958
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002959 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2960 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2961 return false;
2962 }
2963
2964 *length -= bytes_read;
2965 *total_read += bytes_read;
2966
2967 ws->state++;
2968 return true;
2969}
2970
2971/*
2972 * sfe_ipv4_debug_dev_read_connections_start()
2973 * Generate part of the XML output.
2974 */
2975static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2976 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2977{
2978 int bytes_read;
2979
2980 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2981 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2982 return false;
2983 }
2984
2985 *length -= bytes_read;
2986 *total_read += bytes_read;
2987
2988 ws->state++;
2989 return true;
2990}
2991
2992/*
2993 * sfe_ipv4_debug_dev_read_connections_connection()
2994 * Generate part of the XML output.
2995 */
2996static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2997 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2998{
2999 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003000 struct sfe_ipv4_connection_match *original_cm;
3001 struct sfe_ipv4_connection_match *reply_cm;
3002 int bytes_read;
3003 int protocol;
3004 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01003005 __be32 src_ip;
3006 __be32 src_ip_xlate;
3007 __be16 src_port;
3008 __be16 src_port_xlate;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003009 u64 src_rx_packets;
3010 u64 src_rx_bytes;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003011 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01003012 __be32 dest_ip;
3013 __be32 dest_ip_xlate;
3014 __be16 dest_port;
3015 __be16 dest_port_xlate;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003016 u64 dest_rx_packets;
3017 u64 dest_rx_bytes;
3018 u64 last_sync_jiffies;
3019 u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003020#ifdef CONFIG_NF_FLOW_COOKIE
3021 int src_flow_cookie, dst_flow_cookie;
3022#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003023
3024 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003025
3026 for (c = si->all_connections_head; c; c = c->all_connections_next) {
3027 if (c->debug_read_seq < si->debug_read_seq) {
3028 c->debug_read_seq = si->debug_read_seq;
3029 break;
3030 }
3031 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003032
3033 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07003034 * If there were no connections then move to the next state.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003035 */
3036 if (!c) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003037 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003038 ws->state++;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003039 return true;
3040 }
3041
3042 original_cm = c->original_match;
3043 reply_cm = c->reply_match;
3044
3045 protocol = c->protocol;
3046 src_dev = c->original_dev;
3047 src_ip = c->src_ip;
3048 src_ip_xlate = c->src_ip_xlate;
3049 src_port = c->src_port;
3050 src_port_xlate = c->src_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003051 src_priority = original_cm->priority;
3052 src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003053
3054 sfe_ipv4_connection_match_update_summary_stats(original_cm);
3055 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
3056
3057 src_rx_packets = original_cm->rx_packet_count64;
3058 src_rx_bytes = original_cm->rx_byte_count64;
3059 dest_dev = c->reply_dev;
3060 dest_ip = c->dest_ip;
3061 dest_ip_xlate = c->dest_ip_xlate;
3062 dest_port = c->dest_port;
3063 dest_port_xlate = c->dest_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003064 dest_priority = reply_cm->priority;
3065 dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003066 dest_rx_packets = reply_cm->rx_packet_count64;
3067 dest_rx_bytes = reply_cm->rx_byte_count64;
3068 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003069 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003070#ifdef CONFIG_NF_FLOW_COOKIE
3071 src_flow_cookie = original_cm->flow_cookie;
3072 dst_flow_cookie = reply_cm->flow_cookie;
3073#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003074 spin_unlock_bh(&si->lock);
3075
3076 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3077 "protocol=\"%u\" "
3078 "src_dev=\"%s\" "
3079 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3080 "src_port=\"%u\" src_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003081 "src_priority=\"%u\" src_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003082 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3083 "dest_dev=\"%s\" "
3084 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3085 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003086 "dest_priority=\"%u\" dest_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003087 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003088#ifdef CONFIG_NF_FLOW_COOKIE
3089 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3090#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003091 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003092 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003093 protocol,
3094 src_dev->name,
3095 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003096 ntohs(src_port), ntohs(src_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003097 src_priority, src_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003098 src_rx_packets, src_rx_bytes,
3099 dest_dev->name,
3100 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003101 ntohs(dest_port), ntohs(dest_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003102 dest_priority, dest_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003103 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003104#ifdef CONFIG_NF_FLOW_COOKIE
3105 src_flow_cookie, dst_flow_cookie,
3106#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003107 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003108
3109 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3110 return false;
3111 }
3112
3113 *length -= bytes_read;
3114 *total_read += bytes_read;
3115
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003116 return true;
3117}
3118
3119/*
3120 * sfe_ipv4_debug_dev_read_connections_end()
3121 * Generate part of the XML output.
3122 */
3123static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3124 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3125{
3126 int bytes_read;
3127
3128 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3129 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3130 return false;
3131 }
3132
3133 *length -= bytes_read;
3134 *total_read += bytes_read;
3135
3136 ws->state++;
3137 return true;
3138}
3139
3140/*
3141 * sfe_ipv4_debug_dev_read_exceptions_start()
3142 * Generate part of the XML output.
3143 */
3144static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3145 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3146{
3147 int bytes_read;
3148
3149 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3150 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3151 return false;
3152 }
3153
3154 *length -= bytes_read;
3155 *total_read += bytes_read;
3156
3157 ws->state++;
3158 return true;
3159}
3160
3161/*
3162 * sfe_ipv4_debug_dev_read_exceptions_exception()
3163 * Generate part of the XML output.
3164 */
3165static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3166 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3167{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003168 u64 ct;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003169
3170 spin_lock_bh(&si->lock);
3171 ct = si->exception_events64[ws->iter_exception];
3172 spin_unlock_bh(&si->lock);
3173
3174 if (ct) {
3175 int bytes_read;
3176
3177 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3178 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3179 sfe_ipv4_exception_events_string[ws->iter_exception],
3180 ct);
3181 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3182 return false;
3183 }
3184
3185 *length -= bytes_read;
3186 *total_read += bytes_read;
3187 }
3188
3189 ws->iter_exception++;
3190 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3191 ws->iter_exception = 0;
3192 ws->state++;
3193 }
3194
3195 return true;
3196}
3197
3198/*
3199 * sfe_ipv4_debug_dev_read_exceptions_end()
3200 * Generate part of the XML output.
3201 */
3202static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3203 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3204{
3205 int bytes_read;
3206
3207 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3208 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3209 return false;
3210 }
3211
3212 *length -= bytes_read;
3213 *total_read += bytes_read;
3214
3215 ws->state++;
3216 return true;
3217}
3218
3219/*
3220 * sfe_ipv4_debug_dev_read_stats()
3221 * Generate part of the XML output.
3222 */
3223static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3224 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3225{
3226 int bytes_read;
3227 unsigned int num_connections;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003228 u64 packets_forwarded;
3229 u64 packets_not_forwarded;
3230 u64 connection_create_requests;
3231 u64 connection_create_collisions;
3232 u64 connection_destroy_requests;
3233 u64 connection_destroy_misses;
3234 u64 connection_flushes;
3235 u64 connection_match_hash_hits;
3236 u64 connection_match_hash_reorders;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003237
3238 spin_lock_bh(&si->lock);
3239 sfe_ipv4_update_summary_stats(si);
3240
3241 num_connections = si->num_connections;
3242 packets_forwarded = si->packets_forwarded64;
3243 packets_not_forwarded = si->packets_not_forwarded64;
3244 connection_create_requests = si->connection_create_requests64;
3245 connection_create_collisions = si->connection_create_collisions64;
3246 connection_destroy_requests = si->connection_destroy_requests64;
3247 connection_destroy_misses = si->connection_destroy_misses64;
3248 connection_flushes = si->connection_flushes64;
3249 connection_match_hash_hits = si->connection_match_hash_hits64;
3250 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3251 spin_unlock_bh(&si->lock);
3252
3253 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3254 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003255 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3256 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003257 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3258 "flushes=\"%llu\" "
3259 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3260 num_connections,
3261 packets_forwarded,
3262 packets_not_forwarded,
3263 connection_create_requests,
3264 connection_create_collisions,
3265 connection_destroy_requests,
3266 connection_destroy_misses,
3267 connection_flushes,
3268 connection_match_hash_hits,
3269 connection_match_hash_reorders);
3270 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3271 return false;
3272 }
3273
3274 *length -= bytes_read;
3275 *total_read += bytes_read;
3276
3277 ws->state++;
3278 return true;
3279}
3280
3281/*
3282 * sfe_ipv4_debug_dev_read_end()
3283 * Generate part of the XML output.
3284 */
3285static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3286 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3287{
3288 int bytes_read;
3289
3290 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3291 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3292 return false;
3293 }
3294
3295 *length -= bytes_read;
3296 *total_read += bytes_read;
3297
3298 ws->state++;
3299 return true;
3300}
3301
3302/*
3303 * Array of write functions that write various XML elements that correspond to
3304 * our XML output state machine.
3305 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003306static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003307 sfe_ipv4_debug_dev_read_start,
3308 sfe_ipv4_debug_dev_read_connections_start,
3309 sfe_ipv4_debug_dev_read_connections_connection,
3310 sfe_ipv4_debug_dev_read_connections_end,
3311 sfe_ipv4_debug_dev_read_exceptions_start,
3312 sfe_ipv4_debug_dev_read_exceptions_exception,
3313 sfe_ipv4_debug_dev_read_exceptions_end,
3314 sfe_ipv4_debug_dev_read_stats,
3315 sfe_ipv4_debug_dev_read_end,
3316};
3317
3318/*
3319 * sfe_ipv4_debug_dev_read()
3320 * Send info to userspace upon read request from user
3321 */
3322static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3323{
3324 char msg[CHAR_DEV_MSG_SIZE];
3325 int total_read = 0;
3326 struct sfe_ipv4_debug_xml_write_state *ws;
3327 struct sfe_ipv4 *si = &__si;
3328
3329 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3330 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3331 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3332 continue;
3333 }
3334 }
3335
3336 return total_read;
3337}
3338
3339/*
3340 * sfe_ipv4_debug_dev_write()
Nicolas Costabafb3af2014-01-29 16:39:39 -06003341 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003342 */
3343static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3344{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003345 struct sfe_ipv4 *si = &__si;
3346
3347 spin_lock_bh(&si->lock);
3348 sfe_ipv4_update_summary_stats(si);
3349
Matthew McClintock54167ab2014-01-14 21:06:28 -06003350 si->packets_forwarded64 = 0;
3351 si->packets_not_forwarded64 = 0;
3352 si->connection_create_requests64 = 0;
3353 si->connection_create_collisions64 = 0;
3354 si->connection_destroy_requests64 = 0;
3355 si->connection_destroy_misses64 = 0;
3356 si->connection_flushes64 = 0;
3357 si->connection_match_hash_hits64 = 0;
3358 si->connection_match_hash_reorders64 = 0;
3359 spin_unlock_bh(&si->lock);
3360
3361 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003362}
3363
3364/*
3365 * sfe_ipv4_debug_dev_open()
3366 */
3367static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3368{
3369 struct sfe_ipv4_debug_xml_write_state *ws;
3370
3371 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3372 if (!ws) {
3373 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3374 if (!ws) {
3375 return -ENOMEM;
3376 }
3377
3378 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3379 file->private_data = ws;
3380 }
3381
3382 return 0;
3383}
3384
3385/*
3386 * sfe_ipv4_debug_dev_release()
3387 */
3388static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3389{
3390 struct sfe_ipv4_debug_xml_write_state *ws;
3391
3392 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3393 if (ws) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003394 /*
3395 * We've finished with our output so free the write state.
3396 */
3397 kfree(ws);
3398 }
3399
3400 return 0;
3401}
3402
3403/*
3404 * File operations used in the debug char device
3405 */
3406static struct file_operations sfe_ipv4_debug_dev_fops = {
3407 .read = sfe_ipv4_debug_dev_read,
3408 .write = sfe_ipv4_debug_dev_write,
3409 .open = sfe_ipv4_debug_dev_open,
3410 .release = sfe_ipv4_debug_dev_release
3411};
3412
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003413#ifdef CONFIG_NF_FLOW_COOKIE
3414/*
3415 * sfe_register_flow_cookie_cb
3416 * register a function in SFE to let SFE use this function to configure flow cookie for a flow
3417 *
3418 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
3419 * can use this function to configure flow cookie for a flow.
3420 * return: 0, success; !=0, fail
3421 */
3422int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
3423{
3424 struct sfe_ipv4 *si = &__si;
3425
3426 BUG_ON(!cb);
3427
3428 if (si->flow_cookie_set_func) {
3429 return -1;
3430 }
3431
3432 rcu_assign_pointer(si->flow_cookie_set_func, cb);
3433 return 0;
3434}
3435
3436/*
3437 * sfe_unregister_flow_cookie_cb
3438 * unregister function which is used to configure flow cookie for a flow
3439 *
3440 * return: 0, success; !=0, fail
3441 */
3442int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
3443{
3444 struct sfe_ipv4 *si = &__si;
3445
3446 RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
3447 return 0;
3448}
Xiaoping Fan640faf42015-08-28 15:50:55 -07003449
3450/*
3451 * sfe_ipv4_get_flow_cookie()
3452 */
3453static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev,
3454 struct device_attribute *attr,
3455 char *buf)
3456{
3457 struct sfe_ipv4 *si = &__si;
Xiaoping Fan01c67cc2015-11-09 11:31:57 -08003458 return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003459}
3460
3461/*
3462 * sfe_ipv4_set_flow_cookie()
3463 */
3464static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev,
3465 struct device_attribute *attr,
3466 const char *buf, size_t size)
3467{
3468 struct sfe_ipv4 *si = &__si;
3469 strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable);
3470
3471 return size;
3472}
3473
3474/*
3475 * sysfs attributes.
3476 */
3477static const struct device_attribute sfe_ipv4_flow_cookie_attr =
Xiaoping Fane70da412016-02-26 16:47:57 -08003478 __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003479#endif /*CONFIG_NF_FLOW_COOKIE*/
3480
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003481/*
Dave Hudson87973cd2013-10-22 16:00:04 +01003482 * sfe_ipv4_init()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003483 */
Dave Hudson87973cd2013-10-22 16:00:04 +01003484static int __init sfe_ipv4_init(void)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003485{
3486 struct sfe_ipv4 *si = &__si;
3487 int result = -1;
3488
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003489 DEBUG_INFO("SFE IPv4 init\n");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003490
3491 /*
3492 * Create sys/sfe_ipv4
3493 */
3494 si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
3495 if (!si->sys_sfe_ipv4) {
3496 DEBUG_ERROR("failed to register sfe_ipv4\n");
3497 goto exit1;
3498 }
3499
3500 /*
3501 * Create files, one for each parameter supported by this module.
3502 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003503 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3504 if (result) {
3505 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003506 goto exit2;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003507 }
3508
Xiaoping Fan640faf42015-08-28 15:50:55 -07003509#ifdef CONFIG_NF_FLOW_COOKIE
3510 result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3511 if (result) {
3512 DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
3513 goto exit3;
3514 }
3515#endif /* CONFIG_NF_FLOW_COOKIE */
3516
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003517 /*
3518 * Register our debug char device.
3519 */
3520 result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
3521 if (result < 0) {
3522 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003523 goto exit4;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003524 }
3525
3526 si->debug_dev = result;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003527
3528 /*
3529 * Create a timer to handle periodic statistics.
3530 */
3531 setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
Matthew McClintockaf48f1e2014-01-23 15:29:19 -06003532 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003533
Dave Hudson87973cd2013-10-22 16:00:04 +01003534 spin_lock_init(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003535
Dave Hudson87973cd2013-10-22 16:00:04 +01003536 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003537
Xiaoping Fan640faf42015-08-28 15:50:55 -07003538exit4:
3539#ifdef CONFIG_NF_FLOW_COOKIE
3540 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3541
3542exit3:
3543#endif /* CONFIG_NF_FLOW_COOKIE */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003544 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3545
Xiaoping Fan640faf42015-08-28 15:50:55 -07003546exit2:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003547 kobject_put(si->sys_sfe_ipv4);
3548
3549exit1:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003550 return result;
3551}
3552
3553/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003554 * sfe_ipv4_exit()
3555 */
3556static void __exit sfe_ipv4_exit(void)
3557{
Dave Hudson87973cd2013-10-22 16:00:04 +01003558 struct sfe_ipv4 *si = &__si;
3559
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003560 DEBUG_INFO("SFE IPv4 exit\n");
Dave Hudson87973cd2013-10-22 16:00:04 +01003561
3562 /*
3563 * Destroy all connections.
3564 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003565 sfe_ipv4_destroy_all_rules_for_dev(NULL);
Dave Hudson87973cd2013-10-22 16:00:04 +01003566
Dave Hudson87973cd2013-10-22 16:00:04 +01003567 del_timer_sync(&si->timer);
3568
Dave Hudson87973cd2013-10-22 16:00:04 +01003569 unregister_chrdev(si->debug_dev, "sfe_ipv4");
3570
Xiaoping Fan640faf42015-08-28 15:50:55 -07003571#ifdef CONFIG_NF_FLOW_COOKIE
3572 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
3573#endif /* CONFIG_NF_FLOW_COOKIE */
Dave Hudson87973cd2013-10-22 16:00:04 +01003574 sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
3575
Dave Hudson87973cd2013-10-22 16:00:04 +01003576 kobject_put(si->sys_sfe_ipv4);
3577
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003578}
3579
3580module_init(sfe_ipv4_init)
3581module_exit(sfe_ipv4_exit)
3582
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003583EXPORT_SYMBOL(sfe_ipv4_recv);
3584EXPORT_SYMBOL(sfe_ipv4_create_rule);
3585EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
3586EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
3587EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06003588EXPORT_SYMBOL(sfe_ipv4_mark_rule);
Nicolas Costa436926b2014-01-14 10:36:22 -06003589EXPORT_SYMBOL(sfe_ipv4_update_rule);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003590#ifdef CONFIG_NF_FLOW_COOKIE
3591EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
3592EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
3593#endif
Dave Hudsondcd08fb2013-11-22 09:25:16 -06003594
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003595MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
Matthew McClintocka3221942014-01-16 11:44:26 -06003596MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003597