blob: 9f7ebd1c95a5cf51ce184fa3a366059ac91c981e [file] [log] [blame]
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001/*
2 * sfe_ipv4.c
3 * Shortcut forwarding engine - IPv4 edition.
4 *
Tian Yang45f39c82020-10-06 14:07:47 -07005 * Copyright (c) 2013-2016, 2019-2020 The Linux Foundation. All rights reserved.
Xiaoping Fana42c68b2015-08-07 18:00:39 -07006 * Permission to use, copy, modify, and/or distribute this software for
7 * any purpose with or without fee is hereby granted, provided that the
8 * above copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010016 */
Matthew McClintocka3221942014-01-16 11:44:26 -060017
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010018#include <linux/module.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060019#include <linux/sysfs.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010020#include <linux/skbuff.h>
21#include <linux/icmp.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010022#include <net/tcp.h>
Dave Hudsondcd08fb2013-11-22 09:25:16 -060023#include <linux/etherdevice.h>
Tian Yang45f39c82020-10-06 14:07:47 -070024#include <linux/version.h>
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010025
Dave Hudsondcd08fb2013-11-22 09:25:16 -060026#include "sfe.h"
Xiaoping Fand44a5b42015-05-26 17:37:37 -070027#include "sfe_cm.h"
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010028
29/*
Dave Hudsona8197e72013-12-17 23:46:22 +000030 * By default Linux IP header and transport layer header structures are
31 * unpacked, assuming that such headers should be 32-bit aligned.
32 * Unfortunately some wireless adaptors can't cope with this requirement and
33 * some CPUs can't handle misaligned accesses. For those platforms we
34 * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
35 * When we do this the compiler will generate slightly worse code than for the
36 * aligned case (on most platforms) but will be much quicker than fixing
37 * things up in an unaligned trap handler.
38 */
39#define SFE_IPV4_UNALIGNED_IP_HEADER 1
40#if SFE_IPV4_UNALIGNED_IP_HEADER
41#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
42#else
43#define SFE_IPV4_UNALIGNED_STRUCT
44#endif
45
46/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060047 * An Ethernet header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000048 * help with performance on some platforms (see the definition of
49 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010050 */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC address, as three 16-bit words */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC address, as three 16-bit words */
	__be16 h_proto;			/* EtherType / packet type ID field */
} SFE_IPV4_UNALIGNED_STRUCT;
56
Xiaoping Fane1963d42015-08-25 17:06:19 -070057#define SFE_IPV4_DSCP_MASK 0x3
58#define SFE_IPV4_DSCP_SHIFT 2
59
Matthew McClintockdb5ac512014-01-16 17:01:40 -060060/*
61 * An IPv4 header, but with an optional "packed" attribute to
62 * help with performance on some platforms (see the definition of
63 * SFE_IPV4_UNALIGNED_STRUCT)
64 */
struct sfe_ipv4_ip_hdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,		/* Header length in 32-bit words */
	     version:4;		/* IP version (4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,		/* IP version (4) */
	     ihl:4;		/* Header length in 32-bit words */
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;		/* Type of service (DSCP + ECN bits) */
	__be16 tot_len;		/* Total length of IP datagram */
	__be16 id;		/* Identification */
	__be16 frag_off;	/* Fragment offset and flags */
	__u8 ttl;		/* Time to live */
	__u8 protocol;		/* Transport protocol number */
	__sum16 check;		/* Header checksum */
	__be32 saddr;		/* Source IP address */
	__be32 daddr;		/* Destination IP address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010089
90/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -060091 * A UDP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +000092 * help with performance on some platforms (see the definition of
93 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +010094 */
struct sfe_ipv4_udp_hdr {
	__be16 source;		/* Source port */
	__be16 dest;		/* Destination port */
	__be16 len;		/* Length of UDP header plus payload */
	__sum16 check;		/* UDP checksum */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100101
102/*
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600103 * A TCP header, but with an optional "packed" attribute to
Dave Hudsona8197e72013-12-17 23:46:22 +0000104 * help with performance on some platforms (see the definition of
105 * SFE_IPV4_UNALIGNED_STRUCT)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100106 */
struct sfe_ipv4_tcp_hdr {
	__be16 source;		/* Source port */
	__be16 dest;		/* Destination port */
	__be32 seq;		/* Sequence number */
	__be32 ack_seq;		/* Acknowledgment number */
	/*
	 * Data offset, reserved bits and flags; bitfield order depends on
	 * the CPU's bitfield endianness.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;		/* Receive window size */
	__sum16 check;		/* TCP checksum */
	__be16 urg_ptr;		/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100141
142/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100143 * Specifies the lower bound on ACK numbers carried in the TCP header
144 */
145#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
146
147/*
148 * IPv4 TCP connection match additional data.
149 */
struct sfe_ipv4_tcp_connection_match {
	u8 win_scale;		/* Window scale */
	u32 max_win;		/* Maximum window size seen */
	u32 end;		/* Sequence number of the next byte to send (seq + segment length) */
	u32 max_end;		/* Sequence number of the last byte to ack */
};
156
157/*
158 * Bit flags for IPv4 connection matching entry.
159 */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700160#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100161 /* Perform source translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700162#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100163 /* Perform destination translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700164#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100165 /* Ignore TCP sequence numbers */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700166#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600167 /* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
					/* Write a full L2 header on transmit */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700170#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
171 /* remark priority of SKB */
172#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
173 /* remark DSCP of packet */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100174
175/*
176 * IPv4 connection matching structure.
177 */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next entry in the hash chain */
	struct sfe_ipv4_connection_match *prev;
					/* Previous entry in the hash chain */
	struct sfe_ipv4_connection *connection;
					/* Connection that owns this match entry */
	struct sfe_ipv4_connection_match *counter_match;
					/* Matches the flow in the opposite direction as the one in *connection */
	struct sfe_ipv4_connection_match *active_next;
					/* Next entry on the active list */
	struct sfe_ipv4_connection_match *active_prev;
					/* Previous entry on the active list */
	bool active;			/* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;	/* Network device */
	u8 match_protocol;		/* Protocol */
	__be32 match_src_ip;		/* Source IP address */
	__be32 match_dest_ip;		/* Destination IP address */
	__be16 match_src_port;		/* Source port/connection ident */
	__be16 match_dest_port;		/* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	u32 flags;			/* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	u32 flow_cookie;		/* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	u32 flow_accel;			/* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {				/* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * rx_packet_count64/rx_byte_count64 after a sync period.
	 */
	u32 rx_packet_count;
	u32 rx_byte_count;

	/*
	 * Packet translation information.
	 */
	__be32 xlate_src_ip;		/* Address after source translation */
	__be16 xlate_src_port;		/* Port/connection ident after source translation */
	u16 xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	u16 xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;		/* Address after destination translation */
	__be16 xlate_dest_port;		/* Port/connection ident after destination translation */
	u16 xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	u16 xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * QoS information
	 */
	u32 priority;			/* Priority to remark when FLAG_PRIORITY_REMARK is set */
	u32 dscp;			/* DSCP to remark when FLAG_DSCP_REMARK is set */

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;	/* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	u16 xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	u16 xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	u64 rx_packet_count64;		/* Accumulated packet count across sync periods */
	u64 rx_byte_count64;		/* Accumulated byte count across sync periods */
};
265
266/*
267 * Per-connection data structure.
268 */
struct sfe_ipv4_connection {
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;			/* IP protocol number */
	__be32 src_ip;			/* Src IP addr pre-translation */
	__be32 src_ip_xlate;		/* Src IP addr post-translation */
	__be32 dest_ip;			/* Dest IP addr pre-translation */
	__be32 dest_ip_xlate;		/* Dest IP addr post-translation */
	__be16 src_port;		/* Src port pre-translation */
	__be16 src_port_xlate;		/* Src port post-translation */
	__be16 dest_port;		/* Dest port pre-translation */
	__be16 dest_port_xlate;		/* Dest port post-translation */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;	/* Reply direction source device */
	u64 last_sync_jiffies;		/* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	u32 mark;			/* mark for outgoing packet */
	u32 debug_read_seq;		/* sequence number for debug dump */
};
298
299/*
300 * IPv4 connections and hash table size information.
301 */
302#define SFE_IPV4_CONNECTION_HASH_SHIFT 12
303#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
304#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
305
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800306#ifdef CONFIG_NF_FLOW_COOKIE
307#define SFE_FLOW_COOKIE_SIZE 2048
308#define SFE_FLOW_COOKIE_MASK 0x7ff
309
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;
					/* Connection match entry bound to this cookie slot */
	unsigned long last_clean_time;
					/* NOTE(review): presumably jiffies of the last slot cleanup — confirm against users */
};
314#endif
315
/*
 * Exception events counted by the engine (see exception_events[] in
 * struct sfe_ipv4).  sfe_ipv4_exception_events_string[] is indexed by
 * these values and must be kept in the same order.
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_LAST
};
355
/*
 * Human-readable names for the exception events, indexed by
 * enum sfe_ipv4_exception_events.  The order here must match the enum
 * declaration exactly.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};
394
395/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -0600396 * Per-module structure.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100397 */
struct sfe_ipv4 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table*/
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware*/
	int flow_cookie_enable;
					/* Enable/disable flow cookie at runtime */
#endif

	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * connection_xxx64 after a sync period.
	 */
	u32 connection_create_requests;
					/* Number of IPv4 connection create requests */
	u32 connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u32 connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	u32 connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u32 connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	u32 connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	u32 connection_flushes;		/* Number of IPv4 connection flushes */
	u32 packets_forwarded;		/* Number of IPv4 packets forwarded */
	u32 packets_not_forwarded;	/* Number of IPv4 packets not forwarded */
	u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];
					/* Per-event exception counters for this sync period */

	/*
	 * Summary statistics.
	 */
	u64 connection_create_requests64;
					/* Number of IPv4 connection create requests */
	u64 connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u64 connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	u64 connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u64 connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	u64 connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	u64 connection_flushes64;	/* Number of IPv4 connection flushes */
	u64 packets_forwarded64;	/* Number of IPv4 packets forwarded */
	u64 packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];
					/* Accumulated per-event exception counters */

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
	u32 debug_read_seq;		/* sequence number for debug dump */
};
474
475/*
476 * Enumeration of the XML output.
477 */
/*
 * States of the debug-dump XML writer state machine; advanced in order
 * from START to DONE as the output is produced.
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
490
491/*
492 * XML write state.
493 */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;		/* Next exception iterator */
};
499
/*
 * Signature of a per-state debug XML writer.  Returns true when the state
 * produced output; writes into 'buffer'/'msg' and updates 'length' and
 * 'total_read' (exact contract defined by the writer implementations).
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/* Singleton engine instance shared by all entry points in this file. */
static struct sfe_ipv4 __si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100504
505/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100506 * sfe_ipv4_gen_ip_csum()
507 * Generate the IP checksum for an IPv4 header.
508 *
509 * Note that this function assumes that we have only 20 bytes of IP header.
510 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700511static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100512{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700513 u32 sum;
514 u16 *i = (u16 *)iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100515
516 iph->check = 0;
517
518 /*
519 * Generate the sum.
520 */
521 sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
522
523 /*
524 * Fold it to ones-complement form.
525 */
526 sum = (sum & 0xffff) + (sum >> 16);
527 sum = (sum & 0xffff) + (sum >> 16);
528
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700529 return (u16)sum ^ 0xffff;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100530}
531
532/*
533 * sfe_ipv4_get_connection_match_hash()
534 * Generate the hash used in connection match lookups.
535 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700536static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100537 __be32 src_ip, __be16 src_port,
538 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100539{
540 size_t dev_addr = (size_t)dev;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700541 u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100542 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
543}
544
545/*
546 * sfe_ipv4_find_sfe_ipv4_connection_match()
547 * Get the IPv4 flow match info that corresponds to a particular 5-tuple.
548 *
549 * On entry we must be holding the lock that protects the hash table.
550 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
					__be32 src_ip, __be16 src_port,
					__be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *head;	/* Original head of the chain, kept for the move-to-front */
	unsigned int conn_match_idx;

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
	cm = si->conn_match_hash[conn_match_idx];

	/*
	 * If we don't have anything in this chain then bail.
	 */
	if (unlikely(!cm)) {
		return NULL;
	}

	/*
	 * Hopefully the first entry is the one we want.
	 */
	if ((cm->match_src_port == src_port)
	    && (cm->match_dest_port == dest_port)
	    && (cm->match_src_ip == src_ip)
	    && (cm->match_dest_ip == dest_ip)
	    && (cm->match_protocol == protocol)
	    && (cm->match_dev == dev)) {
		si->connection_match_hash_hits++;
		return cm;
	}

	/*
	 * Unfortunately we didn't find it at head, so we search it in chain and
	 * move matching entry to the top of the hash chain. We presume that this
	 * will be reused again very quickly.
	 */
	head = cm;
	do {
		cm = cm->next;
	} while (cm && (cm->match_src_port != src_port
	     || cm->match_dest_port != dest_port
	     || cm->match_src_ip != src_ip
	     || cm->match_dest_ip != dest_ip
	     || cm->match_protocol != protocol
	     || cm->match_dev != dev));

	/*
	 * Not found then we're done.
	 */
	if (unlikely(!cm)) {
		return NULL;
	}

	/*
	 * We found a match so move it.  Unlink 'cm' from its current position
	 * (cm->prev is non-NULL here since cm was not the head) and splice it
	 * in as the new head of the chain.
	 */
	if (cm->next) {
		cm->next->prev = cm->prev;
	}
	cm->prev->next = cm->next;
	cm->prev = NULL;
	cm->next = head;
	head->prev = cm;
	si->conn_match_hash[conn_match_idx] = cm;
	si->connection_match_hash_reorders++;

	return cm;
}
620
621/*
622 * sfe_ipv4_connection_match_update_summary_stats()
623 * Update the summary stats for a connection match entry.
624 */
625static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
626{
627 cm->rx_packet_count64 += cm->rx_packet_count;
628 cm->rx_packet_count = 0;
629 cm->rx_byte_count64 += cm->rx_byte_count;
630 cm->rx_byte_count = 0;
631}
632
633/*
634 * sfe_ipv4_connection_match_compute_translations()
635 * Compute port and address translations for a connection match entry.
636 */
/*
 * sfe_ipv4_connection_match_compute_translations()
 *	Compute port and address translations for a connection match entry.
 */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations.  If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		u16 src_ip_hi = cm->match_src_ip >> 16;
		u16 src_ip_lo = cm->match_src_ip & 0xffff;
		u32 xlate_src_ip = ~cm->xlate_src_ip;	/* Ones-complement of the new address */
		u16 xlate_src_ip_hi = xlate_src_ip >> 16;
		u16 xlate_src_ip_lo = xlate_src_ip & 0xffff;
		u16 xlate_src_port = ~cm->xlate_src_port;	/* Ones-complement of the new port */
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (u16)adj;

	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		u16 dest_ip_hi = cm->match_dest_ip >> 16;
		u16 dest_ip_lo = cm->match_dest_ip & 0xffff;
		u32 xlate_dest_ip = ~cm->xlate_dest_ip;	/* Ones-complement of the new address */
		u16 xlate_dest_ip_hi = xlate_dest_ip >> 16;
		u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		u16 xlate_dest_port = ~cm->xlate_dest_port;	/* Ones-complement of the new port */
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (u16)adj;
	}

	/*
	 * Partial (pseudo-header) checksum adjustment for the source address
	 * only: add the new address to the ones-complement of the old one,
	 * propagating the end-around carry, then fold to 16 bits.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		u32 adj = ~cm->match_src_ip + cm->xlate_src_ip;
		if (adj < cm->xlate_src_ip) {
			adj++;	/* End-around carry from the 32-bit wrap */
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_partial_csum_adjustment = (u16)adj;
	}

	/*
	 * Partial (pseudo-header) checksum adjustment for the destination
	 * address, computed the same way as the source one above.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
		if (adj < cm->xlate_dest_ip) {
			adj++;	/* End-around carry from the 32-bit wrap */
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_partial_csum_adjustment = (u16)adj;
	}

}
722
723/*
724 * sfe_ipv4_update_summary_stats()
725 * Update the summary stats.
726 */
727static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
728{
729 int i;
730
731 si->connection_create_requests64 += si->connection_create_requests;
732 si->connection_create_requests = 0;
733 si->connection_create_collisions64 += si->connection_create_collisions;
734 si->connection_create_collisions = 0;
735 si->connection_destroy_requests64 += si->connection_destroy_requests;
736 si->connection_destroy_requests = 0;
737 si->connection_destroy_misses64 += si->connection_destroy_misses;
738 si->connection_destroy_misses = 0;
739 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
740 si->connection_match_hash_hits = 0;
741 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
742 si->connection_match_hash_reorders = 0;
743 si->connection_flushes64 += si->connection_flushes;
744 si->connection_flushes = 0;
745 si->packets_forwarded64 += si->packets_forwarded;
746 si->packets_forwarded = 0;
747 si->packets_not_forwarded64 += si->packets_not_forwarded;
748 si->packets_not_forwarded = 0;
749
750 for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
751 si->exception_events64[i] += si->exception_events[i];
752 si->exception_events[i] = 0;
753 }
754}
755
756/*
757 * sfe_ipv4_insert_sfe_ipv4_connection_match()
758 * Insert a connection match into the hash.
759 *
760 * On entry we must be holding the lock that protects the hash table.
761 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700762static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si,
763 struct sfe_ipv4_connection_match *cm)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100764{
765 struct sfe_ipv4_connection_match **hash_head;
766 struct sfe_ipv4_connection_match *prev_head;
767 unsigned int conn_match_idx
768 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
769 cm->match_src_ip, cm->match_src_port,
770 cm->match_dest_ip, cm->match_dest_port);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700771
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100772 hash_head = &si->conn_match_hash[conn_match_idx];
773 prev_head = *hash_head;
774 cm->prev = NULL;
775 if (prev_head) {
776 prev_head->prev = cm;
777 }
778
779 cm->next = prev_head;
780 *hash_head = cm;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800781
782#ifdef CONFIG_NF_FLOW_COOKIE
Xiaoping Fan640faf42015-08-28 15:50:55 -0700783 if (!si->flow_cookie_enable)
784 return;
785
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800786 /*
787 * Configure hardware to put a flow cookie in packet of this flow,
788 * then we can accelerate the lookup process when we received this packet.
789 */
790 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
791 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
792
793 if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
794 flow_cookie_set_func_t func;
795
796 rcu_read_lock();
797 func = rcu_dereference(si->flow_cookie_set_func);
798 if (func) {
Xiaoping Fan59176422015-05-22 15:58:10 -0700799 if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800800 cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
801 entry->match = cm;
802 cm->flow_cookie = conn_match_idx;
803 }
804 }
805 rcu_read_unlock();
806
807 break;
808 }
809 }
810#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100811}
812
813/*
814 * sfe_ipv4_remove_sfe_ipv4_connection_match()
815 * Remove a connection match object from the hash.
816 *
817 * On entry we must be holding the lock that protects the hash table.
818 */
819static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
820{
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800821#ifdef CONFIG_NF_FLOW_COOKIE
Xiaoping Fan640faf42015-08-28 15:50:55 -0700822 if (si->flow_cookie_enable) {
823 /*
824 * Tell hardware that we no longer need a flow cookie in packet of this flow
825 */
826 unsigned int conn_match_idx;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800827
Xiaoping Fan640faf42015-08-28 15:50:55 -0700828 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
829 struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800830
Xiaoping Fan640faf42015-08-28 15:50:55 -0700831 if (cm == entry->match) {
832 flow_cookie_set_func_t func;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800833
Xiaoping Fan640faf42015-08-28 15:50:55 -0700834 rcu_read_lock();
835 func = rcu_dereference(si->flow_cookie_set_func);
836 if (func) {
837 func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
838 cm->match_dest_ip, cm->match_dest_port, 0);
839 }
840 rcu_read_unlock();
841
842 cm->flow_cookie = 0;
843 entry->match = NULL;
844 entry->last_clean_time = jiffies;
845 break;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800846 }
Xiaoping Fand1dc7b22015-01-23 00:43:56 -0800847 }
848 }
849#endif
850
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100851 /*
852 * Unlink the connection match entry from the hash.
853 */
854 if (cm->prev) {
855 cm->prev->next = cm->next;
856 } else {
857 unsigned int conn_match_idx
858 = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
859 cm->match_src_ip, cm->match_src_port,
860 cm->match_dest_ip, cm->match_dest_port);
861 si->conn_match_hash[conn_match_idx] = cm->next;
862 }
863
864 if (cm->next) {
865 cm->next->prev = cm->prev;
866 }
867
868 /*
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600869 * If the connection match entry is in the active list remove it.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100870 */
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600871 if (cm->active) {
872 if (likely(cm->active_prev)) {
873 cm->active_prev->active_next = cm->active_next;
874 } else {
875 si->active_head = cm->active_next;
876 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100877
Matthew McClintockaf48f1e2014-01-23 15:29:19 -0600878 if (likely(cm->active_next)) {
879 cm->active_next->active_prev = cm->active_prev;
880 } else {
881 si->active_tail = cm->active_prev;
882 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100883 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100884}
885
886/*
887 * sfe_ipv4_get_connection_hash()
888 * Generate the hash used in connection lookups.
889 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700890static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
Dave Hudson87973cd2013-10-22 16:00:04 +0100891 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100892{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700893 u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100894 return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
895}
896
897/*
898 * sfe_ipv4_find_sfe_ipv4_connection()
899 * Get the IPv4 connection info that corresponds to a particular 5-tuple.
900 *
901 * On entry we must be holding the lock that protects the hash table.
902 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700903static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol,
Dave Hudson87973cd2013-10-22 16:00:04 +0100904 __be32 src_ip, __be16 src_port,
905 __be32 dest_ip, __be16 dest_port)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100906{
907 struct sfe_ipv4_connection *c;
908 unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
909 c = si->conn_hash[conn_idx];
910
911 /*
912 * If we don't have anything in this chain then bale.
913 */
914 if (unlikely(!c)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700915 return NULL;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100916 }
917
918 /*
919 * Hopefully the first entry is the one we want.
920 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700921 if ((c->src_port == src_port)
922 && (c->dest_port == dest_port)
923 && (c->src_ip == src_ip)
924 && (c->dest_ip == dest_ip)
925 && (c->protocol == protocol)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100926 return c;
927 }
928
929 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700930 * Unfortunately we didn't find it at head, so we search it in chain.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100931 */
932 do {
933 c = c->next;
934 } while (c && (c->src_port != src_port
935 || c->dest_port != dest_port
936 || c->src_ip != src_ip
937 || c->dest_ip != dest_ip
938 || c->protocol != protocol));
939
940 /*
941 * Will need connection entry for next create/destroy metadata,
942 * So no need to re-order entry for these requests
943 */
944 return c;
945}
946
947/*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600948 * sfe_ipv4_mark_rule()
949 * Updates the mark for a current offloaded connection
950 *
951 * Will take hash lock upon entry
952 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700953void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600954{
955 struct sfe_ipv4 *si = &__si;
956 struct sfe_ipv4_connection *c;
Matthew McClintockdb5ac512014-01-16 17:01:40 -0600957
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700958 spin_lock_bh(&si->lock);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600959 c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -0700960 mark->src_ip.ip, mark->src_port,
961 mark->dest_ip.ip, mark->dest_port);
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600962 if (c) {
Nicolas Costaf53d6fe2014-01-13 16:03:46 -0600963 WARN_ON((0 != c->mark) && (0 == mark->mark));
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600964 c->mark = mark->mark;
965 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -0700966 spin_unlock_bh(&si->lock);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -0700967
968 if (c) {
969 DEBUG_TRACE("Matching connection found for mark, "
970 "setting from %08x to %08x\n",
971 c->mark, mark->mark);
972 }
Matthew McClintockbe7b47d2013-11-27 13:26:23 -0600973}
974
975/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +0100976 * sfe_ipv4_insert_sfe_ipv4_connection()
977 * Insert a connection into the hash.
978 *
979 * On entry we must be holding the lock that protects the hash table.
980 */
981static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
982{
983 struct sfe_ipv4_connection **hash_head;
984 struct sfe_ipv4_connection *prev_head;
985 unsigned int conn_idx;
986
987 /*
988 * Insert entry into the connection hash.
989 */
990 conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
991 c->dest_ip, c->dest_port);
992 hash_head = &si->conn_hash[conn_idx];
993 prev_head = *hash_head;
994 c->prev = NULL;
995 if (prev_head) {
996 prev_head->prev = c;
997 }
998
999 c->next = prev_head;
1000 *hash_head = c;
1001
1002 /*
1003 * Insert entry into the "all connections" list.
1004 */
1005 if (si->all_connections_tail) {
1006 c->all_connections_prev = si->all_connections_tail;
1007 si->all_connections_tail->all_connections_next = c;
1008 } else {
1009 c->all_connections_prev = NULL;
1010 si->all_connections_head = c;
1011 }
1012
1013 si->all_connections_tail = c;
1014 c->all_connections_next = NULL;
1015 si->num_connections++;
1016
1017 /*
1018 * Insert the connection match objects too.
1019 */
1020 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
1021 sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
1022}
1023
1024/*
1025 * sfe_ipv4_remove_sfe_ipv4_connection()
1026 * Remove a sfe_ipv4_connection object from the hash.
1027 *
1028 * On entry we must be holding the lock that protects the hash table.
1029 */
1030static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
1031{
1032 /*
1033 * Remove the connection match objects.
1034 */
1035 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
1036 sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);
1037
1038 /*
1039 * Unlink the connection.
1040 */
1041 if (c->prev) {
1042 c->prev->next = c->next;
1043 } else {
1044 unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1045 c->dest_ip, c->dest_port);
1046 si->conn_hash[conn_idx] = c->next;
1047 }
1048
1049 if (c->next) {
1050 c->next->prev = c->prev;
1051 }
Xiaoping Fan34586472015-07-03 02:20:35 -07001052
1053 /*
1054 * Unlink connection from all_connections list
1055 */
1056 if (c->all_connections_prev) {
1057 c->all_connections_prev->all_connections_next = c->all_connections_next;
1058 } else {
1059 si->all_connections_head = c->all_connections_next;
1060 }
1061
1062 if (c->all_connections_next) {
1063 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1064 } else {
1065 si->all_connections_tail = c->all_connections_prev;
1066 }
1067
1068 si->num_connections--;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001069}
1070
1071/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001072 * sfe_ipv4_sync_sfe_ipv4_connection()
1073 * Sync a connection.
1074 *
1075 * On entry to this function we expect that the lock for the connection is either
1076 * already held or isn't required.
1077 */
1078static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001079 struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001080 u64 now_jiffies)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001081{
1082 struct sfe_ipv4_connection_match *original_cm;
1083 struct sfe_ipv4_connection_match *reply_cm;
1084
1085 /*
1086 * Fill in the update message.
1087 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001088 sis->is_v6 = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001089 sis->protocol = c->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001090 sis->src_ip.ip = c->src_ip;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001091 sis->src_ip_xlate.ip = c->src_ip_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001092 sis->dest_ip.ip = c->dest_ip;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001093 sis->dest_ip_xlate.ip = c->dest_ip_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001094 sis->src_port = c->src_port;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001095 sis->src_port_xlate = c->src_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001096 sis->dest_port = c->dest_port;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001097 sis->dest_port_xlate = c->dest_port_xlate;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001098
1099 original_cm = c->original_match;
1100 reply_cm = c->reply_match;
1101 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
1102 sis->src_td_end = original_cm->protocol_state.tcp.end;
1103 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
1104 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
1105 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
1106 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
1107
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001108 sis->src_new_packet_count = original_cm->rx_packet_count;
1109 sis->src_new_byte_count = original_cm->rx_byte_count;
1110 sis->dest_new_packet_count = reply_cm->rx_packet_count;
1111 sis->dest_new_byte_count = reply_cm->rx_byte_count;
1112
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001113 sfe_ipv4_connection_match_update_summary_stats(original_cm);
1114 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
1115
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001116 sis->src_dev = original_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001117 sis->src_packet_count = original_cm->rx_packet_count64;
1118 sis->src_byte_count = original_cm->rx_byte_count64;
Matthew McClintockd0cdb802014-02-24 16:30:35 -06001119
1120 sis->dest_dev = reply_cm->match_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001121 sis->dest_packet_count = reply_cm->rx_packet_count64;
1122 sis->dest_byte_count = reply_cm->rx_byte_count64;
1123
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001124 sis->reason = reason;
1125
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001126 /*
1127 * Get the time increment since our last sync.
1128 */
1129 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
1130 c->last_sync_jiffies = now_jiffies;
1131}
1132
1133/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001134 * sfe_ipv4_flush_sfe_ipv4_connection()
1135 * Flush a connection and free all associated resources.
1136 *
1137 * We need to be called with bottom halves disabled locally as we need to acquire
1138 * the connection hash lock and release it again. In general we're actually called
1139 * from within a BH and so we're fine, but we're also called when connections are
1140 * torn down.
1141 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001142static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si,
1143 struct sfe_ipv4_connection *c,
1144 sfe_sync_reason_t reason)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001145{
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001146 struct sfe_connection_sync sis;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001147 u64 now_jiffies;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07001148 sfe_sync_rule_callback_t sync_rule_callback;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001149
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001150 rcu_read_lock();
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001151 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001152 si->connection_flushes++;
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001153 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001154 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001155
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001156 if (sync_rule_callback) {
1157 /*
1158 * Generate a sync message and then sync.
1159 */
1160 now_jiffies = get_jiffies_64();
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001161 sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
Dave Hudsondcd08fb2013-11-22 09:25:16 -06001162 sync_rule_callback(&sis);
1163 }
1164
1165 rcu_read_unlock();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001166
1167 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001168 * Release our hold of the source and dest devices and free the memory
1169 * for our connection objects.
1170 */
1171 dev_put(c->original_dev);
1172 dev_put(c->reply_dev);
1173 kfree(c->original_match);
1174 kfree(c->reply_match);
1175 kfree(c);
1176}
1177
1178/*
1179 * sfe_ipv4_recv_udp()
1180 * Handle UDP packet receives and forwarding.
1181 */
1182static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001183 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001184{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001185 struct sfe_ipv4_udp_hdr *udph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001186 __be32 src_ip;
1187 __be32 dest_ip;
1188 __be16 src_port;
1189 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001190 struct sfe_ipv4_connection_match *cm;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001191 u8 ttl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001192 struct net_device *xmit_dev;
1193
1194 /*
1195 * Is our packet too short to contain a valid UDP header?
1196 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001197 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001198 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001199 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1200 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001201 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001202
1203 DEBUG_TRACE("packet too short for UDP header\n");
1204 return 0;
1205 }
1206
1207 /*
1208 * Read the IP address and port information. Read the IP header data first
1209 * because we've almost certainly got that in the cache. We may not yet have
1210 * the UDP header cached though so allow more time for any prefetching.
1211 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001212 src_ip = iph->saddr;
1213 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001214
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001215 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001216 src_port = udph->source;
1217 dest_port = udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001218
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001219 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001220
1221 /*
1222 * Look for a connection match.
1223 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001224#ifdef CONFIG_NF_FLOW_COOKIE
1225 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1226 if (unlikely(!cm)) {
1227 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1228 }
1229#else
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001230 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001231#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001232 if (unlikely(!cm)) {
1233 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1234 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001235 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001236
1237 DEBUG_TRACE("no connection found\n");
1238 return 0;
1239 }
1240
1241 /*
1242 * If our packet has beern marked as "flush on find" we can't actually
1243 * forward it in the fast path, but now that we've found an associated
1244 * connection we can flush that out before we process the packet.
1245 */
1246 if (unlikely(flush_on_find)) {
1247 struct sfe_ipv4_connection *c = cm->connection;
1248 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1249 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1250 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001251 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001252
1253 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001254 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001255 return 0;
1256 }
1257
Zhi Chen8748eb32015-06-18 12:58:48 -07001258#ifdef CONFIG_XFRM
1259 /*
1260 * We can't accelerate the flow on this direction, just let it go
1261 * through the slow path.
1262 */
1263 if (unlikely(!cm->flow_accel)) {
1264 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001265 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001266 return 0;
1267 }
1268#endif
1269
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001270 /*
1271 * Does our TTL allow forwarding?
1272 */
1273 ttl = iph->ttl;
1274 if (unlikely(ttl < 2)) {
1275 struct sfe_ipv4_connection *c = cm->connection;
1276 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1277 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1278 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001279 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001280
1281 DEBUG_TRACE("ttl too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001282 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001283 return 0;
1284 }
1285
1286 /*
1287 * If our packet is larger than the MTU of the transmit interface then
1288 * we can't forward it easily.
1289 */
1290 if (unlikely(len > cm->xmit_dev_mtu)) {
1291 struct sfe_ipv4_connection *c = cm->connection;
1292 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1293 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1294 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001295 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001296
1297 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001298 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001299 return 0;
1300 }
1301
1302 /*
1303 * From this point on we're good to modify the packet.
1304 */
1305
1306 /*
Murat Sezginc7dd8172019-02-27 15:23:50 -08001307 * Check if skb was cloned. If it was, unshare it. Because
1308 * the data area is going to be written in this path and we don't want to
1309 * change the cloned skb's data section.
1310 */
1311 if (unlikely(skb_cloned(skb))) {
Tian Yang45f39c82020-10-06 14:07:47 -07001312 DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
Murat Sezginc7dd8172019-02-27 15:23:50 -08001313 skb = skb_unshare(skb, GFP_ATOMIC);
1314 if (!skb) {
1315 DEBUG_WARN("Failed to unshare the cloned skb\n");
1316 return 0;
1317 }
1318
1319 /*
1320 * Update the iph and udph pointers with the unshared skb's data area.
1321 */
1322 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
1323 udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
1324 }
1325
1326 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001327 * Update DSCP
1328 */
1329 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1330 iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
1331 }
1332
1333 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001334 * Decrement our TTL.
1335 */
1336 iph->ttl = ttl - 1;
1337
1338 /*
1339 * Do we have to perform translations of the source address/port?
1340 */
1341 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001342 u16 udp_csum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001343
Dave Hudson87973cd2013-10-22 16:00:04 +01001344 iph->saddr = cm->xlate_src_ip;
1345 udph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001346
1347 /*
1348 * Do we have a non-zero UDP checksum? If we do then we need
1349 * to update it.
1350 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001351 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001352 if (likely(udp_csum)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001353 u32 sum;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001354
1355 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1356 sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
1357 } else {
1358 sum = udp_csum + cm->xlate_src_csum_adjustment;
1359 }
1360
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001361 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001362 udph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001363 }
1364 }
1365
1366 /*
1367 * Do we have to perform translations of the destination address/port?
1368 */
1369 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001370 u16 udp_csum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001371
Dave Hudson87973cd2013-10-22 16:00:04 +01001372 iph->daddr = cm->xlate_dest_ip;
1373 udph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001374
1375 /*
1376 * Do we have a non-zero UDP checksum? If we do then we need
1377 * to update it.
1378 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001379 udp_csum = udph->check;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001380 if (likely(udp_csum)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001381 u32 sum;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001382
1383 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1384 sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
1385 } else {
1386 sum = udp_csum + cm->xlate_dest_csum_adjustment;
1387 }
1388
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001389 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001390 udph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001391 }
1392 }
1393
1394 /*
1395 * Replace the IP checksum.
1396 */
1397 iph->check = sfe_ipv4_gen_ip_csum(iph);
1398
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001399 /*
1400 * Update traffic stats.
1401 */
1402 cm->rx_packet_count++;
1403 cm->rx_byte_count += len;
1404
1405 /*
1406 * If we're not already on the active list then insert ourselves at the tail
1407 * of the current list.
1408 */
1409 if (unlikely(!cm->active)) {
1410 cm->active = true;
1411 cm->active_prev = si->active_tail;
1412 if (likely(si->active_tail)) {
1413 si->active_tail->active_next = cm;
1414 } else {
1415 si->active_head = cm;
1416 }
1417 si->active_tail = cm;
1418 }
1419
1420 xmit_dev = cm->xmit_dev;
1421 skb->dev = xmit_dev;
1422
1423 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001424 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001425 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001426 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1427 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001428 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1429 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001430 } else {
1431 /*
1432 * For the simple case we write this really fast.
1433 */
1434 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
1435 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06001436 eth->h_dest[0] = cm->xmit_dest_mac[0];
1437 eth->h_dest[1] = cm->xmit_dest_mac[1];
1438 eth->h_dest[2] = cm->xmit_dest_mac[2];
1439 eth->h_source[0] = cm->xmit_src_mac[0];
1440 eth->h_source[1] = cm->xmit_src_mac[1];
1441 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001442 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001443 }
1444
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001445 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001446 * Update priority of skb.
1447 */
1448 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
1449 skb->priority = cm->priority;
1450 }
1451
1452 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001453 * Mark outgoing packet.
1454 */
Cristian Prundeanu94fff982013-12-23 15:02:59 -06001455 skb->mark = cm->connection->mark;
1456 if (skb->mark) {
1457 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1458 }
1459
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001460 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001461 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001462
1463 /*
1464 * We're going to check for GSO flags when we transmit the packet so
1465 * start fetching the necessary cache line now.
1466 */
1467 prefetch(skb_shinfo(skb));
1468
1469 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06001470 * Mark that this packet has been fast forwarded.
1471 */
1472 skb->fast_forwarded = 1;
1473
1474 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001475 * Send the packet on its way.
1476 */
1477 dev_queue_xmit(skb);
1478
1479 return 1;
1480}
1481
1482/*
1483 * sfe_ipv4_process_tcp_option_sack()
1484 * Parse TCP SACK option and update ack according
1485 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001486static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs,
1487 u32 *ack)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001488{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001489 u32 length = sizeof(struct sfe_ipv4_tcp_hdr);
1490 u8 *ptr = (u8 *)th + length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001491
1492 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001493 * Ignore processing if TCP packet has only TIMESTAMP option.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001494 */
1495 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1496 && likely(ptr[0] == TCPOPT_NOP)
1497 && likely(ptr[1] == TCPOPT_NOP)
1498 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1499 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1500 return true;
1501 }
1502
1503 /*
1504 * TCP options. Parse SACK option.
1505 */
1506 while (length < data_offs) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001507 u8 size;
1508 u8 kind;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001509
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001510 ptr = (u8 *)th + length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001511 kind = *ptr;
1512
1513 /*
1514 * NOP, for padding
1515 * Not in the switch because to fast escape and to not calculate size
1516 */
1517 if (kind == TCPOPT_NOP) {
1518 length++;
1519 continue;
1520 }
1521
1522 if (kind == TCPOPT_SACK) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001523 u32 sack = 0;
1524 u8 re = 1 + 1;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001525
1526 size = *(ptr + 1);
1527 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1528 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1529 || (size > (data_offs - length))) {
1530 return false;
1531 }
1532
1533 re += 4;
1534 while (re < size) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001535 u32 sack_re;
1536 u8 *sptr = ptr + re;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001537 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1538 if (sack_re > sack) {
1539 sack = sack_re;
1540 }
1541 re += TCPOLEN_SACK_PERBLOCK;
1542 }
1543 if (sack > *ack) {
1544 *ack = sack;
1545 }
1546 length += size;
1547 continue;
1548 }
1549 if (kind == TCPOPT_EOL) {
1550 return true;
1551 }
1552 size = *(ptr + 1);
1553 if (size < 2) {
1554 return false;
1555 }
1556 length += size;
1557 }
1558
1559 return true;
1560}
1561
1562/*
1563 * sfe_ipv4_recv_tcp()
1564 * Handle TCP packet receives and forwarding.
1565 */
1566static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001567 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001568{
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001569 struct sfe_ipv4_tcp_hdr *tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01001570 __be32 src_ip;
1571 __be32 dest_ip;
1572 __be16 src_port;
1573 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001574 struct sfe_ipv4_connection_match *cm;
1575 struct sfe_ipv4_connection_match *counter_cm;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001576 u8 ttl;
1577 u32 flags;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001578 struct net_device *xmit_dev;
1579
1580 /*
1581 * Is our packet too short to contain a valid UDP header?
1582 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07001583 if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001584 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001585 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1586 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001587 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001588
1589 DEBUG_TRACE("packet too short for TCP header\n");
1590 return 0;
1591 }
1592
1593 /*
1594 * Read the IP address and port information. Read the IP header data first
1595 * because we've almost certainly got that in the cache. We may not yet have
1596 * the TCP header cached though so allow more time for any prefetching.
1597 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001598 src_ip = iph->saddr;
1599 dest_ip = iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001600
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001601 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
Dave Hudson87973cd2013-10-22 16:00:04 +01001602 src_port = tcph->source;
1603 dest_port = tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001604 flags = tcp_flag_word(tcph);
1605
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001606 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001607
1608 /*
1609 * Look for a connection match.
1610 */
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001611#ifdef CONFIG_NF_FLOW_COOKIE
1612 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1613 if (unlikely(!cm)) {
Matthew McClintock37858802015-02-03 12:12:02 -06001614 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001615 }
1616#else
Matthew McClintock37858802015-02-03 12:12:02 -06001617 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08001618#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001619 if (unlikely(!cm)) {
1620 /*
1621 * We didn't get a connection but as TCP is connection-oriented that
1622 * may be because this is a non-fast connection (not running established).
1623 * For diagnostic purposes we differentiate this here.
1624 */
1625 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1626 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1627 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001628 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001629
1630 DEBUG_TRACE("no connection found - fast flags\n");
1631 return 0;
1632 }
1633 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1634 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001635 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001636
1637 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1638 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1639 return 0;
1640 }
1641
1642 /*
1643 * If our packet has beern marked as "flush on find" we can't actually
1644 * forward it in the fast path, but now that we've found an associated
1645 * connection we can flush that out before we process the packet.
1646 */
1647 if (unlikely(flush_on_find)) {
1648 struct sfe_ipv4_connection *c = cm->connection;
1649 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1650 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1651 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001652 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001653
1654 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001655 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001656 return 0;
1657 }
1658
Zhi Chen8748eb32015-06-18 12:58:48 -07001659#ifdef CONFIG_XFRM
1660 /*
1661 * We can't accelerate the flow on this direction, just let it go
1662 * through the slow path.
1663 */
1664 if (unlikely(!cm->flow_accel)) {
1665 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001666 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001667 return 0;
1668 }
1669#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001670 /*
1671 * Does our TTL allow forwarding?
1672 */
1673 ttl = iph->ttl;
1674 if (unlikely(ttl < 2)) {
1675 struct sfe_ipv4_connection *c = cm->connection;
1676 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1677 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1678 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001679 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001680
1681 DEBUG_TRACE("ttl too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001682 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001683 return 0;
1684 }
1685
1686 /*
1687 * If our packet is larger than the MTU of the transmit interface then
1688 * we can't forward it easily.
1689 */
Xiaoping Fand642a6e2015-04-10 15:19:06 -07001690 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001691 struct sfe_ipv4_connection *c = cm->connection;
1692 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1693 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1694 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001695 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001696
1697 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001698 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001699 return 0;
1700 }
1701
1702 /*
1703 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1704 * set is not a fast path packet.
1705 */
1706 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1707 struct sfe_ipv4_connection *c = cm->connection;
1708 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1709 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1710 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001711 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001712
1713 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1714 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001715 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001716 return 0;
1717 }
1718
1719 counter_cm = cm->counter_match;
1720
1721 /*
1722 * Are we doing sequence number checking?
1723 */
1724 if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001725 u32 seq;
1726 u32 ack;
1727 u32 sack;
1728 u32 data_offs;
1729 u32 end;
1730 u32 left_edge;
1731 u32 scaled_win;
1732 u32 max_end;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001733
1734 /*
1735 * Is our sequence fully past the right hand edge of the window?
1736 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001737 seq = ntohl(tcph->seq);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001738 if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001739 struct sfe_ipv4_connection *c = cm->connection;
1740 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1741 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1742 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001743 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001744
1745 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1746 seq, cm->protocol_state.tcp.max_end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001747 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001748 return 0;
1749 }
1750
1751 /*
1752 * Check that our TCP data offset isn't too short.
1753 */
1754 data_offs = tcph->doff << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001755 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001756 struct sfe_ipv4_connection *c = cm->connection;
1757 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1758 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1759 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001760 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001761
1762 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001763 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001764 return 0;
1765 }
1766
1767 /*
1768 * Update ACK according to any SACK option.
1769 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001770 ack = ntohl(tcph->ack_seq);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001771 sack = ack;
1772 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1773 struct sfe_ipv4_connection *c = cm->connection;
1774 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1775 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1776 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001777 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001778
1779 DEBUG_TRACE("TCP option SACK size is wrong\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001780 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001781 return 0;
1782 }
1783
1784 /*
1785 * Check that our TCP data offset isn't past the end of the packet.
1786 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001787 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001788 if (unlikely(len < data_offs)) {
1789 struct sfe_ipv4_connection *c = cm->connection;
1790 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1791 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1792 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001793 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001794
1795 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1796 data_offs, len);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001797 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001798 return 0;
1799 }
1800
1801 end = seq + len - data_offs;
1802
1803 /*
1804 * Is our sequence fully before the left hand edge of the window?
1805 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001806 if (unlikely((s32)(end - (cm->protocol_state.tcp.end
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001807 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1808 struct sfe_ipv4_connection *c = cm->connection;
1809 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1810 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1811 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001812 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001813
1814 DEBUG_TRACE("seq: %u before left edge: %u\n",
1815 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001816 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001817 return 0;
1818 }
1819
1820 /*
1821 * Are we acking data that is to the right of what has been sent?
1822 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001823 if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001824 struct sfe_ipv4_connection *c = cm->connection;
1825 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1826 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1827 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001828 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001829
1830 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1831 sack, counter_cm->protocol_state.tcp.end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001832 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001833 return 0;
1834 }
1835
1836 /*
1837 * Is our ack too far before the left hand edge of the window?
1838 */
1839 left_edge = counter_cm->protocol_state.tcp.end
1840 - cm->protocol_state.tcp.max_win
1841 - SFE_IPV4_TCP_MAX_ACK_WINDOW
1842 - 1;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001843 if (unlikely((s32)(sack - left_edge) < 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001844 struct sfe_ipv4_connection *c = cm->connection;
1845 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1846 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1847 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001848 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001849
1850 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001851 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001852 return 0;
1853 }
1854
1855 /*
1856 * Have we just seen the largest window size yet for this connection? If yes
1857 * then we need to record the new value.
1858 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001859 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001860 scaled_win += (sack - ack);
1861 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1862 cm->protocol_state.tcp.max_win = scaled_win;
1863 }
1864
1865 /*
1866 * If our sequence and/or ack numbers have advanced then record the new state.
1867 */
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001868 if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001869 cm->protocol_state.tcp.end = end;
1870 }
1871
1872 max_end = sack + scaled_win;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001873 if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001874 counter_cm->protocol_state.tcp.max_end = max_end;
1875 }
1876 }
1877
1878 /*
1879 * From this point on we're good to modify the packet.
1880 */
1881
1882 /*
Murat Sezginc7dd8172019-02-27 15:23:50 -08001883 * Check if skb was cloned. If it was, unshare it. Because
1884 * the data area is going to be written in this path and we don't want to
1885 * change the cloned skb's data section.
1886 */
1887 if (unlikely(skb_cloned(skb))) {
Tian Yang45f39c82020-10-06 14:07:47 -07001888 DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
Murat Sezginc7dd8172019-02-27 15:23:50 -08001889 skb = skb_unshare(skb, GFP_ATOMIC);
1890 if (!skb) {
1891 DEBUG_WARN("Failed to unshare the cloned skb\n");
1892 return 0;
1893 }
1894
1895 /*
1896 * Update the iph and tcph pointers with the unshared skb's data area.
1897 */
1898 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
1899 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
1900 }
1901
1902 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001903 * Update DSCP
1904 */
1905 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1906 iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
1907 }
1908
1909 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001910 * Decrement our TTL.
1911 */
1912 iph->ttl = ttl - 1;
1913
1914 /*
1915 * Do we have to perform translations of the source address/port?
1916 */
1917 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001918 u16 tcp_csum;
1919 u32 sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001920
Dave Hudson87973cd2013-10-22 16:00:04 +01001921 iph->saddr = cm->xlate_src_ip;
1922 tcph->source = cm->xlate_src_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001923
1924 /*
1925 * Do we have a non-zero UDP checksum? If we do then we need
1926 * to update it.
1927 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001928 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001929 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1930 sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1931 } else {
1932 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1933 }
1934
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001935 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001936 tcph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001937 }
1938
1939 /*
1940 * Do we have to perform translations of the destination address/port?
1941 */
1942 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001943 u16 tcp_csum;
1944 u32 sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001945
Dave Hudson87973cd2013-10-22 16:00:04 +01001946 iph->daddr = cm->xlate_dest_ip;
1947 tcph->dest = cm->xlate_dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001948
1949 /*
1950 * Do we have a non-zero UDP checksum? If we do then we need
1951 * to update it.
1952 */
Dave Hudson87973cd2013-10-22 16:00:04 +01001953 tcp_csum = tcph->check;
Xiaoping Fanad755af2015-04-01 16:58:46 -07001954 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1955 sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1956 } else {
1957 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1958 }
1959
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001960 sum = (sum & 0xffff) + (sum >> 16);
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07001961 tcph->check = (u16)sum;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001962 }
1963
1964 /*
1965 * Replace the IP checksum.
1966 */
1967 iph->check = sfe_ipv4_gen_ip_csum(iph);
1968
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001969 /*
1970 * Update traffic stats.
1971 */
1972 cm->rx_packet_count++;
1973 cm->rx_byte_count += len;
1974
1975 /*
1976 * If we're not already on the active list then insert ourselves at the tail
1977 * of the current list.
1978 */
1979 if (unlikely(!cm->active)) {
1980 cm->active = true;
1981 cm->active_prev = si->active_tail;
1982 if (likely(si->active_tail)) {
1983 si->active_tail->active_next = cm;
1984 } else {
1985 si->active_head = cm;
1986 }
1987 si->active_tail = cm;
1988 }
1989
1990 xmit_dev = cm->xmit_dev;
1991 skb->dev = xmit_dev;
1992
1993 /*
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001994 * Check to see if we need to write a header.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001995 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06001996 if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1997 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001998 dev_hard_header(skb, xmit_dev, ETH_P_IP,
1999 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002000 } else {
2001 /*
2002 * For the simple case we write this really fast.
2003 */
2004 struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
2005 eth->h_proto = htons(ETH_P_IP);
Matthew McClintockdab3c8f2014-02-19 14:29:39 -06002006 eth->h_dest[0] = cm->xmit_dest_mac[0];
2007 eth->h_dest[1] = cm->xmit_dest_mac[1];
2008 eth->h_dest[2] = cm->xmit_dest_mac[2];
2009 eth->h_source[0] = cm->xmit_src_mac[0];
2010 eth->h_source[1] = cm->xmit_src_mac[1];
2011 eth->h_source[2] = cm->xmit_src_mac[2];
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002012 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002013 }
2014
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002015 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07002016 * Update priority of skb.
2017 */
2018 if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
2019 skb->priority = cm->priority;
2020 }
2021
2022 /*
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002023 * Mark outgoing packet
2024 */
2025 skb->mark = cm->connection->mark;
2026 if (skb->mark) {
2027 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
2028 }
2029
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002030 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002031 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002032
2033 /*
2034 * We're going to check for GSO flags when we transmit the packet so
2035 * start fetching the necessary cache line now.
2036 */
2037 prefetch(skb_shinfo(skb));
2038
2039 /*
Nicolas Costa9ec8c7b2014-01-29 12:50:46 -06002040 * Mark that this packet has been fast forwarded.
2041 */
2042 skb->fast_forwarded = 1;
2043
2044 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002045 * Send the packet on its way.
2046 */
2047 dev_queue_xmit(skb);
2048
2049 return 1;
2050}
2051
2052/*
2053 * sfe_ipv4_recv_icmp()
2054 * Handle ICMP packet receives.
2055 *
2056 * ICMP packets aren't handled as a "fast path" and always have us process them
2057 * through the default Linux stack. What we do need to do is look for any errors
2058 * about connections we are handling in the fast path. If we find any such
2059 * connections then we want to flush their state so that the ICMP error path
2060 * within Linux has all of the correct state should it need it.
2061 */
2062static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002063 unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002064{
2065 struct icmphdr *icmph;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002066 struct sfe_ipv4_ip_hdr *icmp_iph;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002067 unsigned int icmp_ihl_words;
2068 unsigned int icmp_ihl;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002069 u32 *icmp_trans_h;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002070 struct sfe_ipv4_udp_hdr *icmp_udph;
2071 struct sfe_ipv4_tcp_hdr *icmp_tcph;
Dave Hudson87973cd2013-10-22 16:00:04 +01002072 __be32 src_ip;
2073 __be32 dest_ip;
2074 __be16 src_port;
2075 __be16 dest_port;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002076 struct sfe_ipv4_connection_match *cm;
2077 struct sfe_ipv4_connection *c;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002078 u32 pull_len = sizeof(struct icmphdr) + ihl;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002079
2080 /*
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002081 * Is our packet too short to contain a valid ICMP header?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002082 */
2083 len -= ihl;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002084 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002085 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002086 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2087 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002088 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002089
2090 DEBUG_TRACE("packet too short for ICMP header\n");
2091 return 0;
2092 }
2093
2094 /*
2095 * We only handle "destination unreachable" and "time exceeded" messages.
2096 */
2097 icmph = (struct icmphdr *)(skb->data + ihl);
2098 if ((icmph->type != ICMP_DEST_UNREACH)
2099 && (icmph->type != ICMP_TIME_EXCEEDED)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002100 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002101 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2102 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002103 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002104
2105 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2106 return 0;
2107 }
2108
2109 /*
2110 * Do we have the full embedded IP header?
2111 */
2112 len -= sizeof(struct icmphdr);
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002113 pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2114 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002115 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002116 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2117 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002118 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002119
2120 DEBUG_TRACE("Embedded IP header not complete\n");
2121 return 0;
2122 }
2123
2124 /*
2125 * Is our embedded IP version wrong?
2126 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002127 icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002128 if (unlikely(icmp_iph->version != 4)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002129 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002130 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2131 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002132 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002133
2134 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2135 return 0;
2136 }
2137
2138 /*
2139 * Do we have the full embedded IP header, including any options?
2140 */
2141 icmp_ihl_words = icmp_iph->ihl;
2142 icmp_ihl = icmp_ihl_words << 2;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002143 pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2144 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002145 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002146 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2147 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002148 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002149
2150 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2151 return 0;
2152 }
2153
Nicolas Costaac2979c2014-01-14 10:35:24 -06002154 len -= icmp_ihl;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002155 icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002156
2157 /*
2158 * Handle the embedded transport layer header.
2159 */
2160 switch (icmp_iph->protocol) {
2161 case IPPROTO_UDP:
2162 /*
2163 * We should have 8 bytes of UDP header - that's enough to identify
2164 * the connection.
2165 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002166 pull_len += 8;
2167 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002168 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002169 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2170 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002171 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002172
2173 DEBUG_TRACE("Incomplete embedded UDP header\n");
2174 return 0;
2175 }
2176
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002177 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002178 src_port = icmp_udph->source;
2179 dest_port = icmp_udph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002180 break;
2181
2182 case IPPROTO_TCP:
2183 /*
2184 * We should have 8 bytes of TCP header - that's enough to identify
2185 * the connection.
2186 */
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002187 pull_len += 8;
2188 if (!pskb_may_pull(skb, pull_len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002189 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002190 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2191 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002192 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002193
2194 DEBUG_TRACE("Incomplete embedded TCP header\n");
2195 return 0;
2196 }
2197
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002198 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
Dave Hudson87973cd2013-10-22 16:00:04 +01002199 src_port = icmp_tcph->source;
2200 dest_port = icmp_tcph->dest;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002201 break;
2202
2203 default:
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002204 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002205 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2206 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002207 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002208
2209 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2210 return 0;
2211 }
2212
Dave Hudson87973cd2013-10-22 16:00:04 +01002213 src_ip = icmp_iph->saddr;
2214 dest_ip = icmp_iph->daddr;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002215
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002216 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002217
2218 /*
2219 * Look for a connection match. Note that we reverse the source and destination
2220 * here because our embedded message contains a packet that was sent in the
2221 * opposite direction to the one in which we just received it. It will have
2222 * been sent on the interface from which we received it though so that's still
2223 * ok to use.
2224 */
2225 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2226 if (unlikely(!cm)) {
2227 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2228 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002229 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002230
2231 DEBUG_TRACE("no connection found\n");
2232 return 0;
2233 }
2234
2235 /*
2236 * We found a connection so now remove it from the connection list and flush
2237 * its state.
2238 */
2239 c = cm->connection;
2240 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2241 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2242 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002243 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002244
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002245 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002246 return 0;
2247}
2248
2249/*
2250 * sfe_ipv4_recv()
Matthew McClintocka8ad7962014-01-16 16:49:30 -06002251 * Handle packet receives and forwaring.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002252 *
2253 * Returns 1 if the packet is forwarded or 0 if it isn't.
2254 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002255int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002256{
2257 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002258 unsigned int len;
2259 unsigned int tot_len;
2260 unsigned int frag_off;
2261 unsigned int ihl;
2262 bool flush_on_find;
2263 bool ip_options;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002264 struct sfe_ipv4_ip_hdr *iph;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002265 u32 protocol;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002266
2267 /*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002268 * Check that we have space for an IP header here.
2269 */
2270 len = skb->len;
Xiaoping Fanf8260b82015-04-10 15:17:00 -07002271 if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002272 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002273 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2274 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002275 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002276
2277 DEBUG_TRACE("len: %u is too short\n", len);
2278 return 0;
2279 }
2280
2281 /*
2282 * Check that our "total length" is large enough for an IP header.
2283 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002284 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002285 tot_len = ntohs(iph->tot_len);
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002286 if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002287 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002288 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2289 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002290 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002291
2292 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2293 return 0;
2294 }
2295
2296 /*
2297 * Is our IP version wrong?
2298 */
2299 if (unlikely(iph->version != 4)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002300 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002301 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2302 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002303 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002304
2305 DEBUG_TRACE("IP version: %u\n", iph->version);
2306 return 0;
2307 }
2308
2309 /*
2310 * Does our datagram fit inside the skb?
2311 */
2312 if (unlikely(tot_len > len)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002313 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002314 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2315 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002316 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002317
2318 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2319 return 0;
2320 }
2321
2322 /*
2323 * Do we have a non-initial fragment?
Nicolas Costaac2979c2014-01-14 10:35:24 -06002324 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002325 frag_off = ntohs(iph->frag_off);
2326 if (unlikely(frag_off & IP_OFFSET)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002327 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002328 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2329 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002330 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002331
2332 DEBUG_TRACE("non-initial fragment\n");
2333 return 0;
2334 }
2335
2336 /*
2337 * If we have a (first) fragment then mark it to cause any connection to flush.
2338 */
2339 flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2340
2341 /*
2342 * Do we have any IP options? That's definite a slow path! If we do have IP
2343 * options we need to recheck our header size.
2344 */
2345 ihl = iph->ihl << 2;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002346 ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002347 if (unlikely(ip_options)) {
2348 if (unlikely(len < ihl)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002349 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002350 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2351 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002352 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002353
2354 DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2355 return 0;
2356 }
2357
2358 flush_on_find = true;
2359 }
2360
2361 protocol = iph->protocol;
2362 if (IPPROTO_UDP == protocol) {
2363 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2364 }
2365
2366 if (IPPROTO_TCP == protocol) {
2367 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2368 }
2369
2370 if (IPPROTO_ICMP == protocol) {
2371 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2372 }
2373
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002374 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002375 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2376 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002377 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002378
2379 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2380 return 0;
2381}
2382
Nicolas Costa436926b2014-01-14 10:36:22 -06002383static void
2384sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002385 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002386{
2387 struct sfe_ipv4_connection_match *orig_cm;
2388 struct sfe_ipv4_connection_match *repl_cm;
2389 struct sfe_ipv4_tcp_connection_match *orig_tcp;
2390 struct sfe_ipv4_tcp_connection_match *repl_tcp;
2391
2392 orig_cm = c->original_match;
2393 repl_cm = c->reply_match;
2394 orig_tcp = &orig_cm->protocol_state.tcp;
2395 repl_tcp = &repl_cm->protocol_state.tcp;
2396
2397 /* update orig */
2398 if (orig_tcp->max_win < sic->src_td_max_window) {
2399 orig_tcp->max_win = sic->src_td_max_window;
2400 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002401 if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002402 orig_tcp->end = sic->src_td_end;
2403 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002404 if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002405 orig_tcp->max_end = sic->src_td_max_end;
2406 }
2407
2408 /* update reply */
2409 if (repl_tcp->max_win < sic->dest_td_max_window) {
2410 repl_tcp->max_win = sic->dest_td_max_window;
2411 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002412 if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002413 repl_tcp->end = sic->dest_td_end;
2414 }
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07002415 if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002416 repl_tcp->max_end = sic->dest_td_max_end;
2417 }
2418
2419 /* update match flags */
2420 orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2421 repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002422 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Nicolas Costa436926b2014-01-14 10:36:22 -06002423 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2424 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2425 }
2426}
2427
2428static void
2429sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002430 struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002431{
2432 switch (sic->protocol) {
2433 case IPPROTO_TCP:
2434 sfe_ipv4_update_tcp_state(c, sic);
2435 break;
2436 }
2437}
2438
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002439void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
Nicolas Costa436926b2014-01-14 10:36:22 -06002440{
2441 struct sfe_ipv4_connection *c;
2442 struct sfe_ipv4 *si = &__si;
2443
2444 spin_lock_bh(&si->lock);
2445
2446 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2447 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002448 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002449 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002450 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002451 sic->dest_port);
2452 if (c != NULL) {
2453 sfe_ipv4_update_protocol_state(c, sic);
2454 }
2455
2456 spin_unlock_bh(&si->lock);
2457}
2458
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002459/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002460 * sfe_ipv4_create_rule()
2461 * Create a forwarding rule.
2462 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002463int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002464{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002465 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002466 struct sfe_ipv4_connection *c;
2467 struct sfe_ipv4_connection_match *original_cm;
2468 struct sfe_ipv4_connection_match *reply_cm;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002469 struct net_device *dest_dev;
2470 struct net_device *src_dev;
2471
2472 dest_dev = sic->dest_dev;
2473 src_dev = sic->src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002474
Matthew McClintock389b42a2014-09-24 14:05:51 -05002475 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2476 (src_dev->reg_state != NETREG_REGISTERED))) {
2477 return -EINVAL;
2478 }
2479
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002480 spin_lock_bh(&si->lock);
2481 si->connection_create_requests++;
2482
2483 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002484 * Check to see if there is already a flow that matches the rule we're
2485 * trying to create. If there is then we can't create a new one.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002486 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002487 c = sfe_ipv4_find_sfe_ipv4_connection(si,
2488 sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002489 sic->src_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002490 sic->src_port,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002491 sic->dest_ip.ip,
Nicolas Costa436926b2014-01-14 10:36:22 -06002492 sic->dest_port);
2493 if (c != NULL) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002494 si->connection_create_collisions++;
2495
2496 /*
Nicolas Costa436926b2014-01-14 10:36:22 -06002497 * If we already have the flow then it's likely that this
2498 * request to create the connection rule contains more
2499 * up-to-date information. Check and update accordingly.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002500 */
Nicolas Costa436926b2014-01-14 10:36:22 -06002501 sfe_ipv4_update_protocol_state(c, sic);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002502 spin_unlock_bh(&si->lock);
2503
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002504 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
Tian Yang45f39c82020-10-06 14:07:47 -07002505 " s: %s:%pxM:%pI4:%u, d: %s:%pxM:%pI4:%u\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002506 sic->mark, sic->protocol,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002507 sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2508 sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
Nicolas Costa514fde02014-01-13 15:50:29 -06002509 return -EADDRINUSE;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002510 }
2511
2512 /*
2513 * Allocate the various connection tracking objects.
2514 */
2515 c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2516 if (unlikely(!c)) {
2517 spin_unlock_bh(&si->lock);
Nicolas Costa514fde02014-01-13 15:50:29 -06002518 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002519 }
2520
2521 original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2522 if (unlikely(!original_cm)) {
2523 spin_unlock_bh(&si->lock);
2524 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002525 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002526 }
2527
2528 reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2529 if (unlikely(!reply_cm)) {
2530 spin_unlock_bh(&si->lock);
2531 kfree(original_cm);
2532 kfree(c);
Nicolas Costa514fde02014-01-13 15:50:29 -06002533 return -ENOMEM;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002534 }
2535
2536 /*
2537 * Fill in the "original" direction connection matching object.
2538 * Note that the transmit MAC address is "dest_mac_xlate" because
2539 * we always know both ends of a connection by their translated
2540 * addresses and not their public addresses.
2541 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002542 original_cm->match_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002543 original_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002544 original_cm->match_src_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002545 original_cm->match_src_port = sic->src_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002546 original_cm->match_dest_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002547 original_cm->match_dest_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002548 original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002549 original_cm->xlate_src_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002550 original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002551 original_cm->xlate_dest_port = sic->dest_port_xlate;
2552 original_cm->rx_packet_count = 0;
2553 original_cm->rx_packet_count64 = 0;
2554 original_cm->rx_byte_count = 0;
2555 original_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002556 original_cm->xmit_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002557 original_cm->xmit_dev_mtu = sic->dest_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002558 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002559 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2560 original_cm->connection = c;
2561 original_cm->counter_match = reply_cm;
2562 original_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002563 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2564 original_cm->priority = sic->src_priority;
2565 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2566 }
2567 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2568 original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
2569 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2570 }
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002571#ifdef CONFIG_NF_FLOW_COOKIE
2572 original_cm->flow_cookie = 0;
2573#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002574#ifdef CONFIG_XFRM
2575 original_cm->flow_accel = sic->original_accel;
2576#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002577 original_cm->active_next = NULL;
2578 original_cm->active_prev = NULL;
2579 original_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002580
2581 /*
2582 * For PPP links we don't write an L2 header. For everything else we do.
2583 */
2584 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2585 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2586
2587 /*
2588 * If our dev writes Ethernet headers then we can write a really fast
2589 * version.
2590 */
2591 if (dest_dev->header_ops) {
2592 if (dest_dev->header_ops->create == eth_header) {
2593 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2594 }
2595 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002596 }
2597
2598 /*
2599 * Fill in the "reply" direction connection matching object.
2600 */
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002601 reply_cm->match_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002602 reply_cm->match_protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002603 reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002604 reply_cm->match_src_port = sic->dest_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002605 reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002606 reply_cm->match_dest_port = sic->src_port_xlate;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002607 reply_cm->xlate_src_ip = sic->dest_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002608 reply_cm->xlate_src_port = sic->dest_port;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002609 reply_cm->xlate_dest_ip = sic->src_ip.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002610 reply_cm->xlate_dest_port = sic->src_port;
2611 reply_cm->rx_packet_count = 0;
2612 reply_cm->rx_packet_count64 = 0;
2613 reply_cm->rx_byte_count = 0;
2614 reply_cm->rx_byte_count64 = 0;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002615 reply_cm->xmit_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002616 reply_cm->xmit_dev_mtu = sic->src_mtu;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002617 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002618 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2619 reply_cm->connection = c;
2620 reply_cm->counter_match = original_cm;
2621 reply_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002622 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2623 reply_cm->priority = sic->dest_priority;
2624 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2625 }
2626 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2627 reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
2628 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2629 }
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08002630#ifdef CONFIG_NF_FLOW_COOKIE
2631 reply_cm->flow_cookie = 0;
2632#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002633#ifdef CONFIG_XFRM
2634 reply_cm->flow_accel = sic->reply_accel;
2635#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002636 reply_cm->active_next = NULL;
2637 reply_cm->active_prev = NULL;
2638 reply_cm->active = false;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002639
2640 /*
2641 * For PPP links we don't write an L2 header. For everything else we do.
2642 */
2643 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2644 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2645
2646 /*
2647 * If our dev writes Ethernet headers then we can write a really fast
2648 * version.
2649 */
2650 if (src_dev->header_ops) {
2651 if (src_dev->header_ops->create == eth_header) {
2652 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2653 }
2654 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002655 }
2656
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002657
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002658 if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002659 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2660 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2661 }
2662
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002663 if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002664 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2665 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2666 }
2667
2668 c->protocol = sic->protocol;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002669 c->src_ip = sic->src_ip.ip;
2670 c->src_ip_xlate = sic->src_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002671 c->src_port = sic->src_port;
2672 c->src_port_xlate = sic->src_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002673 c->original_dev = src_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002674 c->original_match = original_cm;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002675 c->dest_ip = sic->dest_ip.ip;
2676 c->dest_ip_xlate = sic->dest_ip_xlate.ip;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002677 c->dest_port = sic->dest_port;
2678 c->dest_port_xlate = sic->dest_port_xlate;
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002679 c->reply_dev = dest_dev;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002680 c->reply_match = reply_cm;
Matthew McClintockbe7b47d2013-11-27 13:26:23 -06002681 c->mark = sic->mark;
Xiaoping Fan34586472015-07-03 02:20:35 -07002682 c->debug_read_seq = 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002683 c->last_sync_jiffies = get_jiffies_64();
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002684
2685 /*
2686 * Take hold of our source and dest devices for the duration of the connection.
2687 */
2688 dev_hold(c->original_dev);
2689 dev_hold(c->reply_dev);
2690
2691 /*
2692 * Initialize the protocol-specific information that we track.
2693 */
2694 switch (sic->protocol) {
2695 case IPPROTO_TCP:
2696 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2697 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2698 original_cm->protocol_state.tcp.end = sic->src_td_end;
2699 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2700 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2701 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2702 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2703 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002704 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002705 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2706 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2707 }
2708 break;
2709 }
2710
2711 sfe_ipv4_connection_match_compute_translations(original_cm);
2712 sfe_ipv4_connection_match_compute_translations(reply_cm);
2713 sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2714
2715 spin_unlock_bh(&si->lock);
2716
2717 /*
2718 * We have everything we need!
2719 */
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002720 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
Tian Yang45f39c82020-10-06 14:07:47 -07002721 " s: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n"
2722 " d: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n",
Nicolas Costaf53d6fe2014-01-13 16:03:46 -06002723 sic->mark, sic->protocol,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002724 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002725 &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
Matthew McClintockdb5ac512014-01-16 17:01:40 -06002726 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002727 &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
Nicolas Costa514fde02014-01-13 15:50:29 -06002728
2729 return 0;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002730}
2731
2732/*
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002733 * sfe_ipv4_destroy_rule()
2734 * Destroy a forwarding rule.
2735 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002736void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002737{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002738 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002739 struct sfe_ipv4_connection *c;
2740
2741 spin_lock_bh(&si->lock);
2742 si->connection_destroy_requests++;
2743
2744 /*
2745 * Check to see if we have a flow that matches the rule we're trying
2746 * to destroy. If there isn't then we can't destroy it.
2747 */
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002748 c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2749 sid->dest_ip.ip, sid->dest_port);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002750 if (!c) {
2751 si->connection_destroy_misses++;
2752 spin_unlock_bh(&si->lock);
2753
2754 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Dave Hudson87973cd2013-10-22 16:00:04 +01002755 sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2756 &sid->dest_ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002757 return;
2758 }
2759
2760 /*
2761 * Remove our connection details from the hash tables.
2762 */
2763 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2764 spin_unlock_bh(&si->lock);
2765
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002766 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002767
2768 DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
Xiaoping Fand44a5b42015-05-26 17:37:37 -07002769 sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2770 &sid->dest_ip.ip, ntohs(sid->dest_port));
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002771}
2772
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	/*
	 * Publish the new callback under the lock; readers pick it up with
	 * rcu_dereference() (see sfe_ipv4_periodic_sync()).
	 */
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
2785
2786/*
2787 * sfe_ipv4_get_debug_dev()
2788 */
2789static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2790 struct device_attribute *attr,
2791 char *buf)
2792{
2793 struct sfe_ipv4 *si = &__si;
2794 ssize_t count;
2795 int num;
2796
2797 spin_lock_bh(&si->lock);
2798 num = si->debug_dev;
2799 spin_unlock_bh(&si->lock);
2800
2801 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2802 return count;
2803}
2804
2805/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002806 * sysfs attributes.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002807 */
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002808static const struct device_attribute sfe_ipv4_debug_dev_attr =
Xiaoping Fane70da412016-02-26 16:47:57 -08002809 __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002810
2811/*
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002812 * sfe_ipv4_destroy_all_rules_for_dev()
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002813 * Destroy all connections that match a particular device.
2814 *
2815 * If we pass dev as NULL then this destroys all connections.
2816 */
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002817void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002818{
Dave Hudsondcd08fb2013-11-22 09:25:16 -06002819 struct sfe_ipv4 *si = &__si;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002820 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002821
Xiaoping Fan34586472015-07-03 02:20:35 -07002822another_round:
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002823 spin_lock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002824
Xiaoping Fan34586472015-07-03 02:20:35 -07002825 for (c = si->all_connections_head; c; c = c->all_connections_next) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002826 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002827 * Does this connection relate to the device we are destroying?
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002828 */
2829 if (!dev
2830 || (dev == c->original_dev)
2831 || (dev == c->reply_dev)) {
Xiaoping Fan34586472015-07-03 02:20:35 -07002832 break;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002833 }
Xiaoping Fan34586472015-07-03 02:20:35 -07002834 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002835
Xiaoping Fan34586472015-07-03 02:20:35 -07002836 if (c) {
2837 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002838 }
2839
2840 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002841
2842 if (c) {
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002843 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
Xiaoping Fan34586472015-07-03 02:20:35 -07002844 goto another_round;
2845 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002846}
2847
/*
 * sfe_ipv4_periodic_sync()
 *	Timer handler: push statistics for a slice of active connections to the
 *	registered sync callback, then re-arm the timer (~every 10ms).
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
static void sfe_ipv4_periodic_sync(unsigned long arg)
#else
static void sfe_ipv4_periodic_sync(struct timer_list *tl)
#endif
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
#else
	struct sfe_ipv4 *si = from_timer(si, tl, timer);
#endif
	u64 now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	/*
	 * The sync callback is published via RCU; if none is registered there
	 * is nothing to do except re-arm the timer.
	 */
	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * Only ~1/64th of the connections are handled per tick so that the
	 * work is spread across timer invocations.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 * NOTE(review): the lock is dropped and re-taken around the
		 * callback, so si->active_head may change between iterations;
		 * the loop re-reads it each pass, which tolerates that.
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm for roughly one tick at 100Hz, rounding HZ up. */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2954
2955#define CHAR_DEV_MSG_SIZE 768
2956
2957/*
2958 * sfe_ipv4_debug_dev_read_start()
2959 * Generate part of the XML output.
2960 */
2961static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2962 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2963{
2964 int bytes_read;
2965
Xiaoping Fan34586472015-07-03 02:20:35 -07002966 si->debug_read_seq++;
2967
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01002968 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
2969 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2970 return false;
2971 }
2972
2973 *length -= bytes_read;
2974 *total_read += bytes_read;
2975
2976 ws->state++;
2977 return true;
2978}
2979
2980/*
2981 * sfe_ipv4_debug_dev_read_connections_start()
2982 * Generate part of the XML output.
2983 */
2984static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
2985 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
2986{
2987 int bytes_read;
2988
2989 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2990 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2991 return false;
2992 }
2993
2994 *length -= bytes_read;
2995 *total_read += bytes_read;
2996
2997 ws->state++;
2998 return true;
2999}
3000
3001/*
3002 * sfe_ipv4_debug_dev_read_connections_connection()
3003 * Generate part of the XML output.
3004 */
3005static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3006 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3007{
3008 struct sfe_ipv4_connection *c;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003009 struct sfe_ipv4_connection_match *original_cm;
3010 struct sfe_ipv4_connection_match *reply_cm;
3011 int bytes_read;
3012 int protocol;
3013 struct net_device *src_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01003014 __be32 src_ip;
3015 __be32 src_ip_xlate;
3016 __be16 src_port;
3017 __be16 src_port_xlate;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003018 u64 src_rx_packets;
3019 u64 src_rx_bytes;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003020 struct net_device *dest_dev;
Dave Hudson87973cd2013-10-22 16:00:04 +01003021 __be32 dest_ip;
3022 __be32 dest_ip_xlate;
3023 __be16 dest_port;
3024 __be16 dest_port_xlate;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003025 u64 dest_rx_packets;
3026 u64 dest_rx_bytes;
3027 u64 last_sync_jiffies;
3028 u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003029#ifdef CONFIG_NF_FLOW_COOKIE
3030 int src_flow_cookie, dst_flow_cookie;
3031#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003032
3033 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003034
3035 for (c = si->all_connections_head; c; c = c->all_connections_next) {
3036 if (c->debug_read_seq < si->debug_read_seq) {
3037 c->debug_read_seq = si->debug_read_seq;
3038 break;
3039 }
3040 }
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003041
3042 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07003043 * If there were no connections then move to the next state.
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003044 */
3045 if (!c) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003046 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003047 ws->state++;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003048 return true;
3049 }
3050
3051 original_cm = c->original_match;
3052 reply_cm = c->reply_match;
3053
3054 protocol = c->protocol;
3055 src_dev = c->original_dev;
3056 src_ip = c->src_ip;
3057 src_ip_xlate = c->src_ip_xlate;
3058 src_port = c->src_port;
3059 src_port_xlate = c->src_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003060 src_priority = original_cm->priority;
3061 src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003062
3063 sfe_ipv4_connection_match_update_summary_stats(original_cm);
3064 sfe_ipv4_connection_match_update_summary_stats(reply_cm);
3065
3066 src_rx_packets = original_cm->rx_packet_count64;
3067 src_rx_bytes = original_cm->rx_byte_count64;
3068 dest_dev = c->reply_dev;
3069 dest_ip = c->dest_ip;
3070 dest_ip_xlate = c->dest_ip_xlate;
3071 dest_port = c->dest_port;
3072 dest_port_xlate = c->dest_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003073 dest_priority = reply_cm->priority;
3074 dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003075 dest_rx_packets = reply_cm->rx_packet_count64;
3076 dest_rx_bytes = reply_cm->rx_byte_count64;
3077 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003078 mark = c->mark;
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003079#ifdef CONFIG_NF_FLOW_COOKIE
3080 src_flow_cookie = original_cm->flow_cookie;
3081 dst_flow_cookie = reply_cm->flow_cookie;
3082#endif
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003083 spin_unlock_bh(&si->lock);
3084
3085 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3086 "protocol=\"%u\" "
3087 "src_dev=\"%s\" "
3088 "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
3089 "src_port=\"%u\" src_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003090 "src_priority=\"%u\" src_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003091 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3092 "dest_dev=\"%s\" "
3093 "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
3094 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003095 "dest_priority=\"%u\" dest_dscp=\"%u\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003096 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003097#ifdef CONFIG_NF_FLOW_COOKIE
3098 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3099#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003100 "last_sync=\"%llu\" "
Nicolas Costabb85a2e2014-01-13 16:26:33 -06003101 "mark=\"%08x\" />\n",
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003102 protocol,
3103 src_dev->name,
3104 &src_ip, &src_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003105 ntohs(src_port), ntohs(src_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003106 src_priority, src_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003107 src_rx_packets, src_rx_bytes,
3108 dest_dev->name,
3109 &dest_ip, &dest_ip_xlate,
Dave Hudson87973cd2013-10-22 16:00:04 +01003110 ntohs(dest_port), ntohs(dest_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003111 dest_priority, dest_dscp,
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003112 dest_rx_packets, dest_rx_bytes,
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003113#ifdef CONFIG_NF_FLOW_COOKIE
3114 src_flow_cookie, dst_flow_cookie,
3115#endif
Cristian Prundeanu592265e2013-12-26 11:01:22 -06003116 last_sync_jiffies, mark);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003117
3118 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3119 return false;
3120 }
3121
3122 *length -= bytes_read;
3123 *total_read += bytes_read;
3124
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003125 return true;
3126}
3127
3128/*
3129 * sfe_ipv4_debug_dev_read_connections_end()
3130 * Generate part of the XML output.
3131 */
3132static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3133 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3134{
3135 int bytes_read;
3136
3137 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3138 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3139 return false;
3140 }
3141
3142 *length -= bytes_read;
3143 *total_read += bytes_read;
3144
3145 ws->state++;
3146 return true;
3147}
3148
3149/*
3150 * sfe_ipv4_debug_dev_read_exceptions_start()
3151 * Generate part of the XML output.
3152 */
3153static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3154 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3155{
3156 int bytes_read;
3157
3158 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3159 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3160 return false;
3161 }
3162
3163 *length -= bytes_read;
3164 *total_read += bytes_read;
3165
3166 ws->state++;
3167 return true;
3168}
3169
3170/*
3171 * sfe_ipv4_debug_dev_read_exceptions_exception()
3172 * Generate part of the XML output.
3173 */
3174static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3175 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3176{
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003177 u64 ct;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003178
3179 spin_lock_bh(&si->lock);
3180 ct = si->exception_events64[ws->iter_exception];
3181 spin_unlock_bh(&si->lock);
3182
3183 if (ct) {
3184 int bytes_read;
3185
3186 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3187 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3188 sfe_ipv4_exception_events_string[ws->iter_exception],
3189 ct);
3190 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3191 return false;
3192 }
3193
3194 *length -= bytes_read;
3195 *total_read += bytes_read;
3196 }
3197
3198 ws->iter_exception++;
3199 if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
3200 ws->iter_exception = 0;
3201 ws->state++;
3202 }
3203
3204 return true;
3205}
3206
3207/*
3208 * sfe_ipv4_debug_dev_read_exceptions_end()
3209 * Generate part of the XML output.
3210 */
3211static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3212 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3213{
3214 int bytes_read;
3215
3216 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3217 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3218 return false;
3219 }
3220
3221 *length -= bytes_read;
3222 *total_read += bytes_read;
3223
3224 ws->state++;
3225 return true;
3226}
3227
3228/*
3229 * sfe_ipv4_debug_dev_read_stats()
3230 * Generate part of the XML output.
3231 */
3232static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3233 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3234{
3235 int bytes_read;
3236 unsigned int num_connections;
Xiaoping Fan6a1672f2016-08-17 19:58:12 -07003237 u64 packets_forwarded;
3238 u64 packets_not_forwarded;
3239 u64 connection_create_requests;
3240 u64 connection_create_collisions;
3241 u64 connection_destroy_requests;
3242 u64 connection_destroy_misses;
3243 u64 connection_flushes;
3244 u64 connection_match_hash_hits;
3245 u64 connection_match_hash_reorders;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003246
3247 spin_lock_bh(&si->lock);
3248 sfe_ipv4_update_summary_stats(si);
3249
3250 num_connections = si->num_connections;
3251 packets_forwarded = si->packets_forwarded64;
3252 packets_not_forwarded = si->packets_not_forwarded64;
3253 connection_create_requests = si->connection_create_requests64;
3254 connection_create_collisions = si->connection_create_collisions64;
3255 connection_destroy_requests = si->connection_destroy_requests64;
3256 connection_destroy_misses = si->connection_destroy_misses64;
3257 connection_flushes = si->connection_flushes64;
3258 connection_match_hash_hits = si->connection_match_hash_hits64;
3259 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3260 spin_unlock_bh(&si->lock);
3261
3262 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3263 "num_connections=\"%u\" "
Xiaoping Fan59176422015-05-22 15:58:10 -07003264 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3265 "create_requests=\"%llu\" create_collisions=\"%llu\" "
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003266 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3267 "flushes=\"%llu\" "
3268 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3269 num_connections,
3270 packets_forwarded,
3271 packets_not_forwarded,
3272 connection_create_requests,
3273 connection_create_collisions,
3274 connection_destroy_requests,
3275 connection_destroy_misses,
3276 connection_flushes,
3277 connection_match_hash_hits,
3278 connection_match_hash_reorders);
3279 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3280 return false;
3281 }
3282
3283 *length -= bytes_read;
3284 *total_read += bytes_read;
3285
3286 ws->state++;
3287 return true;
3288}
3289
3290/*
3291 * sfe_ipv4_debug_dev_read_end()
3292 * Generate part of the XML output.
3293 */
3294static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
3295 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
3296{
3297 int bytes_read;
3298
3299 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
3300 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3301 return false;
3302 }
3303
3304 *length -= bytes_read;
3305 *total_read += bytes_read;
3306
3307 ws->state++;
3308 return true;
3309}
3310
/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 *
 * The entry order must match the state enum values: sfe_ipv4_debug_dev_read()
 * indexes this table directly with ws->state.
 */
static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
3326
3327/*
3328 * sfe_ipv4_debug_dev_read()
3329 * Send info to userspace upon read request from user
3330 */
3331static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3332{
3333 char msg[CHAR_DEV_MSG_SIZE];
3334 int total_read = 0;
3335 struct sfe_ipv4_debug_xml_write_state *ws;
3336 struct sfe_ipv4 *si = &__si;
3337
3338 ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
3339 while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3340 if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3341 continue;
3342 }
3343 }
3344
3345 return total_read;
3346}
3347
3348/*
3349 * sfe_ipv4_debug_dev_write()
Nicolas Costabafb3af2014-01-29 16:39:39 -06003350 * Write to char device resets some stats
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003351 */
3352static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3353{
Matthew McClintock54167ab2014-01-14 21:06:28 -06003354 struct sfe_ipv4 *si = &__si;
3355
3356 spin_lock_bh(&si->lock);
3357 sfe_ipv4_update_summary_stats(si);
3358
Matthew McClintock54167ab2014-01-14 21:06:28 -06003359 si->packets_forwarded64 = 0;
3360 si->packets_not_forwarded64 = 0;
3361 si->connection_create_requests64 = 0;
3362 si->connection_create_collisions64 = 0;
3363 si->connection_destroy_requests64 = 0;
3364 si->connection_destroy_misses64 = 0;
3365 si->connection_flushes64 = 0;
3366 si->connection_match_hash_hits64 = 0;
3367 si->connection_match_hash_reorders64 = 0;
3368 spin_unlock_bh(&si->lock);
3369
3370 return length;
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003371}
3372
3373/*
3374 * sfe_ipv4_debug_dev_open()
3375 */
3376static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
3377{
3378 struct sfe_ipv4_debug_xml_write_state *ws;
3379
3380 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3381 if (!ws) {
3382 ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
3383 if (!ws) {
3384 return -ENOMEM;
3385 }
3386
3387 ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
3388 file->private_data = ws;
3389 }
3390
3391 return 0;
3392}
3393
3394/*
3395 * sfe_ipv4_debug_dev_release()
3396 */
3397static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
3398{
3399 struct sfe_ipv4_debug_xml_write_state *ws;
3400
3401 ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
3402 if (ws) {
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003403 /*
3404 * We've finished with our output so free the write state.
3405 */
3406 kfree(ws);
3407 }
3408
3409 return 0;
3410}
3411
3412/*
3413 * File operations used in the debug char device
3414 */
3415static struct file_operations sfe_ipv4_debug_dev_fops = {
3416 .read = sfe_ipv4_debug_dev_read,
3417 .write = sfe_ipv4_debug_dev_write,
3418 .open = sfe_ipv4_debug_dev_open,
3419 .release = sfe_ipv4_debug_dev_release
3420};
3421
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003422#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb
 *	register a function in SFE to let SFE use this function to configure flow cookie for a flow
 *
 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
 * can use this function to configure flow cookie for a flow.
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	BUG_ON(!cb);

	/*
	 * Only a single callback may be registered at a time.
	 * NOTE(review): this check-then-publish is not atomic, so two
	 * concurrent registrations could both succeed - confirm callers
	 * serialize registration.
	 */
	if (si->flow_cookie_set_func) {
		return -1;
	}

	/*
	 * Publish the callback with the RCU write-side barrier so readers
	 * observe a fully-initialized pointer.
	 */
	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}
3444
/*
 * sfe_unregister_flow_cookie_cb
 *	unregister function which is used to configure flow cookie for a flow
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	/*
	 * NOTE(review): the pointer is cleared without a synchronize_rcu(),
	 * so a concurrent reader may still be running the old callback when
	 * this returns - confirm callers tolerate that before freeing any
	 * state the callback uses.  The cb argument is intentionally unused.
	 */
	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}
Xiaoping Fan640faf42015-08-28 15:50:55 -07003458
3459/*
3460 * sfe_ipv4_get_flow_cookie()
3461 */
3462static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev,
3463 struct device_attribute *attr,
3464 char *buf)
3465{
3466 struct sfe_ipv4 *si = &__si;
Xiaoping Fan01c67cc2015-11-09 11:31:57 -08003467 return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003468}
3469
3470/*
3471 * sfe_ipv4_set_flow_cookie()
3472 */
3473static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev,
3474 struct device_attribute *attr,
3475 const char *buf, size_t size)
3476{
3477 struct sfe_ipv4 *si = &__si;
3478 strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable);
3479
3480 return size;
3481}
3482
/*
 * sysfs attributes.
 *
 * __ATTR(name, mode, show, store): reads go to sfe_ipv4_get_flow_cookie,
 * writes to sfe_ipv4_set_flow_cookie.
 */
static const struct device_attribute sfe_ipv4_flow_cookie_attr =
	__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
Xiaoping Fand1dc7b22015-01-23 00:43:56 -08003488#endif /*CONFIG_NF_FLOW_COOKIE*/
3489
/*
 * sfe_ipv4_init()
 *	Module init: create the sysfs nodes, register the debug char device
 *	and start the periodic statistics timer.
 *
 * Uses the kernel's goto-chain cleanup pattern: each exitN label unwinds
 * everything set up before the failure point, in reverse order.
 */
static int __init sfe_ipv4_init(void)
{
	struct sfe_ipv4 *si = &__si;
	int result = -1;

	DEBUG_INFO("SFE IPv4 init\n");

	/*
	 * Create sys/sfe_ipv4
	 */
	si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
	if (!si->sys_sfe_ipv4) {
		DEBUG_ERROR("failed to register sfe_ipv4\n");
		goto exit1;
	}

	/*
	 * Create files, one for each parameter supported by this module.
	 */
	result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register debug dev file: %d\n", result);
		goto exit2;
	}

#ifdef CONFIG_NF_FLOW_COOKIE
	result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
		goto exit3;
	}
#endif /* CONFIG_NF_FLOW_COOKIE */

	/*
	 * Register our debug char device.
	 * On success register_chrdev() returns the dynamically allocated
	 * major number, which we keep for unregistration at exit.
	 */
	result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
	if (result < 0) {
		DEBUG_ERROR("Failed to register chrdev: %d\n", result);
		goto exit4;
	}

	si->debug_dev = result;

	/*
	 * Create a timer to handle periodic statistics.
	 * The timer API changed in 4.15: setup_timer() passes an opaque
	 * unsigned long, timer_setup() passes the timer_list itself.
	 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
	setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
#else
	timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0);
#endif
	/* First fire in roughly one tick at 100Hz (HZ rounded up). */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));

	spin_lock_init(&si->lock);

	return 0;

exit4:
#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);

exit3:
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

exit2:
	kobject_put(si->sys_sfe_ipv4);

exit1:
	return result;
}
3565
/*
 * sfe_ipv4_exit()
 *	Module exit: tear down in reverse order of sfe_ipv4_init().
 */
static void __exit sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.  A NULL device argument matches every
	 * connection.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	/*
	 * del_timer_sync() waits for a concurrently-running timer handler
	 * to finish before returning, so no sync can be in flight below.
	 */
	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);

}
3592
module_init(sfe_ipv4_init)
module_exit(sfe_ipv4_exit)

/*
 * Public entry points of the IPv4 fast-path engine, used by the connection
 * manager (sfe_cm) and flow-cookie-capable hardware drivers.
 */
EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01003610