/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/version.h>
#include <linux/lockdep.h>

#include "sfe_debug.h"
#include "sfe.h"
#include "sfe_flow_cookie.h"
#include "sfe_ipv4.h"

static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL"
};

static struct sfe_ipv4 __si;

/*
 * sfe_ipv4_gen_ip_csum()
 *	Generate the IP checksum for an IPv4 header.
 *
 * Note that this function assumes that we have only 20 bytes of IP header.
 */
static inline u16 sfe_ipv4_gen_ip_csum(struct iphdr *iph)
{
	u32 sum;
	u16 *i = (u16 *)iph;

	iph->check = 0;

	/*
	 * Generate the sum.
	 */
	sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];

	/*
	 * Fold it to ones-complement form.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return (u16)sum ^ 0xffff;
}
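
/*
 * Worked example for the fold above (illustrative numbers, not from a real
 * packet): if the ten 16-bit words sum to 0x23456, the first fold gives
 * 0x3456 + 0x2 = 0x3458 and the second fold leaves it unchanged, so the
 * function returns 0x3458 ^ 0xffff = 0xcba7, i.e. the ones-complement of
 * the folded sum.
 */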

/*
 * sfe_ipv4_get_connection_match_hash()
 *	Generate the hash used in connection match lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol,
							      __be32 src_ip, __be16 src_port,
							      __be32 dest_ip, __be16 dest_port)
{
	size_t dev_addr = (size_t)dev;
	u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}
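
/*
 * Note on the hash above: the XOR fold mixes the upper bits of the 32-bit
 * hash down into the low-order bucket index, so 5-tuples that differ only
 * in high-order bits still spread across buckets.  The ingress device also
 * participates, so the same 5-tuple arriving on a different interface
 * normally lands in a different bucket and, in any case, only matches an
 * entry created for that interface.
 */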

/*
 * sfe_ipv4_find_sfe_ipv4_connection_match_rcu()
 *	Get the IPv4 flow match info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding an RCU read lock.
 */
static struct sfe_ipv4_connection_match *
sfe_ipv4_find_sfe_ipv4_connection_match_rcu(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
					    __be32 src_ip, __be16 src_port,
					    __be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection_match *cm = NULL;
	unsigned int conn_match_idx;
	struct hlist_head *lhead;

	WARN_ON_ONCE(!rcu_read_lock_held());

	conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);

	lhead = &si->hlist_conn_match_hash_head[conn_match_idx];

	hlist_for_each_entry_rcu(cm, lhead, hnode) {
		if (cm->match_src_port != src_port
		    || cm->match_dest_port != dest_port
		    || cm->match_src_ip != src_ip
		    || cm->match_dest_ip != dest_ip
		    || cm->match_protocol != protocol
		    || cm->match_dev != dev) {
			continue;
		}

		this_cpu_inc(si->stats_pcpu->connection_match_hash_hits64);

		break;
	}

	return cm;
}

/*
 * sfe_ipv4_connection_match_update_summary_stats()
 *	Update the summary stats for a connection match entry.
 *
 * Stats are incremented atomically, so use atomic subtraction to update the
 * summary stats.
 */
static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm,
								  u32 *packets, u32 *bytes)
{
	u32 packet_count, byte_count;

	packet_count = atomic_read(&cm->rx_packet_count);
	cm->rx_packet_count64 += packet_count;
	atomic_sub(packet_count, &cm->rx_packet_count);

	byte_count = atomic_read(&cm->rx_byte_count);
	cm->rx_byte_count64 += byte_count;
	atomic_sub(byte_count, &cm->rx_byte_count);

	*packets = packet_count;
	*bytes = byte_count;
}
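
/*
 * Note that the read-then-subtract sequence above is safe without a lock:
 * each atomic_sub() removes exactly the amount that was just read, so any
 * packets counted between the atomic_read() and the atomic_sub() remain in
 * the counter and are picked up by the next sync.
 */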

/*
 * sfe_ipv4_connection_match_compute_translations()
 *	Compute port and address translations for a connection match entry.
 */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations.  If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		u16 src_ip_hi = cm->match_src_ip >> 16;
		u16 src_ip_lo = cm->match_src_ip & 0xffff;
		u32 xlate_src_ip = ~cm->xlate_src_ip;
		u16 xlate_src_ip_hi = xlate_src_ip >> 16;
		u16 xlate_src_ip_lo = xlate_src_ip & 0xffff;
		u16 xlate_src_port = ~cm->xlate_src_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (u16)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		u16 dest_ip_hi = cm->match_dest_ip >> 16;
		u16 dest_ip_lo = cm->match_dest_ip & 0xffff;
		u32 xlate_dest_ip = ~cm->xlate_dest_ip;
		u16 xlate_dest_ip_hi = xlate_dest_ip >> 16;
		u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		u16 xlate_dest_port = ~cm->xlate_dest_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (u16)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		u32 adj = ~cm->match_src_ip + cm->xlate_src_ip;

		if (adj < cm->xlate_src_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_partial_csum_adjustment = (u16)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip;

		if (adj < cm->xlate_dest_ip) {
			adj++;
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_partial_csum_adjustment = (u16)adj;
	}
}
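
/*
 * Worked example for the RFC1624 adjustment above, using illustrative
 * values and treating the addresses as big-endian 16-bit words: rewriting
 * source 192.168.1.1 (0xc0a8.0x0101) to 10.0.0.1 (0x0a00.0x0001) with the
 * port unchanged (port + ~port contributes 0xffff) gives
 *
 *	adj = 0xc0a8 + 0x0101 + 0xf5ff + 0xfffe + 0xffff = 0x3b7a5
 *
 * which folds to 0xb7a5 + 0x3 = 0xb7a8.  At forwarding time a transport
 * checksum of, say, 0x1234 then becomes 0x1234 + 0xb7a8 = 0xc9dc after the
 * single fold performed in the receive path.
 */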

/*
 * sfe_ipv4_update_summary_stats()
 *	Update the summary stats.
 */
static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si, struct sfe_ipv4_stats *stats)
{
	int i = 0;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i);

		stats->connection_create_requests64 += s->connection_create_requests64;
		stats->connection_create_collisions64 += s->connection_create_collisions64;
		stats->connection_destroy_requests64 += s->connection_destroy_requests64;
		stats->connection_destroy_misses64 += s->connection_destroy_misses64;
		stats->connection_match_hash_hits64 += s->connection_match_hash_hits64;
		stats->connection_match_hash_reorders64 += s->connection_match_hash_reorders64;
		stats->connection_flushes64 += s->connection_flushes64;
		stats->packets_forwarded64 += s->packets_forwarded64;
		stats->packets_not_forwarded64 += s->packets_not_forwarded64;
	}
}
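
/*
 * Because every counter is kept per-CPU, the totals computed above are not
 * an atomic snapshot: increments that land on another CPU while we walk the
 * CPUs may or may not be included.  That is acceptable for statistics
 * reporting.
 */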

/*
 * sfe_ipv4_insert_sfe_ipv4_connection_match()
 *	Insert a connection match into the hash.
 */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si,
							     struct sfe_ipv4_connection_match *cm)
{
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);

	lockdep_assert_held(&si->lock);

	hlist_add_head_rcu(&cm->hnode, &si->hlist_conn_match_hash_head[conn_match_idx]);
#ifdef CONFIG_NF_FLOW_COOKIE
	if (!si->flow_cookie_enable)
		return;

	/*
	 * Configure the hardware to put a flow cookie in packets of this flow,
	 * so we can accelerate the lookup process when we receive them.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}

/*
 * sfe_ipv4_remove_sfe_ipv4_connection_match()
 *	Remove a connection match object from the hash.
 */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	lockdep_assert_held(&si->lock);

#ifdef CONFIG_NF_FLOW_COOKIE
	if (si->flow_cookie_enable) {
		/*
		 * Tell the hardware that we no longer need a flow cookie in packets of this flow.
		 */
		unsigned int conn_match_idx;

		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

			if (cm == entry->match) {
				flow_cookie_set_func_t func;

				rcu_read_lock();
				func = rcu_dereference(si->flow_cookie_set_func);
				if (func) {
					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					     cm->match_dest_ip, cm->match_dest_port, 0);
				}
				rcu_read_unlock();

				cm->flow_cookie = 0;
				entry->match = NULL;
				entry->last_clean_time = jiffies;
				break;
			}
		}
	}
#endif

	hlist_del_init_rcu(&cm->hnode);

	/*
	 * If the connection match entry is in the active list remove it.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}

/*
 * sfe_ipv4_get_connection_hash()
 *	Generate the hash used in connection lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
							__be32 dest_ip, __be16 dest_port)
{
	u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}

/*
 * sfe_ipv4_find_sfe_ipv4_connection()
 *	Get the IPv4 connection info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol,
									     __be32 src_ip, __be16 src_port,
									     __be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection *c;
	unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);

	lockdep_assert_held(&si->lock);

	c = si->conn_hash[conn_idx];

	/*
	 * The connection entry will be needed for the next create/destroy
	 * request, so there is no need to re-order entries for these requests.
	 */
	while (c) {
		if ((c->src_port == src_port)
		    && (c->dest_port == dest_port)
		    && (c->src_ip == src_ip)
		    && (c->dest_ip == dest_ip)
		    && (c->protocol == protocol)) {
			return c;
		}

		c = c->next;
	}

	return NULL;
}

/*
 * sfe_ipv4_mark_rule()
 *	Update the mark for a currently offloaded connection.
 *
 * Takes the hash lock upon entry.
 */
void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;

	spin_lock_bh(&si->lock);
	c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
					      mark->src_ip.ip, mark->src_port,
					      mark->dest_ip.ip, mark->dest_port);
	if (c) {
		WARN_ON((0 != c->mark) && (0 == mark->mark));
		c->mark = mark->mark;
	}
	spin_unlock_bh(&si->lock);

	if (c) {
		DEBUG_TRACE("Matching connection found for mark, "
			    "setting from %08x to %08x\n",
			    c->mark, mark->mark);
	}
}

/*
 * sfe_ipv4_insert_sfe_ipv4_connection()
 *	Insert a connection into the hash.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	struct sfe_ipv4_connection **hash_head;
	struct sfe_ipv4_connection *prev_head;
	unsigned int conn_idx;

	lockdep_assert_held(&si->lock);

	/*
	 * Insert entry into the connection hash.
	 */
	conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
						c->dest_ip, c->dest_port);
	hash_head = &si->conn_hash[conn_idx];
	prev_head = *hash_head;
	c->prev = NULL;
	if (prev_head) {
		prev_head->prev = c;
	}

	c->next = prev_head;
	*hash_head = c;

	/*
	 * Insert entry into the "all connections" list.
	 */
	if (si->all_connections_tail) {
		c->all_connections_prev = si->all_connections_tail;
		si->all_connections_tail->all_connections_next = c;
	} else {
		c->all_connections_prev = NULL;
		si->all_connections_head = c;
	}

	si->all_connections_tail = c;
	c->all_connections_next = NULL;
	si->num_connections++;

	/*
	 * Insert the connection match objects too.
	 */
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
	sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
}
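
/*
 * Note that each connection owns two match entries, one for the original
 * direction and one for the reply direction, each hashed on its own
 * 5-tuple and ingress device.  A packet lookup in either direction
 * therefore finds the entry that describes how to transform that packet.
 */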

/*
 * sfe_ipv4_remove_sfe_ipv4_connection()
 *	Remove a sfe_ipv4_connection object from the hash.
 */
static bool sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	lockdep_assert_held(&si->lock);

	if (c->removed) {
		DEBUG_ERROR("%px: Connection has been removed already\n", c);
		return false;
	}

	/*
	 * Remove the connection match objects.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);

	/*
	 * Unlink the connection.
	 */
	if (c->prev) {
		c->prev->next = c->next;
	} else {
		unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
								     c->dest_ip, c->dest_port);
		si->conn_hash[conn_idx] = c->next;
	}

	if (c->next) {
		c->next->prev = c->prev;
	}

	/*
	 * Unlink connection from the all_connections list.
	 */
	if (c->all_connections_prev) {
		c->all_connections_prev->all_connections_next = c->all_connections_next;
	} else {
		si->all_connections_head = c->all_connections_next;
	}

	if (c->all_connections_next) {
		c->all_connections_next->all_connections_prev = c->all_connections_prev;
	} else {
		si->all_connections_tail = c->all_connections_prev;
	}

	c->removed = true;
	si->num_connections--;
	return true;
}

/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *	Sync a connection.
 *
 * On entry to this function we expect that the lock for the connection is
 * either already held (when called from sfe_ipv4_periodic_sync()) or isn't
 * required (when called from sfe_ipv4_flush_sfe_ipv4_connection()).
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
						  u64 now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	u32 packet_count, byte_count;

	/*
	 * Fill in the update message.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->src_ip_xlate.ip = c->src_ip_xlate;
	sis->dest_ip.ip = c->dest_ip;
	sis->dest_ip_xlate.ip = c->dest_ip_xlate;
	sis->src_port = c->src_port;
	sis->src_port_xlate = c->src_port_xlate;
	sis->dest_port = c->dest_port;
	sis->dest_port_xlate = c->dest_port_xlate;

	original_cm = c->original_match;
	reply_cm = c->reply_match;
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet_count, &byte_count);
	sis->src_new_packet_count = packet_count;
	sis->src_new_byte_count = byte_count;

	sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet_count, &byte_count);
	sis->dest_new_packet_count = packet_count;
	sis->dest_new_byte_count = byte_count;

	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	sis->reason = reason;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}

645/*
Ratheesh Kannoth94fc5b82021-10-20 07:45:06 +0530646 * sfe_ipv4_free_sfe_ipv4_connection_rcu()
647 * Called at RCU qs state to free the connection object.
648 */
649static void sfe_ipv4_free_sfe_ipv4_connection_rcu(struct rcu_head *head)
650{
651 struct sfe_ipv4_connection *c;
652
653 /*
654 * We dont need spin lock as the connection is already removed from link list
655 */
656 c = container_of(head, struct sfe_ipv4_connection, rcu);
657
658 BUG_ON(!c->removed);
659
660 DEBUG_TRACE("%px: connecton has been deleted\n", c);

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
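
/*
 * Teardown lifecycle note: a connection is first unlinked from the hash and
 * lists under si->lock (which sets c->removed), and the free is then
 * deferred via call_rcu() from sfe_ipv4_flush_sfe_ipv4_connection().  The
 * grace period guarantees that no RCU reader can still hold a reference to
 * the match entries by the time this function runs.
 */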

/*
 * sfe_ipv4_flush_sfe_ipv4_connection()
 *	Flush a connection and free all associated resources.
 *
 * We need to be called with bottom halves disabled locally as we need to acquire
 * the connection hash lock and release it again.  In general we're actually called
 * from within a BH and so we're fine, but we're also called when connections are
 * torn down.
 */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si,
					       struct sfe_ipv4_connection *c,
					       sfe_sync_reason_t reason)
{
	u64 now_jiffies;
	sfe_sync_rule_callback_t sync_rule_callback;

	BUG_ON(!c->removed);

	this_cpu_inc(si->stats_pcpu->connection_flushes64);

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);

	/*
	 * Generate a sync message and then sync.
	 */
	if (sync_rule_callback) {
		struct sfe_connection_sync sis;

		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	call_rcu(&c->rcu, sfe_ipv4_free_sfe_ipv4_connection_rcu);
}

/*
 * sfe_ipv4_exception_stats_inc()
 *	Increment exception stats.
 */
static inline void sfe_ipv4_exception_stats_inc(struct sfe_ipv4 *si, enum sfe_ipv4_exception_events reason)
{
	struct sfe_ipv4_stats *stats = this_cpu_ptr(si->stats_pcpu);

	stats->exception_events64[reason]++;
	stats->packets_not_forwarded64++;
}

/*
 * sfe_ipv4_recv_udp()
 *	Handle UDP packet receives and forwarding.
 */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct iphdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct udphdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	u8 ttl;
	struct net_device *xmit_dev;
	bool ret;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct udphdr) + ihl)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE);
		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information.  Read the IP header data first
	 * because we've almost certainly got that in the cache.  We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct udphdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	rcu_read_lock();

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION);
		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		DEBUG_TRACE("flush on find\n");
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		rcu_read_unlock();
		this_cpu_inc(si->stats_pcpu->packets_not_forwarded64);
		return 0;
	}
#endif

	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Check if the skb was cloned.  If it was, unshare it, because the data
	 * area is going to be written in this path and we don't want to change
	 * the cloned skb's data section.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			rcu_read_unlock();
			return 0;
		}

		/*
		 * Update the iph and udph pointers with the unshared skb's data area.
		 */
		iph = (struct iphdr *)skb->data;
		udph = (struct udphdr *)(skb->data + ihl);
	}

	/*
	 * Update DSCP
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			u32 sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (u16)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			u32 sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (u16)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	atomic_inc(&cm->rx_packet_count);
	atomic_add(len, &cm->rx_byte_count);

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		spin_lock_bh(&si->lock);
		if (unlikely(!cm->active)) {
			cm->active = true;
			cm->active_prev = si->active_tail;
			if (likely(si->active_tail)) {
				si->active_tail->active_next = cm;
			} else {
				si->active_head = cm;
			}
			si->active_tail = cm;
		}
		spin_unlock_bh(&si->lock);
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct ethhdr *eth = (struct ethhdr *)__skb_push(skb, ETH_HLEN);

			eth->h_proto = htons(ETH_P_IP);
			ether_addr_copy((u8 *)eth->h_dest, (u8 *)cm->xmit_dest_mac);
			ether_addr_copy((u8 *)eth->h_source, (u8 *)cm->xmit_src_mac);
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	rcu_read_unlock();

	this_cpu_inc(si->stats_pcpu->packets_forwarded64);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}

/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ACK accordingly.
 */
static bool sfe_ipv4_process_tcp_option_sack(const struct tcphdr *th, const u32 data_offs,
					     u32 *ack)
{
	u32 length = sizeof(struct tcphdr);
	u8 *ptr = (u8 *)th + length;

	/*
	 * Skip the parse entirely if the TCP packet carries only the TIMESTAMP option.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options.  Parse SACK option.
	 */
	while (length < data_offs) {
		u8 size;
		u8 kind;

		ptr = (u8 *)th + length;
		kind = *ptr;

		/*
		 * NOP, used for padding.
		 * Handled before the other options so we can skip it quickly
		 * without reading a size byte.
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			u32 sack = 0;
			u8 re = 1 + 1;

			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			re += 4;
			while (re < size) {
				u32 sack_re;
				u8 *sptr = ptr + re;

				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}

			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}

		if (kind == TCPOPT_EOL) {
			return true;
		}

		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}
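
/*
 * Layout note for the SACK walk above (illustrative bytes, not from a real
 * trace): a single-block SACK option is kind (TCPOPT_SACK), length (10),
 * then a 4-byte left edge and a 4-byte right edge.  The loop starts at
 * offset 2, skips 4 bytes past each left edge and reads the right edge of
 * every 8-byte block, keeping the largest one; that maximum then raises
 * *ack if it lies beyond the cumulative ACK.
 */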

/*
 * sfe_ipv4_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 */
static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct iphdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct tcphdr *tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *counter_cm;
	u8 ttl;
	u32 flags;
	struct net_device *xmit_dev;
	bool ret;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct tcphdr) + ihl)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE);
		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information.  Read the IP header data first
	 * because we've almost certainly got that in the cache.  We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	tcph = (struct tcphdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);

	rcu_read_lock();

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		/*
		 * We didn't get a connection but as TCP is connection-oriented that
		 * may be because this is a non-fast connection (not running established).
		 * For diagnostic purposes we differentiate this here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS);
			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS);
		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();

		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		rcu_read_unlock();
		this_cpu_inc(si->stats_pcpu->packets_not_forwarded64);
		return 0;
	}
#endif

	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001264 return 0;
1265 }
1266
1267 /*
1268 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1269 * set is not a fast path packet.
1270 */
1271 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1272 struct sfe_ipv4_connection *c = cm->connection;
Ratheesh Kannoth94fc5b82021-10-20 07:45:06 +05301273 spin_lock_bh(&si->lock);
1274 ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001275 spin_unlock_bh(&si->lock);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001276
1277 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1278 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
Ratheesh Kannoth94fc5b82021-10-20 07:45:06 +05301279 if (ret) {
1280 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1281 }
1282 rcu_read_unlock();
Ratheesh Kannoth3aeb2892021-10-20 07:57:15 +05301283 sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS);
Dave Hudsonaaf97ca2013-06-13 17:52:29 +01001284 return 0;
1285 }
1286
	counter_cm = cm->counter_match;

	/*
	 * Are we doing sequence number checking?
	 */
	if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
		u32 seq;
		u32 ack;
		u32 sack;
		u32 data_offs;
		u32 end;
		u32 left_edge;
		u32 scaled_win;
		u32 max_end;

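		/*
		 * The checks below mirror conntrack-style TCP window
		 * tracking: every comparison is done on a signed 32-bit
		 * difference so that sequence numbers still compare
		 * correctly across 2^32 wrap-around.
		 */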
		/*
		 * Is our sequence fully past the right hand edge of the window?
		 */
		seq = ntohl(tcph->seq);
		if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
				    seq, cm->protocol_state.tcp.max_end + 1);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't too short.
		 */
		data_offs = tcph->doff << 2;
		if (unlikely(data_offs < sizeof(struct tcphdr))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS);
			return 0;
		}

		/*
		 * Update ACK according to any SACK option.
		 */
		ack = ntohl(tcph->ack_seq);
		sack = ack;
		if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP option SACK size is wrong\n");
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't past the end of the packet.
		 */
		data_offs += sizeof(struct iphdr);
		if (unlikely(len < data_offs)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
				    data_offs, len);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS);
			return 0;
		}

		end = seq + len - data_offs;

		/*
		 * Is our sequence fully before the left hand edge of the window?
		 */
		if (unlikely((s32)(end - (cm->protocol_state.tcp.end
					  - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u before left edge: %u\n",
				    end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Are we acking data that is to the right of what has been sent?
		 */
		if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
				    sack, counter_cm->protocol_state.tcp.end + 1);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Is our ack too far before the left hand edge of the window?
		 */
		left_edge = counter_cm->protocol_state.tcp.end
			    - cm->protocol_state.tcp.max_win
			    - SFE_IPV4_TCP_MAX_ACK_WINDOW
			    - 1;
		if (unlikely((s32)(sack - left_edge) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
			if (ret) {
				sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Have we just seen the largest window size yet for this connection?  If yes
		 * then we need to record the new value.
		 */
		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
		scaled_win += (sack - ack);
		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
			cm->protocol_state.tcp.max_win = scaled_win;
		}

		/*
		 * If our sequence and/or ack numbers have advanced then record the new state.
		 */
		if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
			cm->protocol_state.tcp.end = end;
		}

		max_end = sack + scaled_win;
		if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
			counter_cm->protocol_state.tcp.max_end = max_end;
		}
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Check if the skb was cloned.  If it was, unshare it, because the
	 * data area is going to be written in this path and we don't want
	 * to change the cloned skb's data section.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			rcu_read_unlock();
			return 0;
		}

		/*
		 * Update the iph and tcph pointers with the unshared skb's data area.
		 */
		iph = (struct iphdr *)skb->data;
		tcph = (struct tcphdr *)(skb->data + ihl);
	}

	/*
	 * Update DSCP.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 tcp_csum;
		u32 sum;

		iph->saddr = cm->xlate_src_ip;
		tcph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero TCP checksum?  If we do then we need
		 * to update it.
		 */
		tcp_csum = tcph->check;
		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
			sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
		} else {
			sum = tcp_csum + cm->xlate_src_csum_adjustment;
		}

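		/*
		 * Fold the carry back into the low 16 bits: checksums use
		 * one's-complement arithmetic, and a single fold is enough
		 * here because we only added one 16-bit adjustment to a
		 * 16-bit checksum value.
		 */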
		sum = (sum & 0xffff) + (sum >> 16);
		tcph->check = (u16)sum;
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 tcp_csum;
		u32 sum;

		iph->daddr = cm->xlate_dest_ip;
		tcph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero TCP checksum?  If we do then we need
		 * to update it.
		 */
		tcp_csum = tcph->check;
		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
			sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
		} else {
			sum = tcp_csum + cm->xlate_dest_csum_adjustment;
		}

		sum = (sum & 0xffff) + (sum >> 16);
		tcph->check = (u16)sum;
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	atomic_inc(&cm->rx_packet_count);
	atomic_add(len, &cm->rx_byte_count);

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		spin_lock_bh(&si->lock);
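		/*
		 * Re-check the flag now that we hold the lock: another CPU
		 * may have inserted this match since we tested it above.
		 */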
		if (unlikely(!cm->active)) {
			cm->active = true;
			cm->active_prev = si->active_tail;
			if (likely(si->active_tail)) {
				si->active_tail->active_next = cm;
			} else {
				si->active_head = cm;
			}
			si->active_tail = cm;
		}
		spin_unlock_bh(&si->lock);
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct ethhdr *eth = (struct ethhdr *)__skb_push(skb, ETH_HLEN);

			eth->h_proto = htons(ETH_P_IP);

			ether_addr_copy((u8 *)eth->h_dest, (u8 *)cm->xmit_dest_mac);
			ether_addr_copy((u8 *)eth->h_source, (u8 *)cm->xmit_src_mac);
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	rcu_read_unlock();

	this_cpu_inc(si->stats_pcpu->packets_forwarded64);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}

/*
 * sfe_ipv4_recv_icmp()
 *	Handle ICMP packet receives.
 *
 * ICMP packets aren't handled as a "fast path" and always have us process them
 * through the default Linux stack.  What we do need to do is look for any errors
 * about connections we are handling in the fast path.  If we find any such
 * connections then we want to flush their state so that the ICMP error path
 * within Linux has all of the correct state should it need it.
 */
static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			      unsigned int len, struct iphdr *iph, unsigned int ihl)
{
	struct icmphdr *icmph;
	struct iphdr *icmp_iph;
	unsigned int icmp_ihl_words;
	unsigned int icmp_ihl;
	u32 *icmp_trans_h;
	struct udphdr *icmp_udph;
	struct tcphdr *icmp_tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection *c;
	u32 pull_len = sizeof(struct icmphdr) + ihl;
	bool ret;

	/*
	 * Is our packet too short to contain a valid ICMP header?
	 */
	len -= ihl;
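	/*
	 * pull_len tracks how many bytes of headers we have validated so
	 * far; each pskb_may_pull() below grows the linear data area to
	 * cover the next header we want to inspect.
	 */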
	if (!pskb_may_pull(skb, pull_len)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE);

		DEBUG_TRACE("packet too short for ICMP header\n");
		return 0;
	}

	/*
	 * We only handle "destination unreachable" and "time exceeded" messages.
	 */
	icmph = (struct icmphdr *)(skb->data + ihl);
	if ((icmph->type != ICMP_DEST_UNREACH)
	    && (icmph->type != ICMP_TIME_EXCEEDED)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE);
		DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header?
	 */
	len -= sizeof(struct icmphdr);
	pull_len += sizeof(struct iphdr);
	if (!pskb_may_pull(skb, pull_len)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE);
		DEBUG_TRACE("Embedded IP header not complete\n");
		return 0;
	}

	/*
	 * Is our embedded IP version wrong?
	 */
	icmp_iph = (struct iphdr *)(icmph + 1);
	if (unlikely(icmp_iph->version != 4)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4);
		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
		return 0;
	}

	/*
	 * Do we have the full embedded IP header, including any options?
	 */
	icmp_ihl_words = icmp_iph->ihl;
	icmp_ihl = icmp_ihl_words << 2;
	pull_len += icmp_ihl - sizeof(struct iphdr);
	if (!pskb_may_pull(skb, pull_len)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE);
		DEBUG_TRACE("Embedded header not large enough for IP options\n");
		return 0;
	}

	len -= icmp_ihl;
	icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words;

	/*
	 * Handle the embedded transport layer header.
	 */
	switch (icmp_iph->protocol) {
	case IPPROTO_UDP:
		/*
		 * We should have 8 bytes of UDP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE);
			DEBUG_TRACE("Incomplete embedded UDP header\n");
			return 0;
		}

		icmp_udph = (struct udphdr *)icmp_trans_h;
		src_port = icmp_udph->source;
		dest_port = icmp_udph->dest;
		break;

	case IPPROTO_TCP:
		/*
		 * We should have 8 bytes of TCP header - that's enough to identify
		 * the connection.
		 */
		pull_len += 8;
		if (!pskb_may_pull(skb, pull_len)) {
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE);
			DEBUG_TRACE("Incomplete embedded TCP header\n");
			return 0;
		}

		icmp_tcph = (struct tcphdr *)icmp_trans_h;
		src_port = icmp_tcph->source;
		dest_port = icmp_tcph->dest;
		break;

	default:
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL);
		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
		return 0;
	}

	src_ip = icmp_iph->saddr;
	dest_ip = icmp_iph->daddr;

	rcu_read_lock();

	/*
	 * Look for a connection match.  Note that we reverse the source and destination
	 * here because our embedded message contains a packet that was sent in the
	 * opposite direction to the one in which we just received it.  It will have
	 * been sent on the interface from which we received it though so that's still
	 * ok to use.
	 */
	cm = sfe_ipv4_find_sfe_ipv4_connection_match_rcu(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
	if (unlikely(!cm)) {
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION);
		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * We found a connection so now remove it from the connection list and flush
	 * its state.
	 */
	c = cm->connection;
	spin_lock_bh(&si->lock);
	ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	spin_unlock_bh(&si->lock);

	if (ret) {
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
	}
	rcu_read_unlock();
	sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION);
	return 0;
}

/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool flush_on_find;
	bool ip_options;
	struct iphdr *iph;
	u32 protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE);
		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 */
	iph = (struct iphdr *)skb->data;
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct iphdr))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH);
		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_V4);
		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?
	 */
	frag_off = ntohs(iph->frag_off);
	if (unlikely(frag_off & IP_OFFSET)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT);
		DEBUG_TRACE("non-initial fragment\n");
		return 0;
	}

	/*
	 * If we have a (first) fragment then mark it to cause any connection to flush.
	 */
	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;

	/*
	 * Do we have any IP options?  That's definitely a slow path!  If we do have IP
	 * options we need to recheck our header size.
	 */
	ihl = iph->ihl << 2;
	ip_options = unlikely(ihl != sizeof(struct iphdr)) ? true : false;
	if (unlikely(ip_options)) {
		if (unlikely(len < ihl)) {
			DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE);
			return 0;
		}

		flush_on_find = true;
	}

	protocol = iph->protocol;
	if (IPPROTO_UDP == protocol) {
		return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == protocol) {
		return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMP == protocol) {
		return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
	return 0;
}

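/*
 * sfe_ipv4_update_tcp_state()
 *	Update the TCP window tracking state of a connection from a create request.
 */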
static void
sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
			  struct sfe_connection_create *sic)
{
	struct sfe_ipv4_connection_match *orig_cm;
	struct sfe_ipv4_connection_match *repl_cm;
	struct sfe_ipv4_tcp_connection_match *orig_tcp;
	struct sfe_ipv4_tcp_connection_match *repl_tcp;

	orig_cm = c->original_match;
	repl_cm = c->reply_match;
	orig_tcp = &orig_cm->protocol_state.tcp;
	repl_tcp = &repl_cm->protocol_state.tcp;

	/* update orig */
	if (orig_tcp->max_win < sic->src_td_max_window) {
		orig_tcp->max_win = sic->src_td_max_window;
	}
	if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
		orig_tcp->end = sic->src_td_end;
	}
	if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
		orig_tcp->max_end = sic->src_td_max_end;
	}

	/* update reply */
	if (repl_tcp->max_win < sic->dest_td_max_window) {
		repl_tcp->max_win = sic->dest_td_max_window;
	}
	if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
		repl_tcp->end = sic->dest_td_end;
	}
	if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
		repl_tcp->max_end = sic->dest_td_max_end;
	}

	/* update match flags */
	orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
		orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	}
}

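/*
 * sfe_ipv4_update_protocol_state()
 *	Update the protocol-specific state of a connection from a create request.
 */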
static void
sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
			       struct sfe_connection_create *sic)
{
	switch (sic->protocol) {
	case IPPROTO_TCP:
		sfe_ipv4_update_tcp_state(c, sic);
		break;
	}
}

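/*
 * sfe_ipv4_update_rule()
 *	Update an existing forwarding rule from a create request.
 */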
void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
{
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);

	c = sfe_ipv4_find_sfe_ipv4_connection(si,
					      sic->protocol,
					      sic->src_ip.ip,
					      sic->src_port,
					      sic->dest_ip.ip,
					      sic->dest_port);
	if (c != NULL) {
		sfe_ipv4_update_protocol_state(c, sic);
	}

	spin_unlock_bh(&si->lock);
}

/*
 * sfe_ipv4_create_rule()
 *	Create a forwarding rule.
 */
int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c, *c_old;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	struct net_device *dest_dev;
	struct net_device *src_dev;

	dest_dev = sic->dest_dev;
	src_dev = sic->src_dev;

	if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
		     (src_dev->reg_state != NETREG_REGISTERED))) {
		return -EINVAL;
	}

	/*
	 * Allocate the various connection tracking objects.  We do this
	 * up front so that we never allocate while holding the lock.
	 */
	c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
	if (unlikely(!c)) {
		return -ENOMEM;
	}

	original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!original_cm)) {
		kfree(c);
		return -ENOMEM;
	}

	reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!reply_cm)) {
		kfree(original_cm);
		kfree(c);
		return -ENOMEM;
	}

	this_cpu_inc(si->stats_pcpu->connection_create_requests64);

	spin_lock_bh(&si->lock);

	/*
	 * Check to see if there is already a flow that matches the rule we're
	 * trying to create.  If there is then we can't create a new one.
	 */
	c_old = sfe_ipv4_find_sfe_ipv4_connection(si,
						  sic->protocol,
						  sic->src_ip.ip,
						  sic->src_port,
						  sic->dest_ip.ip,
						  sic->dest_port);
	if (c_old != NULL) {
		this_cpu_inc(si->stats_pcpu->connection_create_collisions64);

		/*
		 * If we already have the flow then it's likely that this
		 * request to create the connection rule contains more
		 * up-to-date information.  Check and update accordingly.
		 */
		sfe_ipv4_update_protocol_state(c_old, sic);
		spin_unlock_bh(&si->lock);

		kfree(reply_cm);
		kfree(original_cm);
		kfree(c);

		DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
			    " s: %s:%pxM:%pI4:%u, d: %s:%pxM:%pI4:%u\n",
			    sic->mark, sic->protocol,
			    sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
			    sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
		return -EADDRINUSE;
	}

	/*
	 * Fill in the "original" direction connection matching object.
	 * Note that the transmit MAC address is "dest_mac_xlate" because
	 * we always know both ends of a connection by their translated
	 * addresses and not their public addresses.
	 */
	original_cm->match_dev = src_dev;
	original_cm->match_protocol = sic->protocol;
	original_cm->match_src_ip = sic->src_ip.ip;
	original_cm->match_src_port = sic->src_port;
	original_cm->match_dest_ip = sic->dest_ip.ip;
	original_cm->match_dest_port = sic->dest_port;
	original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
	original_cm->xlate_src_port = sic->src_port_xlate;
	original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
	original_cm->xlate_dest_port = sic->dest_port_xlate;
	atomic_set(&original_cm->rx_packet_count, 0);
	original_cm->rx_packet_count64 = 0;
	atomic_set(&original_cm->rx_byte_count, 0);
	original_cm->rx_byte_count64 = 0;
	original_cm->xmit_dev = dest_dev;
	original_cm->xmit_dev_mtu = sic->dest_mtu;
	memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
	memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
	original_cm->connection = c;
	original_cm->counter_match = reply_cm;
	original_cm->flags = 0;
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		original_cm->priority = sic->src_priority;
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	original_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	original_cm->flow_accel = sic->original_accel;
#endif
	original_cm->active_next = NULL;
	original_cm->active_prev = NULL;
	original_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header.  For everything else we do.
	 */
	if (!(dest_dev->flags & IFF_POINTOPOINT)) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (dest_dev->header_ops) {
			if (dest_dev->header_ops->create == eth_header) {
				original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}

	/*
	 * Fill in the "reply" direction connection matching object.
	 */
	reply_cm->match_dev = dest_dev;
	reply_cm->match_protocol = sic->protocol;
	reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
	reply_cm->match_src_port = sic->dest_port_xlate;
	reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
	reply_cm->match_dest_port = sic->src_port_xlate;
	reply_cm->xlate_src_ip = sic->dest_ip.ip;
	reply_cm->xlate_src_port = sic->dest_port;
	reply_cm->xlate_dest_ip = sic->src_ip.ip;
	reply_cm->xlate_dest_port = sic->src_port;

	atomic_set(&reply_cm->rx_packet_count, 0);
	reply_cm->rx_packet_count64 = 0;
	atomic_set(&reply_cm->rx_byte_count, 0);
	reply_cm->rx_byte_count64 = 0;

	reply_cm->xmit_dev = src_dev;
	reply_cm->xmit_dev_mtu = sic->src_mtu;
	memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
	memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
	reply_cm->connection = c;
	reply_cm->counter_match = original_cm;
	reply_cm->flags = 0;
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		reply_cm->priority = sic->dest_priority;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	reply_cm->flow_accel = sic->reply_accel;
#endif
	reply_cm->active_next = NULL;
	reply_cm->active_prev = NULL;
	reply_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header.  For everything else we do.
	 */
	if (!(src_dev->flags & IFF_POINTOPOINT)) {
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (src_dev->header_ops) {
			if (src_dev->header_ops->create == eth_header) {
				reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}

	if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
	}

	if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
		original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
		reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
	}

	c->protocol = sic->protocol;
	c->src_ip = sic->src_ip.ip;
	c->src_ip_xlate = sic->src_ip_xlate.ip;
	c->src_port = sic->src_port;
	c->src_port_xlate = sic->src_port_xlate;
	c->original_dev = src_dev;
	c->original_match = original_cm;
	c->dest_ip = sic->dest_ip.ip;
	c->dest_ip_xlate = sic->dest_ip_xlate.ip;
	c->dest_port = sic->dest_port;
	c->dest_port_xlate = sic->dest_port_xlate;
	c->reply_dev = dest_dev;
	c->reply_match = reply_cm;
	c->mark = sic->mark;
	c->debug_read_seq = 0;
	c->last_sync_jiffies = get_jiffies_64();
	c->removed = false;

	/*
	 * Take hold of our source and dest devices for the duration of the connection.
	 */
	dev_hold(c->original_dev);
	dev_hold(c->reply_dev);

	/*
	 * Initialize the protocol-specific information that we track.
	 */
	switch (sic->protocol) {
	case IPPROTO_TCP:
		original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
		original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
		original_cm->protocol_state.tcp.end = sic->src_td_end;
		original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
		reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
		reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
		reply_cm->protocol_state.tcp.end = sic->dest_td_end;
		reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
		if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
			original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
			reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		}
		break;
	}

	sfe_ipv4_connection_match_compute_translations(original_cm);
	sfe_ipv4_connection_match_compute_translations(reply_cm);
	sfe_ipv4_insert_sfe_ipv4_connection(si, c);

	spin_unlock_bh(&si->lock);

	/*
	 * We have everything we need!
	 */
	DEBUG_INFO("new connection - mark: %08x, p: %d\n"
		   " s: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n"
		   " d: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n",
		   sic->mark, sic->protocol,
		   sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
		   &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
		   dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
		   &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));

	return 0;
}

/*
 * sfe_ipv4_destroy_rule()
 *	Destroy a forwarding rule.
 */
void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	bool ret;

	this_cpu_inc(si->stats_pcpu->connection_destroy_requests64);
	spin_lock_bh(&si->lock);

	/*
	 * Check to see if we have a flow that matches the rule we're trying
	 * to destroy.  If there isn't then we can't destroy it.
	 */
	c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
					      sid->dest_ip.ip, sid->dest_port);
	if (!c) {
		spin_unlock_bh(&si->lock);
		this_cpu_inc(si->stats_pcpu->connection_destroy_misses64);

		DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
			    sid->protocol, &sid->src_ip, ntohs(sid->src_port),
			    &sid->dest_ip, ntohs(sid->dest_port));
		return;
	}

	/*
	 * Remove our connection details from the hash tables.
	 */
	ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	spin_unlock_bh(&si->lock);

	if (ret) {
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
	}

	DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
		   sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
		   &sid->dest_ip.ip, ntohs(sid->dest_port));
}

/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}
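
/*
 * A minimal usage sketch (assumes a caller-provided handler named
 * my_sync_cb of type sfe_sync_rule_callback_t):
 *
 *	sfe_ipv4_register_sync_rule_callback(my_sync_cb);
 *	...
 *	sfe_ipv4_register_sync_rule_callback(NULL);
 *
 * Passing NULL deregisters the handler; the periodic sync below picks
 * up the change via RCU and simply skips its work when none is set.
 */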

/*
 * sfe_ipv4_get_debug_dev()
 */
static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct sfe_ipv4 *si = &__si;
	ssize_t count;
	int num;

	spin_lock_bh(&si->lock);
	num = si->debug_dev;
	spin_unlock_bh(&si->lock);

	count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
	return count;
}

/*
 * sysfs attributes.
 */
static const struct device_attribute sfe_ipv4_debug_dev_attr =
	__ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);

/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	bool ret;

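	/*
	 * We can't hold the lock across the flush (the registered sync
	 * callback runs outside it), so we remove one matching connection
	 * at a time and then rescan the list from the head.
	 */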
another_round:
	spin_lock_bh(&si->lock);

	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		/*
		 * Does this connection relate to the device we are destroying?
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			break;
		}
	}

	if (c) {
		ret = sfe_ipv4_remove_sfe_ipv4_connection(si, c);
	}

	spin_unlock_bh(&si->lock);

	if (c) {
		if (ret) {
			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
		}
		goto another_round;
	}
}

/*
 * sfe_ipv4_periodic_sync()
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
static void sfe_ipv4_periodic_sync(unsigned long arg)
#else
static void sfe_ipv4_periodic_sync(struct timer_list *tl)
#endif
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
#else
	struct sfe_ipv4 *si = from_timer(si, tl, timer);
#endif
	u64 now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 */
	quota = (si->num_connections + 63) / 64;
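
	/*
	 * Processing roughly 1/64th of the connections per pass, combined
	 * with the ~10ms timer below, spreads a full table's worth of sync
	 * work over well under a second while keeping each pass short.
	 */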

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
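	/*
	 * Re-arm the timer: (HZ + 99) / 100 is HZ / 100 rounded up to at
	 * least one jiffy, i.e. roughly 10ms, so this sync runs on the
	 * order of 100 times a second.
	 */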
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}

#define CHAR_DEV_MSG_SIZE 768

/*
 * sfe_ipv4_debug_dev_read_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	si->debug_read_seq++;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						      int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_connection()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	int bytes_read;
	int protocol;
	struct net_device *src_dev;
	__be32 src_ip;
	__be32 src_ip_xlate;
	__be16 src_port;
	__be16 src_port_xlate;
	u64 src_rx_packets;
	u64 src_rx_bytes;
	struct net_device *dest_dev;
	__be32 dest_ip;
	__be32 dest_ip_xlate;
	__be16 dest_port;
	__be16 dest_port_xlate;
	u64 dest_rx_packets;
	u64 dest_rx_bytes;
	u64 last_sync_jiffies;
	u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
	u32 packet, byte;
#ifdef CONFIG_NF_FLOW_COOKIE
	int src_flow_cookie, dst_flow_cookie;
#endif

	spin_lock_bh(&si->lock);

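	/*
	 * Find the next connection that hasn't been dumped in this read
	 * sequence.  debug_read_seq is bumped once per dump pass, so tagging
	 * a connection with the current sequence number claims it for this
	 * pass.
	 */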
	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		if (c->debug_read_seq < si->debug_read_seq) {
			c->debug_read_seq = si->debug_read_seq;
			break;
		}
	}

	/*
	 * If there are no more connections to dump (or the one we found is
	 * being removed) then move to the next state.
	 */
	if (!c || c->removed) {
		spin_unlock_bh(&si->lock);
		ws->state++;
		return true;
	}

	original_cm = c->original_match;
	reply_cm = c->reply_match;

	protocol = c->protocol;
	src_dev = c->original_dev;
	src_ip = c->src_ip;
	src_ip_xlate = c->src_ip_xlate;
	src_port = c->src_port;
	src_port_xlate = c->src_port_xlate;
	src_priority = original_cm->priority;
	src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;

	sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet, &byte);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet, &byte);

	src_rx_packets = original_cm->rx_packet_count64;
	src_rx_bytes = original_cm->rx_byte_count64;
	dest_dev = c->reply_dev;
	dest_ip = c->dest_ip;
	dest_ip_xlate = c->dest_ip_xlate;
	dest_port = c->dest_port;
	dest_port_xlate = c->dest_port_xlate;
	dest_priority = reply_cm->priority;
	dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
	dest_rx_packets = reply_cm->rx_packet_count64;
	dest_rx_bytes = reply_cm->rx_byte_count64;
	last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
	mark = c->mark;
#ifdef CONFIG_NF_FLOW_COOKIE
	src_flow_cookie = original_cm->flow_cookie;
	dst_flow_cookie = reply_cm->flow_cookie;
#endif
	spin_unlock_bh(&si->lock);

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
				"protocol=\"%u\" "
				"src_dev=\"%s\" "
				"src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
				"src_port=\"%u\" src_port_xlate=\"%u\" "
				"src_priority=\"%u\" src_dscp=\"%u\" "
				"src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
				"dest_dev=\"%s\" "
				"dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
				"dest_port=\"%u\" dest_port_xlate=\"%u\" "
				"dest_priority=\"%u\" dest_dscp=\"%u\" "
				"dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
#ifdef CONFIG_NF_FLOW_COOKIE
				"src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
#endif
				"last_sync=\"%llu\" "
				"mark=\"%08x\" />\n",
				protocol,
				src_dev->name,
				&src_ip, &src_ip_xlate,
				ntohs(src_port), ntohs(src_port_xlate),
				src_priority, src_dscp,
				src_rx_packets, src_rx_bytes,
				dest_dev->name,
				&dest_ip, &dest_ip_xlate,
				ntohs(dest_port), ntohs(dest_port_xlate),
				dest_priority, dest_dscp,
				dest_rx_packets, dest_rx_bytes,
#ifdef CONFIG_NF_FLOW_COOKIE
				src_flow_cookie, dst_flow_cookie,
#endif
				last_sync_jiffies, mark);

	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						     int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_exception()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int i;
	u64 val = 0;

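	/*
	 * Statistics are kept per-CPU to keep the forwarding hot path free
	 * of shared-counter contention, so the total for this exception
	 * event is the sum across all possible CPUs.
	 */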
	for_each_possible_cpu(i) {
		const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i);
		val += s->exception_events64[ws->iter_exception];
	}

	if (val) {
		int bytes_read;

		bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
				      "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
				      sfe_ipv4_exception_events_string[ws->iter_exception],
				      val);
		if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
			return false;
		}

		*length -= bytes_read;
		*total_read += bytes_read;
	}

	ws->iter_exception++;
	if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
		ws->iter_exception = 0;
		ws->state++;
	}

	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_stats()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;
	struct sfe_ipv4_stats stats;
	unsigned int num_conn;

	sfe_ipv4_update_summary_stats(si, &stats);

	spin_lock_bh(&si->lock);
	num_conn = si->num_connections;
	spin_unlock_bh(&si->lock);

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
			      "num_connections=\"%u\" "
			      "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
			      "create_requests=\"%llu\" create_collisions=\"%llu\" "
			      "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
			      "flushes=\"%llu\" "
			      "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
			      num_conn,
			      stats.packets_forwarded64,
			      stats.packets_not_forwarded64,
			      stats.connection_create_requests64,
			      stats.connection_create_collisions64,
			      stats.connection_destroy_requests64,
			      stats.connection_destroy_misses64,
			      stats.connection_flushes64,
			      stats.connection_match_hash_hits64,
			      stats.connection_match_hash_reorders64);
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 */
static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};
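
/*
 * Illustrative shape of the document the state machine above emits (values
 * here are made up; each element is produced by the corresponding write
 * method, one call per pass around the read loop):
 *
 *	<sfe_ipv4>
 *		<connections>
 *			<connection protocol="6" src_dev="eth0" ... />
 *		</connections>
 *		<exceptions>
 *			<exception name="UDP_NO_CONNECTION" count="42" />
 *		</exceptions>
 *		<stats num_connections="1" ... />
 *	</sfe_ipv4>
 */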

/*
 * sfe_ipv4_debug_dev_read()
 *	Send info to userspace upon read request from user
 */
static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
	char msg[CHAR_DEV_MSG_SIZE];
	int total_read = 0;
	struct sfe_ipv4_debug_xml_write_state *ws;
	struct sfe_ipv4 *si = &__si;

	ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
	while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
		if (!(sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
			/*
			 * A failed copy_to_user() is not going to succeed on
			 * a retry, so stop rather than spin.
			 */
			break;
		}
	}

	return total_read;
}

/*
 * sfe_ipv4_debug_dev_open()
 */
static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (!ws) {
		ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
		if (!ws) {
			return -ENOMEM;
		}

		ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
		file->private_data = ws;
	}

	return 0;
}

/*
 * sfe_ipv4_debug_dev_release()
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
		file->private_data = NULL;
	}

	return 0;
}

/*
 * File operations used in the debug char device
 */
static const struct file_operations sfe_ipv4_debug_dev_fops = {
	.owner = THIS_MODULE,
	.read = sfe_ipv4_debug_dev_read,
	.open = sfe_ipv4_debug_dev_open,
	.release = sfe_ipv4_debug_dev_release
};
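
/*
 * The major number for this device is allocated dynamically (see the
 * register_chrdev(0, ...) call in sfe_ipv4_init()), so a node must be
 * created by hand before the XML dump can be read.  Illustrative only,
 * assuming the sfe_ipv4_debug_dev_attr sysfs file exposes the major number:
 *
 *	major=$(cat /sys/sfe_ipv4/debug_dev)
 *	mknod /dev/sfe_ipv4 c $major 0
 *	cat /dev/sfe_ipv4
 */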

#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb
 *	Register a function that SFE can use to configure a flow cookie for a flow.
 *
 * Hardware drivers that support flow cookies should register a callback
 * function with SFE; SFE then calls it to configure a flow cookie for each flow.
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	BUG_ON(!cb);

	if (si->flow_cookie_set_func) {
		return -1;
	}

	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}

/*
 * sfe_unregister_flow_cookie_cb
 *	Unregister the function used to configure flow cookies.
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}
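
/*
 * Typical (illustrative) usage from a hardware driver, where
 * my_set_flow_cookie is a hypothetical flow_cookie_set_func_t implementation
 * (see sfe_flow_cookie.h for the prototype); a non-zero return means another
 * callback is already registered:
 *
 *	if (sfe_register_flow_cookie_cb(my_set_flow_cookie))
 *		goto fallback_to_software_matching;
 *	...
 *	sfe_unregister_flow_cookie_cb(my_set_flow_cookie);
 */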

/*
 * sfe_ipv4_get_flow_cookie()
 */
static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct sfe_ipv4 *si = &__si;
	return snprintf(buf, (size_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
}

/*
 * sfe_ipv4_set_flow_cookie()
 */
static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t size)
{
	struct sfe_ipv4 *si = &__si;
	long enable;

	/*
	 * strict_strtol() is long gone from the kernel; kstrtol() is its
	 * replacement.  Parsing into a local also avoids the original
	 * unsafe cast of the int-sized flow_cookie_enable field to a
	 * long pointer.
	 */
	if (kstrtol(buf, 0, &enable)) {
		return -EINVAL;
	}

	si->flow_cookie_enable = enable;

	return size;
}
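
/*
 * With CONFIG_NF_FLOW_COOKIE enabled, the feature can be toggled at runtime
 * through the kobject created in sfe_ipv4_init(), e.g.:
 *
 *	echo 1 > /sys/sfe_ipv4/flow_cookie_enable
 *	cat /sys/sfe_ipv4/flow_cookie_enable
 */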

/*
 * sysfs attributes.
 */
static const struct device_attribute sfe_ipv4_flow_cookie_attr =
	__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
#endif /*CONFIG_NF_FLOW_COOKIE*/

/*
 * sfe_ipv4_conn_match_hash_init()
 *	Initialize conn match hash lists
 */
static void sfe_ipv4_conn_match_hash_init(struct sfe_ipv4 *si, int len)
{
	struct hlist_head *hash_list = si->hlist_conn_match_hash_head;
	int i;

	for (i = 0; i < len; i++) {
		INIT_HLIST_HEAD(&hash_list[i]);
	}
}

/*
 * sfe_ipv4_init()
 */
int sfe_ipv4_init(void)
{
	struct sfe_ipv4 *si = &__si;
	int result = -1;

	DEBUG_INFO("SFE IPv4 init\n");

	sfe_ipv4_conn_match_hash_init(si, ARRAY_SIZE(si->hlist_conn_match_hash_head));

	si->stats_pcpu = alloc_percpu_gfp(struct sfe_ipv4_stats, GFP_KERNEL | __GFP_ZERO);
	if (!si->stats_pcpu) {
		DEBUG_ERROR("failed to allocate stats memory for sfe_ipv4\n");
		goto exit0;
	}

	/*
	 * Create sys/sfe_ipv4
	 */
	si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
	if (!si->sys_sfe_ipv4) {
		DEBUG_ERROR("failed to register sfe_ipv4\n");
		goto exit1;
	}

	/*
	 * Create files, one for each parameter supported by this module.
	 */
	result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register debug dev file: %d\n", result);
		goto exit2;
	}

#ifdef CONFIG_NF_FLOW_COOKIE
	result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
		goto exit3;
	}
#endif /* CONFIG_NF_FLOW_COOKIE */

	/*
	 * Register our debug char device.
	 */
	result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
	if (result < 0) {
		DEBUG_ERROR("Failed to register chrdev: %d\n", result);
		goto exit4;
	}

	si->debug_dev = result;

	spin_lock_init(&si->lock);

	/*
	 * Create a timer to handle periodic statistics.  The lock is
	 * initialized first because the timer handler takes it.
	 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
	setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
#else
	timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0);
#endif
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));

	return 0;

exit4:
#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);

exit3:
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

exit2:
	kobject_put(si->sys_sfe_ipv4);

exit1:
	free_percpu(si->stats_pcpu);

exit0:
	return result;
}

/*
 * sfe_ipv4_exit()
 */
void sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	del_timer_sync(&si->timer);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);

	kobject_put(si->sys_sfe_ipv4);

	free_percpu(si->stats_pcpu);
}

EXPORT_SYMBOL(sfe_ipv4_recv);
EXPORT_SYMBOL(sfe_ipv4_create_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif

MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");