/*
 * sfe_ipv4_tcp.c
 *	Shortcut forwarding engine - IPv4 TCP implementation
 *
 * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021,2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <linux/skbuff.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/lockdep.h>

#include "sfe_debug.h"
#include "sfe_api.h"
#include "sfe.h"
#include "sfe_flow_cookie.h"
#include "sfe_ipv4.h"
#include "sfe_pppoe.h"

/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ACK accordingly.
 */
static bool sfe_ipv4_process_tcp_option_sack(const struct tcphdr *th, const u32 data_offs,
					     u32 *ack)
{
	u32 length = sizeof(struct tcphdr);
	u8 *ptr = (u8 *)th + length;

	/*
	 * Skip the parse entirely if the TCP packet carries only the TIMESTAMP option.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * Walk the TCP options and parse any SACK option.
	 */
	while (length < data_offs) {
		u8 size;
		u8 kind;

		ptr = (u8 *)th + length;
		kind = *ptr;

		/*
		 * NOP is used for padding. It is handled outside the option
		 * dispatch below so we can skip it quickly without reading a
		 * size byte.
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			u32 sack = 0;
			u8 re = 1 + 1;

			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			re += 4;
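			/*
			 * 're' now indexes the right edge of the first SACK block.
			 * Walk the right edge of each 8-byte block and keep the
			 * largest value seen.
			 */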
			while (re < size) {
				u32 sack_re;
				u8 *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}

/*
 * sfe_ipv4_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 */
int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
		      unsigned int len, struct iphdr *iph, unsigned int ihl, bool flush_on_find, struct sfe_l2_info *l2_info)
{
	struct tcphdr *tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *counter_cm;
	u8 ttl;
	u32 flags;
	struct net_device *xmit_dev;
	bool ret;
	bool hw_csum;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct tcphdr) + ihl)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE);
		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	tcph = (struct tcphdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);
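	/*
	 * tcp_flag_word() returns the TCP header word that carries the data
	 * offset, flag bits and window, in network byte order; the TCP_FLAG_*
	 * masks used below are defined against that same layout.
	 */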

	rcu_read_lock();

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		/*
		 * We didn't find a connection, but since TCP is connection-oriented
		 * that may simply mean this flow is not being fast-forwarded (e.g.
		 * it is not yet established). For diagnostic purposes we
		 * differentiate the two cases here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {

			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS);
			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS);
		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();

		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		rcu_read_unlock();
		this_cpu_inc(si->stats_pcpu->packets_not_forwarded64);
		return 0;
	}
#endif
	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

	/*
	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
	 * set is not a fast path packet.
	 */
	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS);
		return 0;
	}

	counter_cm = cm->counter_match;

	/*
	 * Are we doing sequence number checking?
	 */
	if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
		u32 seq;
		u32 ack;
		u32 sack;
		u32 data_offs;
		u32 end;
		u32 left_edge;
		u32 scaled_win;
		u32 max_end;

		/*
		 * Is our sequence fully past the right hand edge of the window?
		 */
		seq = ntohl(tcph->seq);
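		/*
		 * The (s32) casts in the window checks below make the comparisons
		 * wrap-safe: sequence numbers are compared modulo 2^32.
		 */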
		if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
				    seq, cm->protocol_state.tcp.max_end + 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't too short.
		 */
		data_offs = tcph->doff << 2;
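		/* doff counts 32-bit words, so << 2 converts it to a byte length. */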
		if (unlikely(data_offs < sizeof(struct tcphdr))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS);
			return 0;
		}

		/*
		 * Update ACK according to any SACK option.
		 */
		ack = ntohl(tcph->ack_seq);
		sack = ack;
		if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP option SACK size is wrong\n");
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't past the end of the packet.
		 */
		data_offs += sizeof(struct iphdr);
		if (unlikely(len < data_offs)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
				    data_offs, len);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS);
			return 0;
		}

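		/*
		 * 'end' is the first sequence number beyond this segment's payload:
		 * data_offs now covers the IP and TCP headers, so (len - data_offs)
		 * is the TCP payload length.
		 */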
		end = seq + len - data_offs;

		/*
		 * Is our sequence fully before the left hand edge of the window?
		 */
		if (unlikely((s32)(end - (cm->protocol_state.tcp.end
						- counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u before left edge: %u\n",
				    end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Are we acking data that is to the right of what has been sent?
		 */
		if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
				    sack, counter_cm->protocol_state.tcp.end + 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Is our ack too far before the left hand edge of the window?
		 */
		left_edge = counter_cm->protocol_state.tcp.end
			    - cm->protocol_state.tcp.max_win
			    - SFE_IPV4_TCP_MAX_ACK_WINDOW
			    - 1;
		if (unlikely((s32)(sack - left_edge) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Have we just seen the largest window size yet for this connection? If yes
		 * then we need to record the new value.
		 */
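		/*
		 * The advertised window is scaled by the negotiated window scale;
		 * the (sack - ack) term widens it to account for data acknowledged
		 * only via SACK.
		 */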
		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
		scaled_win += (sack - ack);
		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
			cm->protocol_state.tcp.max_win = scaled_win;
		}

		/*
		 * If our sequence and/or ack numbers have advanced then record the new state.
		 */
		if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
			cm->protocol_state.tcp.end = end;
		}

		max_end = sack + scaled_win;
		if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
			counter_cm->protocol_state.tcp.max_end = max_end;
		}
	}

	/*
	 * For PPPoE packets, match the server MAC and session ID.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP)) {
		struct pppoe_hdr *ph;
		struct ethhdr *eth;

		if (unlikely(!l2_info) || unlikely(!sfe_l2_parse_flag_check(l2_info, SFE_L2_PARSE_FLAGS_PPPOE_INGRESS))) {
			rcu_read_unlock();
			DEBUG_TRACE("%px: PPPoE is not parsed\n", skb);
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_INCORRECT_PPPOE_PARSING);
			return 0;
		}

		ph = (struct pppoe_hdr *)(skb->head + sfe_l2_pppoe_hdr_offset_get(l2_info));
		eth = (struct ethhdr *)(skb->head + sfe_l2_hdr_offset_get(l2_info));
		if (unlikely(cm->pppoe_session_id != htons(ph->sid)) || unlikely(!(ether_addr_equal((u8 *)cm->pppoe_remote_mac, (u8 *)eth->h_source)))) {
			rcu_read_unlock();
			DEBUG_TRACE("%px: PPPoE sessions did not match\n", skb);
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_INVALID_PPPOE_SESSION);
			return 0;
		}
		this_cpu_inc(si->stats_pcpu->pppoe_decap_packets_forwarded64);

	} else if (unlikely(l2_info) && unlikely(sfe_l2_parse_flag_check(l2_info, SFE_L2_PARSE_FLAGS_PPPOE_INGRESS))) {

		/*
		 * The packet carries a PPPoE header but the CME does not have the
		 * PPPoE flag set, so exception the packet to Linux.
		 */
		rcu_read_unlock();
		DEBUG_TRACE("%px: CME doesn't contain PPPOE flag but packet has PPPoE header\n", skb);
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_PPPOE_NOT_SET_IN_CME);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Check if the skb was cloned. If it was, unshare it, because we are going
	 * to write to the packet's data area in this path and we must not modify
	 * the clone's shared data section.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			rcu_read_unlock();
			return 0;
		}

		/*
		 * Update the iph and tcph pointers with the unshared skb's data area.
		 */
		iph = (struct iphdr *)skb->data;
		tcph = (struct tcphdr *)(skb->data + ihl);
	}

	/*
	 * For PPPoE flows, add the PPPoE header before the L2 header is added.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP) {
		if (unlikely(!sfe_pppoe_add_header(skb, cm->pppoe_session_id, PPP_IP))) {
			rcu_read_unlock();
			DEBUG_WARN("%px: PPPoE header addition failed\n", skb);
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_PPPOE_HEADER_ENCAP_FAILED);
			return 0;
		}
		this_cpu_inc(si->stats_pcpu->pppoe_encap_packets_forwarded64);
	}

	/*
	 * TODO: Add VLAN headers here, if any, once supported.
	 */

	/*
	 * Update DSCP
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Enable HW csum only if the Rx checksum was verified and the xmit interface
	 * is CSUM offload capable.
	 * Note: if the L4 csum at Rx was found to be incorrect, we (the router) should
	 * use an incremental L4 checksum here so that HW does not re-calculate/replace
	 * the L4 csum.
	 */
	hw_csum = !!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 tcp_csum;
		u32 sum;

		iph->saddr = cm->xlate_src_ip;
		tcph->source = cm->xlate_src_port;

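		/*
		 * Apply the pre-computed one's-complement delta for the source
		 * translation to the L4 checksum; the fold below brings the carry
		 * back into the low 16 bits.
		 */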
		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = tcp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 tcp_csum;
		u32 sum;

		iph->daddr = cm->xlate_dest_ip;
		tcph->dest = cm->xlate_dest_port;

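		/*
		 * As above, apply the pre-computed delta for the destination
		 * translation when the checksum is updated in software.
		 */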
		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = tcp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * If HW checksum offload is not possible, a full L3 checksum and an incremental
	 * L4 checksum are used to update the packet. Setting ip_summed to
	 * CHECKSUM_UNNECESSARY ensures the checksum is not recalculated further along
	 * the packet path.
	 */
	if (likely(hw_csum)) {
		skb->ip_summed = CHECKSUM_PARTIAL;
	} else {
		iph->check = sfe_ipv4_gen_ip_csum(iph);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/*
	 * Update traffic stats.
	 */
	atomic_inc(&cm->rx_packet_count);
	atomic_add(len, &cm->rx_byte_count);

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct ethhdr *eth = (struct ethhdr *)__skb_push(skb, ETH_HLEN);

			eth->h_proto = htons(ETH_P_IP);

			ether_addr_copy((u8 *)eth->h_dest, (u8 *)cm->xmit_dest_mac);
			ether_addr_copy((u8 *)eth->h_source, (u8 *)cm->xmit_src_mac);
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	rcu_read_unlock();

	this_cpu_inc(si->stats_pcpu->packets_forwarded64);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}