/*
 * sfe_ipv4_tcp.c
 *	Shortcut forwarding engine - IPv4 TCP implementation
 *
 * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <linux/skbuff.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/lockdep.h>

#include "sfe_debug.h"
#include "sfe_api.h"
#include "sfe.h"
#include "sfe_flow_cookie.h"
#include "sfe_ipv4.h"
#include "sfe_pppoe.h"

/*
 * sfe_ipv4_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ack accordingly.
 */
static bool sfe_ipv4_process_tcp_option_sack(const struct tcphdr *th, const u32 data_offs,
					     u32 *ack)
{
	u32 length = sizeof(struct tcphdr);
	u8 *ptr = (u8 *)th + length;

	/*
	 * Skip processing if the TCP packet carries only the TIMESTAMP option.
	 */
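	/*
	 * The common timestamp-only layout is NOP + NOP + TIMESTAMP, i.e.
	 * 1 + 1 + TCPOLEN_TIMESTAMP (10) = 12 bytes of options, which is what
	 * the data_offs comparison below matches.
	 */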
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse SACK option.
	 */
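	/*
	 * SACK option layout (RFC 2018): kind (5), length (2 + 8 * n), then n
	 * blocks of 32-bit left edge / right edge pairs. The loop below keeps
	 * the largest right edge seen and uses it to advance the ack that the
	 * window tracking logic works with.
	 */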
	while (length < data_offs) {
		u8 size;
		u8 kind;

		ptr = (u8 *)th + length;
		kind = *ptr;

		/*
		 * NOP, used for padding.
		 * Handled before the other option kinds so we can skip it quickly
		 * without reading a length byte (NOP has no length field).
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			u32 sack = 0;
			u8 re = 1 + 1;

			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

			re += 4;
			while (re < size) {
				u32 sack_re;
				u8 *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}

/*
 * sfe_ipv4_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 */
int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
		      unsigned int len, struct iphdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct tcphdr *tcph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	struct sfe_ipv4_connection_match *counter_cm;
	u8 ttl;
	u32 flags;
	struct net_device *xmit_dev;
	bool ret;
	bool hw_csum;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct tcphdr) + ihl)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE);
		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	tcph = (struct tcphdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);

	rcu_read_lock();

	/*
	 * Look for a connection match.
	 */
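	/*
	 * With CONFIG_NF_FLOW_COOKIE the per-packet flow cookie indexes the match
	 * table directly; otherwise (or if that slot is empty) we fall back to the
	 * 5-tuple RCU hash lookup.
	 */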
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		/*
		 * We didn't find a connection. Because TCP is connection-oriented this
		 * may simply be a packet that can't take the fast path (e.g. the
		 * connection is not established). For diagnostic purposes we
		 * differentiate the two cases here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {

			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS);
			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS);
		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;

		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();

		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		rcu_read_unlock();
		this_cpu_inc(si->stats_pcpu->packets_not_forwarded64);
		return 0;
	}
#endif
	/*
	 * Does our TTL allow forwarding?
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}

		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

	/*
	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
	 * set is not a fast path packet.
	 */
	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
		struct sfe_ipv4_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv4_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS);
		return 0;
	}

	counter_cm = cm->counter_match;

	/*
	 * Are we doing sequence number checking?
	 */
	if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
		u32 seq;
		u32 ack;
		u32 sack;
		u32 data_offs;
		u32 end;
		u32 left_edge;
		u32 scaled_win;
		u32 max_end;

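		/*
		 * The checks below mirror conntrack-style TCP window tracking: the
		 * sequence and ack numbers are validated against the window state
		 * recorded for each direction. All comparisons use the (s32)(a - b)
		 * idiom so that 32-bit sequence number wrap-around is handled
		 * correctly.
		 */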
		/*
		 * Is our sequence fully past the right hand edge of the window?
		 */
		seq = ntohl(tcph->seq);
		if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
				    seq, cm->protocol_state.tcp.max_end + 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't too short.
		 */
		data_offs = tcph->doff << 2;
		if (unlikely(data_offs < sizeof(struct tcphdr))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS);
			return 0;
		}

		/*
		 * Update ACK according to any SACK option.
		 */
		ack = ntohl(tcph->ack_seq);
		sack = ack;
		if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP option SACK size is wrong\n");
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't past the end of the packet.
		 */
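		/*
		 * Note: len is measured from the start of the IP header, so rebase
		 * data_offs by the IP header size before comparing against it.
		 */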
		data_offs += sizeof(struct iphdr);
		if (unlikely(len < data_offs)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
				    data_offs, len);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS);
			return 0;
		}

		end = seq + len - data_offs;

		/*
		 * Is our sequence fully before the left hand edge of the window?
		 */
		if (unlikely((s32)(end - (cm->protocol_state.tcp.end
					  - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u before left edge: %u\n",
				    end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Are we acking data that is to the right of what has been sent?
		 */
		if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
				    sack, counter_cm->protocol_state.tcp.end + 1);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Is our ack too far before the left hand edge of the window?
		 */
		left_edge = counter_cm->protocol_state.tcp.end
			    - cm->protocol_state.tcp.max_win
			    - SFE_IPV4_TCP_MAX_ACK_WINDOW
			    - 1;
		if (unlikely((s32)(sack - left_edge) < 0)) {
			struct sfe_ipv4_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv4_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
			if (ret) {
				sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Have we just seen the largest window size yet for this connection? If yes
		 * then we need to record the new value.
		 */
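		/*
		 * The advertised window is scaled by the negotiated window scale;
		 * adding (sack - ack) extends it by any data already selectively
		 * acknowledged beyond the cumulative ack.
		 */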
		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
		scaled_win += (sack - ack);
		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
			cm->protocol_state.tcp.max_win = scaled_win;
		}

		/*
		 * If our sequence and/or ack numbers have advanced then record the new state.
		 */
		if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
			cm->protocol_state.tcp.end = end;
		}

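		/*
		 * max_end (highest ack plus the scaled window) is recorded on the
		 * counter match so that the reply direction can validate its
		 * sequence numbers against it.
		 */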
		max_end = sack + scaled_win;
		if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
			counter_cm->protocol_state.tcp.max_end = max_end;
		}
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * If the skb has been cloned then unshare it, because we're about to
	 * write to the packet data and must not modify the data area that the
	 * clone shares.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			rcu_read_unlock();
			return 0;
		}

		/*
		 * Update the iph and tcph pointers with the unshared skb's data area.
		 */
		iph = (struct iphdr *)skb->data;
		tcph = (struct tcphdr *)(skb->data + ihl);
	}

	/*
	 * For PPPoE flows, add the PPPoE header before the L2 header is written.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP) {
		if (unlikely(!sfe_pppoe_add_header(skb, cm->pppoe_session_id, PPP_IP))) {
			rcu_read_unlock();
			DEBUG_WARN("%px: PPPoE header addition failed\n", skb);
			sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_PPPOE_HEADER_ENCAP_FAILED);
			return 0;
		}
		this_cpu_inc(si->stats_pcpu->pppoe_encap_packets_forwarded64);
	}

	/*
	 * TODO : VLAN headers if any should be added here when supported.
	 */

	/*
	 * Update DSCP
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Enable HW csum only if the receive checksum has been verified and the xmit
	 * interface is CSUM offload capable.
	 * Note: if the L4 csum at Rx was found to be incorrect, we (the router) should
	 * use an incremental L4 checksum update here so that HW does not
	 * re-calculate/replace the L4 csum.
	 */
	hw_csum = !!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);

	/*
	 * Do we have to perform translations of the source address/port?
	 */
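	/*
	 * NAT uses incremental checksum updates (RFC 1624 style): the delta between
	 * the original and translated address/port is precomputed per connection as
	 * xlate_src_csum_adjustment (and a CHECKSUM_PARTIAL variant), so each packet
	 * only needs the add-and-fold below rather than a full recalculation.
	 */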
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 tcp_csum;
		u32 sum;

		iph->saddr = cm->xlate_src_ip;
		tcph->source = cm->xlate_src_port;

		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = tcp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 tcp_csum;
		u32 sum;

		iph->daddr = cm->xlate_dest_ip;
		tcph->dest = cm->xlate_dest_port;

		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = tcp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * If HW checksum offload is not possible, a full L3 checksum and an incremental
	 * L4 checksum are used to update the packet. Setting ip_summed to
	 * CHECKSUM_UNNECESSARY ensures the checksum is not recalculated further along
	 * the packet path.
	 */
	if (likely(hw_csum)) {
		skb->ip_summed = CHECKSUM_PARTIAL;
	} else {
		iph->check = sfe_ipv4_gen_ip_csum(iph);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/*
	 * Update traffic stats.
	 */
	atomic_inc(&cm->rx_packet_count);
	atomic_add(len, &cm->rx_byte_count);

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write an L2 header.
	 */
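	/*
	 * When the fast-Ethernet-header flag is set we can push a plain Ethernet
	 * header ourselves; otherwise we defer to dev_hard_header() so that any
	 * device-specific framing is applied.
	 */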
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct ethhdr *eth = (struct ethhdr *)__skb_push(skb, ETH_HLEN);

			eth->h_proto = htons(ETH_P_IP);

			ether_addr_copy((u8 *)eth->h_dest, (u8 *)cm->xmit_dest_mac);
			ether_addr_copy((u8 *)eth->h_source, (u8 *)cm->xmit_src_mac);
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	rcu_read_unlock();

	this_cpu_inc(si->stats_pcpu->packets_forwarded64);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}