/*
 * sfe_ipv6_tcp.c
 *	Shortcut forwarding engine file for IPv6 TCP
 *
 * Copyright (c) 2015-2016, 2019-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021,2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <linux/skbuff.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/version.h>

#include "sfe_debug.h"
#include "sfe_api.h"
#include "sfe.h"
#include "sfe_flow_cookie.h"
#include "sfe_ipv6.h"
#include "sfe_pppoe.h"

/*
 * sfe_ipv6_process_tcp_option_sack()
 *	Parse the TCP SACK option and update the ACK accordingly.
 */
static bool sfe_ipv6_process_tcp_option_sack(const struct tcphdr *th, const u32 data_offs,
					     u32 *ack)
{
	u32 length = sizeof(struct tcphdr);
	u8 *ptr = (u8 *)th + length;

	/*
	 * Skip option parsing if the TCP packet carries only the TIMESTAMP option.
	 */
	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
	    && likely(ptr[0] == TCPOPT_NOP)
	    && likely(ptr[1] == TCPOPT_NOP)
	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
		return true;
	}

	/*
	 * TCP options. Parse the SACK option.
	 */
	while (length < data_offs) {
		u8 size;
		u8 kind;

		ptr = (u8 *)th + length;
		kind = *ptr;

		/*
		 * NOP, used for padding.
		 * Handled before the other option checks so we can skip it quickly
		 * without reading a length byte.
		 */
		if (kind == TCPOPT_NOP) {
			length++;
			continue;
		}

		if (kind == TCPOPT_SACK) {
			u32 sack = 0;
			u8 re = 1 + 1;

			size = *(ptr + 1);
			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
			    || (size > (data_offs - length))) {
				return false;
			}

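			/*
			 * Walk the SACK blocks: each block is a pair of 32-bit sequence
			 * numbers (left edge, right edge). Track the highest right edge
			 * seen so the caller's ACK can be advanced to cover it.
			 */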
			re += 4;
			while (re < size) {
				u32 sack_re;
				u8 *sptr = ptr + re;
				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
				if (sack_re > sack) {
					sack = sack_re;
				}
				re += TCPOLEN_SACK_PERBLOCK;
			}
			if (sack > *ack) {
				*ack = sack;
			}
			length += size;
			continue;
		}
		if (kind == TCPOPT_EOL) {
			return true;
		}
		size = *(ptr + 1);
		if (size < 2) {
			return false;
		}
		length += size;
	}

	return true;
}

/*
 * sfe_ipv6_recv_tcp()
 *	Handle TCP packet receives and forwarding.
 */
int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
		      unsigned int len, struct ipv6hdr *iph, unsigned int ihl, bool flush_on_find, struct sfe_l2_info *l2_info)
{
	struct tcphdr *tcph;
	struct sfe_ipv6_addr *src_ip;
	struct sfe_ipv6_addr *dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv6_connection_match *cm;
	struct sfe_ipv6_connection_match *counter_cm;
	u32 flags;
	struct net_device *xmit_dev;
	bool ret;
	bool hw_csum;
	bool bridge_flow;

	/*
	 * Is our packet too short to contain a valid TCP header?
	 */
	if (!pskb_may_pull(skb, (sizeof(struct tcphdr) + ihl))) {
		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE);
		DEBUG_TRACE("packet too short for TCP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information. Read the IP header data first
	 * because we've almost certainly got that in the cache. We may not yet have
	 * the TCP header cached though so allow more time for any prefetching.
	 */
	src_ip = (struct sfe_ipv6_addr *)iph->saddr.s6_addr32;
	dest_ip = (struct sfe_ipv6_addr *)iph->daddr.s6_addr32;

	tcph = (struct tcphdr *)(skb->data + ihl);
	src_port = tcph->source;
	dest_port = tcph->dest;
	flags = tcp_flag_word(tcph);

	rcu_read_lock();

	/*
	 * Look for a connection match.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
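	/*
	 * Try the flow cookie fast lookup first: index the table with the cookie
	 * carried by the skb, and fall back to the hash-based connection lookup
	 * if the slot is empty.
	 */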
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv6_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv6_find_connection_match_rcu(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		/*
		 * We didn't get a connection, but as TCP is connection-oriented that
		 * may be because this is not a fast-path connection (not in the
		 * established state). For diagnostic purposes we differentiate this here.
		 */
		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS);

			DEBUG_TRACE("no connection found - fast flags\n");
			return 0;
		}

		rcu_read_unlock();

		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS);
		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv6_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv6_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		if (ret) {
			sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();

		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow in this direction, so just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		rcu_read_unlock();
		this_cpu_inc(si->stats_pcpu->packets_not_forwarded64);
		return 0;
	}
#endif

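	/*
	 * Bridged flows are forwarded at L2, so the hop_limit is neither checked
	 * nor decremented for them.
	 */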
	bridge_flow = !!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_BRIDGE_FLOW);

	/*
	 * Does our hop_limit allow forwarding?
	 */
	if (likely(!bridge_flow)) {
		if (unlikely(iph->hop_limit < 2)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("hop_limit too low\n");
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL);
			return 0;
		}
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
		struct sfe_ipv6_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv6_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		if (ret) {
			sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();

		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION);
		return 0;
	}

	/*
	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
	 * set is not a fast path packet.
	 */
	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
		struct sfe_ipv6_connection *c = cm->connection;
		spin_lock_bh(&si->lock);
		ret = sfe_ipv6_remove_connection(si, c);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
		if (ret) {
			sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
		}
		rcu_read_unlock();

		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS);
		return 0;
	}

	counter_cm = cm->counter_match;

	/*
	 * Are we doing sequence number checking?
	 */
	if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
		u32 seq;
		u32 ack;
		u32 sack;
		u32 data_offs;
		u32 end;
		u32 left_edge;
		u32 scaled_win;
		u32 max_end;

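		/*
		 * Window tracking state, kept per direction: 'end' is the highest
		 * sequence number seen from this side, 'max_win' the largest scaled
		 * receive window it has advertised, and 'max_end' the right-hand
		 * edge of the window its packets may reach (derived from the peer's
		 * ACK plus advertised window).
		 */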
		/*
		 * Is our sequence fully past the right hand edge of the window?
		 */
		seq = ntohl(tcph->seq);
		if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
				    seq, cm->protocol_state.tcp.max_end + 1);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't too short.
		 */
		data_offs = tcph->doff << 2;
		if (unlikely(data_offs < sizeof(struct tcphdr))) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS);
			return 0;
		}

		/*
		 * Update ACK according to any SACK option.
		 */
		ack = ntohl(tcph->ack_seq);
		sack = ack;
		if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP option SACK size is wrong\n");
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK);
			return 0;
		}

		/*
		 * Check that our TCP data offset isn't past the end of the packet.
		 */
		data_offs += sizeof(struct ipv6hdr);
		if (unlikely(len < data_offs)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
				    data_offs, len);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS);
			return 0;
		}

		end = seq + len - data_offs;

		/*
		 * Is our sequence fully before the left hand edge of the window?
		 */
		if (unlikely((s32)(end - (cm->protocol_state.tcp.end
					  - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("seq: %u before left edge: %u\n",
				    end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Are we acking data that is to the right of what has been sent?
		 */
		if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
				    sack, counter_cm->protocol_state.tcp.end + 1);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE);
			return 0;
		}

		/*
		 * Is our ack too far before the left hand edge of the window?
		 */
		left_edge = counter_cm->protocol_state.tcp.end
			    - cm->protocol_state.tcp.max_win
			    - SFE_IPV6_TCP_MAX_ACK_WINDOW
			    - 1;
		if (unlikely((s32)(sack - left_edge) < 0)) {
			struct sfe_ipv6_connection *c = cm->connection;
			spin_lock_bh(&si->lock);
			ret = sfe_ipv6_remove_connection(si, c);
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
			if (ret) {
				sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
			}
			rcu_read_unlock();

			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE);
			return 0;
		}

		/*
		 * Have we just seen the largest window size yet for this connection? If yes
		 * then we need to record the new value.
		 */
		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
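		/*
		 * Account for any bytes already SACKed beyond the cumulative ACK.
		 */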
		scaled_win += (sack - ack);
		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
			cm->protocol_state.tcp.max_win = scaled_win;
		}

		/*
		 * If our sequence and/or ack numbers have advanced then record the new state.
		 */
		if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
			cm->protocol_state.tcp.end = end;
		}

		max_end = sack + scaled_win;
		if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
			counter_cm->protocol_state.tcp.max_end = max_end;
		}
	}

	/*
	 * For PPPoE packets, match the server MAC and session id.
	 */
	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PPPOE_DECAP)) {
		struct pppoe_hdr *ph;
		struct ethhdr *eth;

		if (unlikely(!l2_info) || unlikely(!sfe_l2_parse_flag_check(l2_info, SFE_L2_PARSE_FLAGS_PPPOE_INGRESS))) {
			rcu_read_unlock();
			DEBUG_TRACE("%px: PPPoE is not parsed\n", skb);
			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_INVALID_PPPOE_SESSION);
			return 0;
		}

		ph = (struct pppoe_hdr *)(skb->head + sfe_l2_pppoe_hdr_offset_get(l2_info));
		eth = (struct ethhdr *)(skb->head + sfe_l2_hdr_offset_get(l2_info));

		if (unlikely(cm->pppoe_session_id != htons(ph->sid)) || unlikely(!(ether_addr_equal((u8 *)cm->pppoe_remote_mac, (u8 *)eth->h_source)))) {
			rcu_read_unlock();
			DEBUG_TRACE("%px: PPPoE session did not match\n", skb);
			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_INVALID_PPPOE_SESSION);
			return 0;
		}
		this_cpu_inc(si->stats_pcpu->pppoe_decap_packets_forwarded64);

	} else if (unlikely(l2_info) && unlikely(sfe_l2_parse_flag_check(l2_info, SFE_L2_PARSE_FLAGS_PPPOE_INGRESS))) {

		/*
		 * The packet carries a PPPoE header but the CME doesn't have the PPPoE
		 * flag set, so exception the packet to Linux.
		 */
		rcu_read_unlock();
		DEBUG_TRACE("%px: CME doesn't contain PPPoE flag but packet has PPPoE header\n", skb);
		sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_PPPOE_NOT_SET_IN_CME);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Check if the skb was cloned and, if so, unshare it, because the data area
	 * is going to be written in this path and we don't want to modify the
	 * cloned skb's data section.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%px: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			rcu_read_unlock();
			return 0;
		}

		/*
		 * Update the iph and tcph pointers with the unshared skb's data area.
		 */
		iph = (struct ipv6hdr *)skb->data;
		tcph = (struct tcphdr *)(skb->data + ihl);
	}

	/*
	 * For PPPoE flows, add the PPPoE header before the L2 header is added.
	 */
	if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PPPOE_ENCAP) {
		if (unlikely(!sfe_pppoe_add_header(skb, cm->pppoe_session_id, PPP_IPV6))) {
			rcu_read_unlock();
			DEBUG_WARN("%px: PPPoE header addition failed\n", skb);
			sfe_ipv6_exception_stats_inc(si, SFE_IPV6_EXCEPTION_EVENT_PPPOE_HEADER_ENCAP_FAILED);
			return 0;
		}
		this_cpu_inc(si->stats_pcpu->pppoe_encap_packets_forwarded64);
	}

	/*
	 * TODO: The VLAN header should be added here when VLAN is supported.
	 */

	/*
	 * Update DSCP.
	 */
	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		sfe_ipv6_change_dsfield(iph, cm->dscp);
	}

	/*
	 * Decrement our hop_limit.
	 */
	if (likely(!bridge_flow)) {
		iph->hop_limit -= 1;
	}

	/*
	 * Enable HW csum if the rx checksum was verified and the xmit interface is
	 * CSUM offload capable.
	 * Note: if the L4 csum at rx was found to be incorrect, we (the router) should
	 * use an incremental L4 checksum here so that the HW does not re-calculate or
	 * replace the L4 csum.
	 */
	hw_csum = !!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD) && (skb->ip_summed == CHECKSUM_UNNECESSARY);

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 tcp_csum;
		u32 sum;

		iph->saddr.s6_addr32[0] = cm->xlate_src_ip[0].addr[0];
		iph->saddr.s6_addr32[1] = cm->xlate_src_ip[0].addr[1];
		iph->saddr.s6_addr32[2] = cm->xlate_src_ip[0].addr[2];
		iph->saddr.s6_addr32[3] = cm->xlate_src_ip[0].addr[3];
		tcph->source = cm->xlate_src_port;

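		/*
		 * When HW csum offload isn't used, apply the precomputed incremental
		 * checksum adjustment for the source translation and fold the carry
		 * back into 16 bits.
		 */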
		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			sum = tcp_csum + cm->xlate_src_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 tcp_csum;
		u32 sum;

		iph->daddr.s6_addr32[0] = cm->xlate_dest_ip[0].addr[0];
		iph->daddr.s6_addr32[1] = cm->xlate_dest_ip[0].addr[1];
		iph->daddr.s6_addr32[2] = cm->xlate_dest_ip[0].addr[2];
		iph->daddr.s6_addr32[3] = cm->xlate_dest_ip[0].addr[3];
		tcph->dest = cm->xlate_dest_port;

		if (unlikely(!hw_csum)) {
			tcp_csum = tcph->check;
			sum = tcp_csum + cm->xlate_dest_csum_adjustment;
			sum = (sum & 0xffff) + (sum >> 16);
			tcph->check = (u16)sum;
		}
	}

	/*
	 * If HW checksum offload is not possible, the incremental L4 checksum above is
	 * used to update the packet. Setting ip_summed to CHECKSUM_UNNECESSARY ensures
	 * the checksum is not recalculated further along the packet path.
	 */
	if (likely(hw_csum)) {
		skb->ip_summed = CHECKSUM_PARTIAL;
	} else {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/*
	 * Update traffic stats.
	 */
	atomic_inc(&cm->rx_packet_count);
	atomic_add(len, &cm->rx_byte_count);

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write an L2 header.
	 */
	if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 */
			struct ethhdr *eth = (struct ethhdr *)__skb_push(skb, ETH_HLEN);

			eth->h_proto = htons(ETH_P_IPV6);
			ether_addr_copy((u8 *)eth->h_dest, (u8 *)cm->xmit_dest_mac);
			ether_addr_copy((u8 *)eth->h_source, (u8 *)cm->xmit_src_mac);
		}
	}

	/*
	 * Update the priority of the skb.
	 */
	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark the outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	rcu_read_unlock();

	this_cpu_inc(si->stats_pcpu->packets_forwarded64);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}