blob: 82694995840aeff008ce47b8b6b9027f10f06041 [file] [log] [blame]
/*
 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16#include <vnet/tcp/tcp.h>
Florin Coras999840c2020-03-18 20:31:34 +000017#include <vnet/tcp/tcp_inlines.h>
Florin Corasb2215d62017-08-01 16:56:58 -070018#include <math.h>
Neale Rannse4031132020-10-26 13:00:06 +000019#include <vnet/ip/ip4_inlines.h>
20#include <vnet/ip/ip6_inlines.h>
Dave Barach68b0fb02017-02-28 15:15:56 -050021
/**
 * Next indices used by the TCP output path. The symbolic suffixes match
 * the entries listed in foreach_tcp4_output_next/foreach_tcp6_output_next.
 */
typedef enum _tcp_output_next
{
  TCP_OUTPUT_NEXT_DROP,		/* paired with "error-drop" */
  TCP_OUTPUT_NEXT_IP_LOOKUP,	/* paired with the ip4/ip6 lookup node */
  TCP_OUTPUT_NEXT_IP_REWRITE,	/* paired with the ip4/ip6 rewrite node */
  TCP_OUTPUT_NEXT_IP_ARP,	/* paired with ip4-arp/ip6-discover-neighbor */
  TCP_OUTPUT_N_NEXT		/* number of entries above */
} tcp_output_next_t;
30
/*
 * IPv4 flavor of the output next list: pairs each TCP_OUTPUT_NEXT_* suffix
 * with a node name string. Expanded through a caller-defined
 * _ (sym, str) macro.
 */
#define foreach_tcp4_output_next		\
  _ (DROP, "error-drop")			\
  _ (IP_LOOKUP, "ip4-lookup")			\
  _ (IP_REWRITE, "ip4-rewrite")			\
  _ (IP_ARP, "ip4-arp")
Dave Barach68b0fb02017-02-28 15:15:56 -050036
/*
 * IPv6 flavor of the output next list: pairs each TCP_OUTPUT_NEXT_* suffix
 * with a node name string. Expanded through a caller-defined
 * _ (sym, str) macro.
 */
#define foreach_tcp6_output_next		\
  _ (DROP, "error-drop")			\
  _ (IP_LOOKUP, "ip6-lookup")			\
  _ (IP_REWRITE, "ip6-rewrite")			\
  _ (IP_ARP, "ip6-discover-neighbor")
Dave Barach68b0fb02017-02-28 15:15:56 -050042
43static char *tcp_error_strings[] = {
44#define tcp_error(n,s) s,
45#include <vnet/tcp/tcp_error.def>
46#undef tcp_error
47};
48
49typedef struct
50{
Clement Durand6cf260c2017-04-13 13:27:04 +020051 tcp_header_t tcp_header;
52 tcp_connection_t tcp_connection;
Dave Barach68b0fb02017-02-28 15:15:56 -050053} tcp_tx_trace_t;
54
Filip Tehlare275bed2019-03-06 00:06:56 -080055static u8 *
Dave Barach68b0fb02017-02-28 15:15:56 -050056format_tcp_tx_trace (u8 * s, va_list * args)
57{
58 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
Clement Durand6cf260c2017-04-13 13:27:04 +020060 tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
Florin Coras30928f82020-01-27 19:21:28 -080061 tcp_connection_t *tc = &t->tcp_connection;
Christophe Fontained3c008d2017-10-02 18:10:54 +020062 u32 indent = format_get_indent (s);
Dave Barach68b0fb02017-02-28 15:15:56 -050063
Florin Coras30928f82020-01-27 19:21:28 -080064 s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
65 format_tcp_state, tc->state, format_white_space, indent,
66 format_tcp_header, &t->tcp_header, 128);
Dave Barach68b0fb02017-02-28 15:15:56 -050067
68 return s;
69}
70
Filip Tehlare275bed2019-03-06 00:06:56 -080071#ifndef CLIB_MARCH_VARIANT
Dave Barach68b0fb02017-02-28 15:15:56 -050072static u8
Florin Coras4eeeaaf2017-09-05 14:03:37 -040073tcp_window_compute_scale (u32 window)
Dave Barach68b0fb02017-02-28 15:15:56 -050074{
75 u8 wnd_scale = 0;
Florin Coras4eeeaaf2017-09-05 14:03:37 -040076 while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
Dave Barach68b0fb02017-02-28 15:15:56 -050077 wnd_scale++;
78 return wnd_scale;
79}
80
81/**
Florin Coras6534b7a2017-07-18 05:38:03 -040082 * TCP's initial window
Florin Corase04c2992017-03-01 08:17:34 -080083 */
84always_inline u32
85tcp_initial_wnd_unscaled (tcp_connection_t * tc)
86{
Florin Coras6534b7a2017-07-18 05:38:03 -040087 /* RFC 6928 recommends the value lower. However at the time our connections
88 * are initialized, fifos may not be allocated. Therefore, advertise the
89 * smallest possible unscaled window size and update once fifos are
90 * assigned to the session.
91 */
92 /*
93 tcp_update_rcv_mss (tc);
94 TCP_IW_N_SEGMENTS * tc->mss;
95 */
Florin Coras9094b5c2019-08-12 14:17:47 -070096 return tcp_cfg.min_rx_fifo;
Florin Corase04c2992017-03-01 08:17:34 -080097}
98
99/**
Dave Barach68b0fb02017-02-28 15:15:56 -0500100 * Compute initial window and scale factor. As per RFC1323, window field in
101 * SYN and SYN-ACK segments is never scaled.
102 */
103u32
104tcp_initial_window_to_advertise (tcp_connection_t * tc)
105{
Florin Corase80b5912018-12-12 19:25:43 -0800106 /* Compute rcv wscale only if peer advertised support for it */
107 if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
Florin Coras9094b5c2019-08-12 14:17:47 -0700108 tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
Florin Corase80b5912018-12-12 19:25:43 -0800109
Florin Corase04c2992017-03-01 08:17:34 -0800110 tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500111
112 return clib_min (tc->rcv_wnd, TCP_WND_MAX);
113}
114
Florin Corasf20fd1a2019-03-28 13:21:19 -0700115static inline void
Florin Coras6792ec02017-03-13 03:49:51 -0700116tcp_update_rcv_wnd (tcp_connection_t * tc)
117{
Florin Corasf20fd1a2019-03-28 13:21:19 -0700118 u32 available_space, wnd;
Florin Coras6792ec02017-03-13 03:49:51 -0700119 i32 observed_wnd;
Florin Corasf20fd1a2019-03-28 13:21:19 -0700120
Florin Corase04c2992017-03-01 08:17:34 -0800121 /*
122 * Figure out how much space we have available
123 */
Florin Corasd2aab832018-05-22 11:39:59 -0700124 available_space = transport_max_rx_enqueue (&tc->connection);
Ryujiro Shibuyacc108562020-06-24 08:36:14 +0100125
Florin Corase04c2992017-03-01 08:17:34 -0800126 /*
127 * Use the above and what we know about what we've previously advertised
128 * to compute the new window
129 */
Florin Coras6792ec02017-03-13 03:49:51 -0700130 observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
Florin Corase04c2992017-03-01 08:17:34 -0800131
Florin Corasc78d47b2020-07-06 08:06:54 -0700132 /* Check if we are about to retract the window. Do the comparison before
133 * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
Florin Corasf20fd1a2019-03-28 13:21:19 -0700134 if (PREDICT_FALSE ((i32) available_space < observed_wnd))
Florin Corase04c2992017-03-01 08:17:34 -0800135 {
Florin Corasc78d47b2020-07-06 08:06:54 -0700136 wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
Florin Corasa436a422019-08-20 07:09:31 -0700137 TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
Florin Corase04c2992017-03-01 08:17:34 -0800138 }
Florin Coras6792ec02017-03-13 03:49:51 -0700139 else
Florin Corase04c2992017-03-01 08:17:34 -0800140 {
Florin Corasc78d47b2020-07-06 08:06:54 -0700141 /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
142 * avoid advertising a window larger than what can be buffered */
143 wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
Florin Corase04c2992017-03-01 08:17:34 -0800144 }
145
Florin Corasc78d47b2020-07-06 08:06:54 -0700146 if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
147 wnd = 0;
148
Florin Coras6792ec02017-03-13 03:49:51 -0700149 tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
Dave Barach68b0fb02017-02-28 15:15:56 -0500150}
151
152/**
Florin Coras0dbd5172018-06-25 16:19:34 -0700153 * Compute and return window to advertise, scaled as per RFC1323
154 */
Florin Coras47596832019-03-12 18:58:54 -0700155static inline u32
Florin Coras0dbd5172018-06-25 16:19:34 -0700156tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
157{
158 if (state < TCP_STATE_ESTABLISHED)
159 return tcp_initial_window_to_advertise (tc);
160
161 tcp_update_rcv_wnd (tc);
Florin Coras0dbd5172018-06-25 16:19:34 -0700162 return tc->rcv_wnd >> tc->rcv_wscale;
163}
164
Florin Coras0dbd5172018-06-25 16:19:34 -0700165static int
Florin Corascedcf602019-08-27 12:15:43 -0700166tcp_make_syn_options (tcp_connection_t * tc, tcp_options_t * opts)
Dave Barach68b0fb02017-02-28 15:15:56 -0500167{
168 u8 len = 0;
169
170 opts->flags |= TCP_OPTS_FLAG_MSS;
Florin Corascedcf602019-08-27 12:15:43 -0700171 opts->mss = tc->mss;
Dave Barach68b0fb02017-02-28 15:15:56 -0500172 len += TCP_OPTION_LEN_MSS;
173
174 opts->flags |= TCP_OPTS_FLAG_WSCALE;
Florin Corascedcf602019-08-27 12:15:43 -0700175 opts->wscale = tc->rcv_wscale;
Dave Barach68b0fb02017-02-28 15:15:56 -0500176 len += TCP_OPTION_LEN_WINDOW_SCALE;
177
178 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
Florin Coras8f10b902021-04-02 18:32:00 -0700179 opts->tsval = tcp_time_tstamp (tc->c_thread_index);
Dave Barach68b0fb02017-02-28 15:15:56 -0500180 opts->tsecr = 0;
181 len += TCP_OPTION_LEN_TIMESTAMP;
182
Florin Coras93992a92017-05-24 18:03:56 -0700183 if (TCP_USE_SACKS)
184 {
185 opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
186 len += TCP_OPTION_LEN_SACK_PERMITTED;
187 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500188
189 /* Align to needed boundary */
190 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
191 return len;
192}
193
Florin Coras0dbd5172018-06-25 16:19:34 -0700194static int
Dave Barach68b0fb02017-02-28 15:15:56 -0500195tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
196{
197 u8 len = 0;
198
199 opts->flags |= TCP_OPTS_FLAG_MSS;
Florin Corasc8343412017-05-04 14:25:50 -0700200 opts->mss = tc->mss;
Dave Barach68b0fb02017-02-28 15:15:56 -0500201 len += TCP_OPTION_LEN_MSS;
202
Florin Coras93992a92017-05-24 18:03:56 -0700203 if (tcp_opts_wscale (&tc->rcv_opts))
Dave Barach68b0fb02017-02-28 15:15:56 -0500204 {
205 opts->flags |= TCP_OPTS_FLAG_WSCALE;
206 opts->wscale = tc->rcv_wscale;
207 len += TCP_OPTION_LEN_WINDOW_SCALE;
208 }
209
Florin Coras93992a92017-05-24 18:03:56 -0700210 if (tcp_opts_tstamp (&tc->rcv_opts))
Dave Barach68b0fb02017-02-28 15:15:56 -0500211 {
212 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
Florin Coras8f10b902021-04-02 18:32:00 -0700213 opts->tsval = tcp_time_tstamp (tc->c_thread_index);
Dave Barach68b0fb02017-02-28 15:15:56 -0500214 opts->tsecr = tc->tsval_recent;
215 len += TCP_OPTION_LEN_TIMESTAMP;
216 }
217
Florin Coras93992a92017-05-24 18:03:56 -0700218 if (tcp_opts_sack_permitted (&tc->rcv_opts))
Dave Barach68b0fb02017-02-28 15:15:56 -0500219 {
220 opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
221 len += TCP_OPTION_LEN_SACK_PERMITTED;
222 }
223
224 /* Align to needed boundary */
225 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
226 return len;
227}
228
Florin Coras0dbd5172018-06-25 16:19:34 -0700229static int
Dave Barach68b0fb02017-02-28 15:15:56 -0500230tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
231{
232 u8 len = 0;
233
234 opts->flags = 0;
235
Florin Coras93992a92017-05-24 18:03:56 -0700236 if (tcp_opts_tstamp (&tc->rcv_opts))
Dave Barach68b0fb02017-02-28 15:15:56 -0500237 {
238 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
Vladimir Kropylev1cfcb782019-07-02 11:25:26 +0300239 opts->tsval = tcp_tstamp (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500240 opts->tsecr = tc->tsval_recent;
241 len += TCP_OPTION_LEN_TIMESTAMP;
242 }
Florin Coras93992a92017-05-24 18:03:56 -0700243 if (tcp_opts_sack_permitted (&tc->rcv_opts))
Dave Barach68b0fb02017-02-28 15:15:56 -0500244 {
245 if (vec_len (tc->snd_sacks))
246 {
247 opts->flags |= TCP_OPTS_FLAG_SACK;
Florin Corase5b17912019-02-21 16:46:24 -0800248 if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
249 tc->snd_sack_pos = 0;
250 opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
251 opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
252 opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
Florin Corasc28764f2017-04-26 00:08:42 -0700253 TCP_OPTS_MAX_SACK_BLOCKS);
Florin Corase5b17912019-02-21 16:46:24 -0800254 tc->snd_sack_pos += opts->n_sack_blocks;
Dave Barach68b0fb02017-02-28 15:15:56 -0500255 len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
256 }
257 }
258
259 /* Align to needed boundary */
260 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
261 return len;
262}
263
264always_inline int
265tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
266 tcp_state_t state)
267{
268 switch (state)
269 {
270 case TCP_STATE_ESTABLISHED:
Florin Coras25579b42018-06-06 17:55:02 -0700271 case TCP_STATE_CLOSE_WAIT:
Florin Coras54ddf432018-12-21 13:54:09 -0800272 case TCP_STATE_FIN_WAIT_1:
273 case TCP_STATE_LAST_ACK:
274 case TCP_STATE_CLOSING:
275 case TCP_STATE_FIN_WAIT_2:
276 case TCP_STATE_TIME_WAIT:
277 case TCP_STATE_CLOSED:
Dave Barach68b0fb02017-02-28 15:15:56 -0500278 return tcp_make_established_options (tc, opts);
279 case TCP_STATE_SYN_RCVD:
280 return tcp_make_synack_options (tc, opts);
281 case TCP_STATE_SYN_SENT:
Florin Corascedcf602019-08-27 12:15:43 -0700282 return tcp_make_syn_options (tc, opts);
Dave Barach68b0fb02017-02-28 15:15:56 -0500283 default:
Florin Coras371ca502018-02-21 12:07:41 -0800284 clib_warning ("State not handled! %d", state);
Dave Barach68b0fb02017-02-28 15:15:56 -0500285 return 0;
286 }
287}
288
Florin Corasc8343412017-05-04 14:25:50 -0700289/**
Florin Corasb26743d2018-06-26 09:31:04 -0700290 * Update burst send vars
291 *
292 * - Updates snd_mss to reflect the effective segment size that we can send
293 * by taking into account all TCP options, including SACKs.
294 * - Cache 'on the wire' options for reuse
295 * - Updates receive window which can be reused for a burst.
296 *
297 * This should *only* be called when doing bursts
Florin Corasc8343412017-05-04 14:25:50 -0700298 */
299void
Florin Corasb26743d2018-06-26 09:31:04 -0700300tcp_update_burst_snd_vars (tcp_connection_t * tc)
Florin Corasc8343412017-05-04 14:25:50 -0700301{
Florin Corasb26743d2018-06-26 09:31:04 -0700302 tcp_main_t *tm = &tcp_main;
303
Florin Corasc8343412017-05-04 14:25:50 -0700304 /* Compute options to be used for connection. These may be reused when
305 * sending data or to compute the effective mss (snd_mss) */
Florin Corasb26743d2018-06-26 09:31:04 -0700306 tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
307 TCP_STATE_ESTABLISHED);
Florin Corasc8343412017-05-04 14:25:50 -0700308
309 /* XXX check if MTU has been updated */
Florin Coras93992a92017-05-24 18:03:56 -0700310 tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
Florin Corasdb84e572017-05-09 18:54:52 -0700311 ASSERT (tc->snd_mss > 0);
Florin Corasb26743d2018-06-26 09:31:04 -0700312
313 tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
314 &tc->snd_opts);
315
316 tcp_update_rcv_wnd (tc);
Florin Coras52814732019-06-12 15:38:19 -0700317
Florin Corasbbcfaac2019-10-10 13:52:04 -0700318 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
Florin Corasd6ae4bf2019-10-12 18:10:20 -0700319 tcp_bt_check_app_limited (tc);
Florin Corasca2831a2019-07-04 17:05:59 -0700320
321 if (tc->snd_una == tc->snd_nxt)
Florin Corasc31dc312019-10-06 14:06:14 -0700322 {
323 tcp_cc_event (tc, TCP_CC_EVT_START_TX);
Florin Coras11e9e352019-11-13 19:09:47 -0800324 tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_BURST);
Florin Corasc31dc312019-10-06 14:06:14 -0700325 }
Florin Corasfb9d3742020-11-05 19:01:44 -0800326
327 if (tc->flags & TCP_CONN_PSH_PENDING)
328 {
329 u32 max_deq = transport_max_tx_dequeue (&tc->connection);
330 /* Last byte marked for push */
331 tc->psh_seq = tc->snd_una + max_deq - 1;
332 }
Florin Corasc8343412017-05-04 14:25:50 -0700333}
334
Filip Tehlare275bed2019-03-06 00:06:56 -0800335#endif /* CLIB_MARCH_VARIANT */
Florin Corasc8343412017-05-04 14:25:50 -0700336
Florin Coras0dbd5172018-06-25 16:19:34 -0700337static void *
Dave Barach68b0fb02017-02-28 15:15:56 -0500338tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
339{
Florin Corasb2215d62017-08-01 16:56:58 -0700340 if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
341 vlib_buffer_free_one (vm, b->next_buffer);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400342 /* Zero all flags but free list index and trace flag */
343 b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
Florin Coras1f152cd2017-08-18 19:28:03 -0700344 b->current_data = 0;
345 b->current_length = 0;
346 b->total_length_not_including_first_buffer = 0;
347 vnet_buffer (b)->tcp.flags = 0;
Benoît Gannef89bbbe2021-03-04 14:31:03 +0100348 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500349 /* Leave enough space for headers */
Florin Coras1ee78302019-02-05 15:51:15 -0800350 return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
Florin Coras1f152cd2017-08-18 19:28:03 -0700351}
352
Filip Tehlare275bed2019-03-06 00:06:56 -0800353#ifndef CLIB_MARCH_VARIANT
Florin Coras0dbd5172018-06-25 16:19:34 -0700354static void *
Florin Coras1f152cd2017-08-18 19:28:03 -0700355tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
356{
357 ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400358 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
Florin Coras1f152cd2017-08-18 19:28:03 -0700359 b->total_length_not_including_first_buffer = 0;
Florin Coras24793e32018-05-09 11:34:25 -0700360 b->current_data = 0;
Florin Corasd79b41e2017-03-04 05:37:52 -0800361 vnet_buffer (b)->tcp.flags = 0;
Florin Coras1f152cd2017-08-18 19:28:03 -0700362 /* Leave enough space for headers */
Florin Coras1ee78302019-02-05 15:51:15 -0800363 return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
Dave Barach68b0fb02017-02-28 15:15:56 -0500364}
365
Srikanth A02833ff2019-10-02 17:48:58 -0700366
367/* Compute TCP checksum in software when offloading is disabled for a connection */
368u16
369ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
370 ip46_address_t * src, ip46_address_t * dst)
371{
372 ip_csum_t sum0;
373 u16 payload_length_host_byte_order;
374 u32 i;
375
376 /* Initialize checksum with ip header. */
377 sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
378 clib_host_to_net_u16 (IP_PROTOCOL_TCP);
379 payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
380
381 for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
382 {
383 sum0 = ip_csum_with_carry
384 (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
385 sum0 = ip_csum_with_carry
386 (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
387 }
388
389 return ip_calculate_l4_checksum (vm, p0, sum0,
390 payload_length_host_byte_order, NULL, 0,
391 NULL);
392}
393
394u16
395ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
396 ip46_address_t * src, ip46_address_t * dst)
397{
398 ip_csum_t sum0;
399 u32 payload_length_host_byte_order;
400
401 payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
402 sum0 =
403 clib_host_to_net_u32 (payload_length_host_byte_order +
404 (IP_PROTOCOL_TCP << 16));
405
406 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
407 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
408
409 return ip_calculate_l4_checksum (vm, p0, sum0,
410 payload_length_host_byte_order, NULL, 0,
411 NULL);
412}
413
414static inline u16
415tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
416{
417 u16 checksum = 0;
Florin Corasbbcfaac2019-10-10 13:52:04 -0700418 if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
Srikanth A02833ff2019-10-02 17:48:58 -0700419 {
420 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
421 vlib_main_t *vm = wrk->vm;
422
423 if (tc->c_is_ip4)
424 checksum = ip4_tcp_compute_checksum_custom
425 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
426 else
427 checksum = ip6_tcp_compute_checksum_custom
428 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
429 }
430 else
431 {
Mohsin Kazmi68095382021-02-10 11:26:24 +0100432 vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
Srikanth A02833ff2019-10-02 17:48:58 -0700433 }
434 return checksum;
435}
436
Dave Barach68b0fb02017-02-28 15:15:56 -0500437/**
438 * Prepare ACK
439 */
Florin Corasbdf7fd62019-01-31 17:31:01 -0800440static inline void
Dave Barach68b0fb02017-02-28 15:15:56 -0500441tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
442 u8 flags)
443{
444 tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
445 u8 tcp_opts_len, tcp_hdr_opts_len;
446 tcp_header_t *th;
447 u16 wnd;
448
449 wnd = tcp_window_to_advertise (tc, state);
450
451 /* Make and write options */
452 tcp_opts_len = tcp_make_established_options (tc, snd_opts);
453 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
454
455 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
456 tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
457
458 tcp_options_write ((u8 *) (th + 1), snd_opts);
Srikanth A02833ff2019-10-02 17:48:58 -0700459
460 th->checksum = tcp_compute_checksum (tc, b);
461
Dave Barach68b0fb02017-02-28 15:15:56 -0500462 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
Vladimir Kropylev398afbd2019-06-25 00:06:52 +0300463
464 if (wnd == 0)
Florin Coras5a41fd52021-04-19 10:17:26 -0700465 {
466 transport_rx_fifo_req_deq_ntf (&tc->connection);
467 tcp_zero_rwnd_sent_on (tc);
468 }
Vladimir Kropylev398afbd2019-06-25 00:06:52 +0300469 else
470 tcp_zero_rwnd_sent_off (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500471}
472
473/**
474 * Convert buffer to ACK
475 */
Florin Corasbdf7fd62019-01-31 17:31:01 -0800476static inline void
Dave Barach68b0fb02017-02-28 15:15:56 -0500477tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
478{
Dave Barach68b0fb02017-02-28 15:15:56 -0500479 tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
Florin Corasa436a422019-08-20 07:09:31 -0700480 TCP_EVT (TCP_EVT_ACK_SENT, tc);
Florin Coras3e350af2017-03-30 02:54:28 -0700481 tc->rcv_las = tc->rcv_nxt;
Dave Barach68b0fb02017-02-28 15:15:56 -0500482}
483
484/**
485 * Convert buffer to FIN-ACK
486 */
Florin Coras999840c2020-03-18 20:31:34 +0000487static void
Florin Corasd79b41e2017-03-04 05:37:52 -0800488tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
Dave Barach68b0fb02017-02-28 15:15:56 -0500489{
Florin Corasbdf7fd62019-01-31 17:31:01 -0800490 tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400491}
Dave Barach68b0fb02017-02-28 15:15:56 -0500492
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400493/**
494 * Convert buffer to SYN
495 */
496void
497tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
498{
499 u8 tcp_hdr_opts_len, tcp_opts_len;
500 tcp_header_t *th;
501 u16 initial_wnd;
502 tcp_options_t snd_opts;
503
504 initial_wnd = tcp_initial_window_to_advertise (tc);
505
506 /* Make and write options */
Dave Barachb7b92992018-10-17 10:38:51 -0400507 clib_memset (&snd_opts, 0, sizeof (snd_opts));
Florin Corascedcf602019-08-27 12:15:43 -0700508 tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400509 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
510
511 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
512 tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
513 initial_wnd);
514 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
515 tcp_options_write ((u8 *) (th + 1), &snd_opts);
Srikanth A02833ff2019-10-02 17:48:58 -0700516 th->checksum = tcp_compute_checksum (tc, b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500517}
518
519/**
520 * Convert buffer to SYN-ACK
521 */
Florin Coras999840c2020-03-18 20:31:34 +0000522static void
Dave Barach68b0fb02017-02-28 15:15:56 -0500523tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
524{
Dave Barach68b0fb02017-02-28 15:15:56 -0500525 tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
526 u8 tcp_opts_len, tcp_hdr_opts_len;
527 tcp_header_t *th;
528 u16 initial_wnd;
Dave Barach68b0fb02017-02-28 15:15:56 -0500529
Dave Barachb7b92992018-10-17 10:38:51 -0400530 clib_memset (snd_opts, 0, sizeof (*snd_opts));
Dave Barach68b0fb02017-02-28 15:15:56 -0500531 initial_wnd = tcp_initial_window_to_advertise (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500532 tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
533 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
534
535 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
536 tc->rcv_nxt, tcp_hdr_opts_len,
537 TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
Dave Barach68b0fb02017-02-28 15:15:56 -0500538 tcp_options_write ((u8 *) (th + 1), snd_opts);
539
540 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
Srikanth A02833ff2019-10-02 17:48:58 -0700541 th->checksum = tcp_compute_checksum (tc, b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500542}
543
Florin Coras5484daa2020-03-27 23:55:06 +0000544static void
Florin Coras647acd52021-07-02 18:10:20 -0700545tcp_enqueue_half_open (tcp_worker_ctx_t *wrk, tcp_connection_t *tc,
546 vlib_buffer_t *b, u32 bi)
Dave Barach68b0fb02017-02-28 15:15:56 -0500547{
Florin Corasbe72ae62018-11-01 11:23:03 -0700548 vlib_main_t *vm = wrk->vm;
Dave Barach68b0fb02017-02-28 15:15:56 -0500549
Damjan Marion213b5aa2017-07-13 21:19:27 +0200550 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
Dave Barach68b0fb02017-02-28 15:15:56 -0500551 b->error = 0;
552
Florin Coras5484daa2020-03-27 23:55:06 +0000553 session_add_pending_tx_buffer (vm->thread_index, bi,
Florin Coras647acd52021-07-02 18:10:20 -0700554 wrk->tco_next_node[!tc->c_is_ip4]);
Florin Coras9d063042017-09-14 03:08:00 -0400555
Florin Coras5484daa2020-03-27 23:55:06 +0000556 if (vm->thread_index == 0 && vlib_num_workers ())
Florin Coras647acd52021-07-02 18:10:20 -0700557 session_queue_run_on_main_thread (vm);
Dave Barach68b0fb02017-02-28 15:15:56 -0500558}
559
Florin Coras0dbd5172018-06-25 16:19:34 -0700560static void
Florin Corasbe72ae62018-11-01 11:23:03 -0700561tcp_enqueue_to_output (tcp_worker_ctx_t * wrk, vlib_buffer_t * b, u32 bi,
562 u8 is_ip4)
Florin Coras1f152cd2017-08-18 19:28:03 -0700563{
Florin Coras2a7ea2e2019-10-16 22:06:08 -0700564 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
565 b->error = 0;
566
Florin Coras5484daa2020-03-27 23:55:06 +0000567 session_add_pending_tx_buffer (wrk->vm->thread_index, bi,
568 wrk->tco_next_node[!is_ip4]);
Florin Coras1f152cd2017-08-18 19:28:03 -0700569}
570
Filip Tehlare275bed2019-03-06 00:06:56 -0800571#endif /* CLIB_MARCH_VARIANT */
Florin Coras1f152cd2017-08-18 19:28:03 -0700572
Florin Coras0dbd5172018-06-25 16:19:34 -0700573static int
Florin Coras360336f2020-02-13 18:46:18 +0000574tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
Dave Barach68b0fb02017-02-28 15:15:56 -0500575{
Dave Barach68b0fb02017-02-28 15:15:56 -0500576 ip4_header_t *ih4;
577 ip6_header_t *ih6;
Florin Coras360336f2020-02-13 18:46:18 +0000578 tcp_header_t *th;
579 ip4_address_t src_ip4, dst_ip4;
580 ip6_address_t src_ip6, dst_ip6;
Florin Corasdc629cd2017-05-09 00:52:37 -0700581 u16 src_port, dst_port;
Florin Coras360336f2020-02-13 18:46:18 +0000582 u32 tmp, len, seq, ack;
Florin Corasdc629cd2017-05-09 00:52:37 -0700583 u8 flags;
Dave Barach68b0fb02017-02-28 15:15:56 -0500584
585 /* Find IP and TCP headers */
Florin Coras360336f2020-02-13 18:46:18 +0000586 th = tcp_buffer_hdr (b);
Florin Corasdc629cd2017-05-09 00:52:37 -0700587
588 /* Save src and dst ip */
Dave Barach68b0fb02017-02-28 15:15:56 -0500589 if (is_ip4)
590 {
Florin Coras360336f2020-02-13 18:46:18 +0000591 ih4 = vlib_buffer_get_current (b);
Florin Corasdc629cd2017-05-09 00:52:37 -0700592 ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
Florin Coras360336f2020-02-13 18:46:18 +0000593 src_ip4.as_u32 = ih4->src_address.as_u32;
594 dst_ip4.as_u32 = ih4->dst_address.as_u32;
Dave Barach68b0fb02017-02-28 15:15:56 -0500595 }
596 else
597 {
Florin Coras360336f2020-02-13 18:46:18 +0000598 ih6 = vlib_buffer_get_current (b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500599 ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
Florin Coras360336f2020-02-13 18:46:18 +0000600 clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
601 clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
Dave Barach68b0fb02017-02-28 15:15:56 -0500602 }
603
Florin Coras360336f2020-02-13 18:46:18 +0000604 src_port = th->src_port;
605 dst_port = th->dst_port;
606 flags = TCP_FLAG_RST;
Florin Corasdc629cd2017-05-09 00:52:37 -0700607
Florin Coras360336f2020-02-13 18:46:18 +0000608 /*
609 * RFC 793. If the ACK bit is off, sequence number zero is used,
610 * <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
611 * If the ACK bit is on,
612 * <SEQ=SEG.ACK><CTL=RST>
613 */
614 if (tcp_ack (th))
Dave Barach68b0fb02017-02-28 15:15:56 -0500615 {
Florin Coras360336f2020-02-13 18:46:18 +0000616 seq = th->ack_number;
Florin Corasdc629cd2017-05-09 00:52:37 -0700617 ack = 0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500618 }
Florin Coras360336f2020-02-13 18:46:18 +0000619 else
620 {
621 flags |= TCP_FLAG_ACK;
622 tmp = clib_net_to_host_u32 (th->seq_number);
623 len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
624 ack = clib_host_to_net_u32 (tmp + len);
625 seq = 0;
626 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500627
Florin Coras360336f2020-02-13 18:46:18 +0000628 tcp_reuse_buffer (vm, b);
Florin Coras360336f2020-02-13 18:46:18 +0000629 th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
630 sizeof (tcp_header_t), flags, 0);
Dave Barach68b0fb02017-02-28 15:15:56 -0500631
Dave Barach68b0fb02017-02-28 15:15:56 -0500632 if (is_ip4)
633 {
Florin Coras360336f2020-02-13 18:46:18 +0000634 ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
Florin Corasfdbc3822017-07-27 00:34:12 -0700635 IP_PROTOCOL_TCP, 1);
Florin Coras360336f2020-02-13 18:46:18 +0000636 th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
Dave Barach68b0fb02017-02-28 15:15:56 -0500637 }
638 else
639 {
640 int bogus = ~0;
Florin Coras360336f2020-02-13 18:46:18 +0000641 ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
642 th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
Dave Barach68b0fb02017-02-28 15:15:56 -0500643 ASSERT (!bogus);
644 }
645
646 return 0;
647}
648
Filip Tehlare275bed2019-03-06 00:06:56 -0800649#ifndef CLIB_MARCH_VARIANT
Dave Barach68b0fb02017-02-28 15:15:56 -0500650/**
651 * Send reset without reusing existing buffer
Florin Coras1f152cd2017-08-18 19:28:03 -0700652 *
653 * It extracts connection info out of original packet
Dave Barach68b0fb02017-02-28 15:15:56 -0500654 */
655void
Florin Corasd4c49be2019-02-07 00:15:53 -0800656tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
657 u32 thread_index, u8 is_ip4)
Dave Barach68b0fb02017-02-28 15:15:56 -0500658{
Florin Corasd4c49be2019-02-07 00:15:53 -0800659 tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
Florin Corasbe72ae62018-11-01 11:23:03 -0700660 vlib_main_t *vm = wrk->vm;
Dave Barach68b0fb02017-02-28 15:15:56 -0500661 vlib_buffer_t *b;
Dave Barach68b0fb02017-02-28 15:15:56 -0500662 u8 tcp_hdr_len, flags = 0;
663 tcp_header_t *th, *pkt_th;
Florin Coras647acd52021-07-02 18:10:20 -0700664 u32 seq, ack, bi;
Dave Barach68b0fb02017-02-28 15:15:56 -0500665 ip4_header_t *ih4, *pkt_ih4;
666 ip6_header_t *ih6, *pkt_ih6;
667
Florin Corasbdf7fd62019-01-31 17:31:01 -0800668 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Corasd4712a82020-05-22 21:51:30 +0000669 {
670 tcp_worker_stats_inc (wrk, no_buffer, 1);
671 return;
672 }
Florin Coras66b11312017-07-31 17:18:03 -0700673
Dave Barach68b0fb02017-02-28 15:15:56 -0500674 b = vlib_get_buffer (vm, bi);
Florin Coras1f152cd2017-08-18 19:28:03 -0700675 tcp_init_buffer (vm, b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500676
677 /* Make and write options */
678 tcp_hdr_len = sizeof (tcp_header_t);
679
680 if (is_ip4)
681 {
682 pkt_ih4 = vlib_buffer_get_current (pkt);
683 pkt_th = ip4_next_header (pkt_ih4);
684 }
685 else
686 {
687 pkt_ih6 = vlib_buffer_get_current (pkt);
688 pkt_th = ip6_next_header (pkt_ih6);
689 }
690
691 if (tcp_ack (pkt_th))
692 {
693 flags = TCP_FLAG_RST;
694 seq = pkt_th->ack_number;
Florin Coras776f3d82018-11-02 08:23:58 -0700695 ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500696 }
697 else
698 {
699 flags = TCP_FLAG_RST | TCP_FLAG_ACK;
700 seq = 0;
701 ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
702 }
703
704 th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
705 seq, ack, tcp_hdr_len, flags, 0);
706
707 /* Swap src and dst ip */
708 if (is_ip4)
709 {
710 ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
711 ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
Srikanth A02833ff2019-10-02 17:48:58 -0700712 &pkt_ih4->src_address, IP_PROTOCOL_TCP,
Florin Corasbbcfaac2019-10-10 13:52:04 -0700713 tcp_csum_offload (tc));
Dave Barach68b0fb02017-02-28 15:15:56 -0500714 th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
715 }
716 else
717 {
718 int bogus = ~0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500719 ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
720 0x60);
Tarun Gupta2089c692019-11-04 16:35:59 -0800721 ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
722 &pkt_ih6->src_address,
723 IP_PROTOCOL_TCP,
724 tc->ipv6_flow_label);
Dave Barach68b0fb02017-02-28 15:15:56 -0500725 th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
726 ASSERT (!bogus);
727 }
728
Florin Coras647acd52021-07-02 18:10:20 -0700729 tcp_enqueue_half_open (wrk, tc, b, bi);
Florin Corasa436a422019-08-20 07:09:31 -0700730 TCP_EVT (TCP_EVT_RST_SENT, tc);
Florin Coras1f421012019-07-26 10:18:51 -0700731 vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
732 TCP_ERROR_RST_SENT, 1);
Dave Barach68b0fb02017-02-28 15:15:56 -0500733}
734
Florin Coras1f152cd2017-08-18 19:28:03 -0700735/**
736 * Build and set reset packet for connection
737 */
738void
739tcp_send_reset (tcp_connection_t * tc)
740{
Florin Corasbe72ae62018-11-01 11:23:03 -0700741 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
742 vlib_main_t *vm = wrk->vm;
Florin Coras1f152cd2017-08-18 19:28:03 -0700743 vlib_buffer_t *b;
744 u32 bi;
745 tcp_header_t *th;
746 u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
747 u8 flags;
748
Florin Corasbdf7fd62019-01-31 17:31:01 -0800749 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Corasd4712a82020-05-22 21:51:30 +0000750 {
751 tcp_worker_stats_inc (wrk, no_buffer, 1);
752 return;
753 }
Florin Coras1f152cd2017-08-18 19:28:03 -0700754 b = vlib_get_buffer (vm, bi);
755 tcp_init_buffer (vm, b);
756
757 tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
758 tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
Florin Coras4e783b92020-03-23 23:24:19 +0000759 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
liuyacan3871bd32021-06-29 16:45:11 +0800760 flags = TCP_FLAG_RST | TCP_FLAG_ACK;
Florin Coras1f152cd2017-08-18 19:28:03 -0700761 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
762 tc->rcv_nxt, tcp_hdr_opts_len, flags,
763 advertise_wnd);
764 opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
Srikanth A02833ff2019-10-02 17:48:58 -0700765 th->checksum = tcp_compute_checksum (tc, b);
Florin Coras1f152cd2017-08-18 19:28:03 -0700766 ASSERT (opts_write_len == tc->snd_opts_len);
767 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
Florin Corasdf36f492019-08-18 18:09:28 -0700768 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Corasa436a422019-08-20 07:09:31 -0700769 TCP_EVT (TCP_EVT_RST_SENT, tc);
Florin Coras1f421012019-07-26 10:18:51 -0700770 vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
771 TCP_ERROR_RST_SENT, 1);
Florin Coras1f152cd2017-08-18 19:28:03 -0700772}
773
Dave Barach68b0fb02017-02-28 15:15:56 -0500774/**
775 * Send SYN
776 *
Florin Coras647acd52021-07-02 18:10:20 -0700777 * Builds a SYN packet for a half-open connection and sends it to tcp-output.
778 * The packet is handled by main thread and because half-open and established
779 * connections use the same pool the connection can be retrieved without
780 * additional logic.
Dave Barach68b0fb02017-02-28 15:15:56 -0500781 */
782void
783tcp_send_syn (tcp_connection_t * tc)
784{
Florin Corasbe72ae62018-11-01 11:23:03 -0700785 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
786 vlib_main_t *vm = wrk->vm;
Dave Barach68b0fb02017-02-28 15:15:56 -0500787 vlib_buffer_t *b;
788 u32 bi;
Dave Barach68b0fb02017-02-28 15:15:56 -0500789
Florin Corasf988e692017-11-27 04:34:14 -0500790 /*
791 * Setup retransmit and establish timers before requesting buffer
792 * such that we can return if we've ran out.
793 */
Florin Coras0765d972020-03-18 21:26:41 +0000794 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
Florin Corasf988e692017-11-27 04:34:14 -0500795 tc->rto * TCP_TO_TIMER_TICK);
796
Florin Corasbdf7fd62019-01-31 17:31:01 -0800797 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Coras222e1f412019-02-16 20:47:32 -0800798 {
liuyacan7e781192021-06-14 18:09:01 +0800799 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
800 tcp_cfg.alloc_err_timeout);
Florin Corasd4712a82020-05-22 21:51:30 +0000801 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras222e1f412019-02-16 20:47:32 -0800802 return;
803 }
Florin Coras66b11312017-07-31 17:18:03 -0700804
Dave Barach68b0fb02017-02-28 15:15:56 -0500805 b = vlib_get_buffer (vm, bi);
Florin Coras1f152cd2017-08-18 19:28:03 -0700806 tcp_init_buffer (vm, b);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400807 tcp_make_syn (tc, b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500808
809 /* Measure RTT with this */
Florin Corasefefc6b2018-11-07 12:49:19 -0800810 tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
Dave Barach68b0fb02017-02-28 15:15:56 -0500811 tc->rtt_seq = tc->snd_nxt;
Dave Barach68b0fb02017-02-28 15:15:56 -0500812 tc->rto_boff = 0;
813
Florin Coras647acd52021-07-02 18:10:20 -0700814 tcp_enqueue_half_open (wrk, tc, b, bi);
Florin Corasa436a422019-08-20 07:09:31 -0700815 TCP_EVT (TCP_EVT_SYN_SENT, tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500816}
817
Florin Coras7ac053b2018-11-05 15:57:21 -0800818void
819tcp_send_synack (tcp_connection_t * tc)
820{
821 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
822 vlib_main_t *vm = wrk->vm;
823 vlib_buffer_t *b;
824 u32 bi;
825
Florin Coraseaec00c2020-10-22 11:22:22 -0700826 ASSERT (tc->snd_una != tc->snd_nxt);
827 tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
Florin Coras222e1f412019-02-16 20:47:32 -0800828
Florin Corasbdf7fd62019-01-31 17:31:01 -0800829 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Coras222e1f412019-02-16 20:47:32 -0800830 {
liuyacan7e781192021-06-14 18:09:01 +0800831 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
832 tcp_cfg.alloc_err_timeout);
Florin Corasd4712a82020-05-22 21:51:30 +0000833 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras222e1f412019-02-16 20:47:32 -0800834 return;
835 }
Florin Coras7ac053b2018-11-05 15:57:21 -0800836
Florin Corasdf084782018-12-06 17:41:10 -0800837 tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
Florin Coras7ac053b2018-11-05 15:57:21 -0800838 b = vlib_get_buffer (vm, bi);
Florin Corasbdf7fd62019-01-31 17:31:01 -0800839 tcp_init_buffer (vm, b);
Florin Coras7ac053b2018-11-05 15:57:21 -0800840 tcp_make_synack (tc, b);
841 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Corasa436a422019-08-20 07:09:31 -0700842 TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
Florin Coras7ac053b2018-11-05 15:57:21 -0800843}
844
Florin Coras66b11312017-07-31 17:18:03 -0700845/**
Dave Barach68b0fb02017-02-28 15:15:56 -0500846 * Send FIN
847 */
848void
849tcp_send_fin (tcp_connection_t * tc)
850{
Florin Corasbe72ae62018-11-01 11:23:03 -0700851 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
852 vlib_main_t *vm = wrk->vm;
Florin Coras9d063042017-09-14 03:08:00 -0400853 vlib_buffer_t *b;
854 u32 bi;
855 u8 fin_snt = 0;
856
Florin Corasc977e7c2018-10-16 20:30:31 -0700857 fin_snt = tc->flags & TCP_CONN_FINSNT;
858 if (fin_snt)
Florin Coras47596832019-03-12 18:58:54 -0700859 tc->snd_nxt -= 1;
Florin Corasc977e7c2018-10-16 20:30:31 -0700860
Florin Corasbdf7fd62019-01-31 17:31:01 -0800861 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Coraseb97e5f2018-10-15 21:35:42 -0700862 {
863 /* Out of buffers so program fin retransmit ASAP */
liuyacan7e781192021-06-14 18:09:01 +0800864 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
865 tcp_cfg.alloc_err_timeout);
Florin Coras85a3ddd2018-12-24 16:54:34 -0800866 if (fin_snt)
Florin Coras47596832019-03-12 18:58:54 -0700867 tc->snd_nxt += 1;
Florin Coras222e1f412019-02-16 20:47:32 -0800868 else
869 /* Make sure retransmit retries a fin not data */
870 tc->flags |= TCP_CONN_FINSNT;
Florin Corasd4712a82020-05-22 21:51:30 +0000871 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras85a3ddd2018-12-24 16:54:34 -0800872 return;
Florin Coraseb97e5f2018-10-15 21:35:42 -0700873 }
874
Florin Corascb711a42019-10-16 19:28:17 -0700875 /* If we have non-dupacks programmed, no need to send them */
876 if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
877 tc->flags &= ~TCP_CONN_SNDACK;
878
Dave Barach68b0fb02017-02-28 15:15:56 -0500879 b = vlib_get_buffer (vm, bi);
Florin Coras24793e32018-05-09 11:34:25 -0700880 tcp_init_buffer (vm, b);
Florin Corasd79b41e2017-03-04 05:37:52 -0800881 tcp_make_fin (tc, b);
Florin Coras2a7ea2e2019-10-16 22:06:08 -0700882 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Corasa436a422019-08-20 07:09:31 -0700883 TCP_EVT (TCP_EVT_FIN_SENT, tc);
Florin Coras47596832019-03-12 18:58:54 -0700884 /* Account for the FIN */
885 tc->snd_nxt += 1;
Florin Coraseaec00c2020-10-22 11:22:22 -0700886 tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
Florin Coras9d063042017-09-14 03:08:00 -0400887 if (!fin_snt)
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400888 {
889 tc->flags |= TCP_CONN_FINSNT;
890 tc->flags &= ~TCP_CONN_FINPNDG;
Florin Corasa096f2d2017-09-28 23:49:42 -0400891 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500892}
893
Dave Barach68b0fb02017-02-28 15:15:56 -0500894/**
Florin Corasfd247442019-03-12 16:56:26 -0700895 * Push TCP header and update connection variables. Should only be called
896 * for segments with data, not for 'control' packets.
Dave Barach68b0fb02017-02-28 15:15:56 -0500897 */
Florin Coras0dbd5172018-06-25 16:19:34 -0700898always_inline void
Florin Coras47596832019-03-12 18:58:54 -0700899tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
900 u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Dave Barach68b0fb02017-02-28 15:15:56 -0500901{
Florin Corasfd247442019-03-12 16:56:26 -0700902 u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
Dave Barach68b0fb02017-02-28 15:15:56 -0500903 u32 advertise_wnd, data_len;
Florin Corasb26743d2018-06-26 09:31:04 -0700904 tcp_main_t *tm = &tcp_main;
Dave Barach68b0fb02017-02-28 15:15:56 -0500905 tcp_header_t *th;
906
Florin Corasb26743d2018-06-26 09:31:04 -0700907 data_len = b->current_length;
908 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
909 data_len += b->total_length_not_including_first_buffer;
910
Dave Barach68b0fb02017-02-28 15:15:56 -0500911 vnet_buffer (b)->tcp.flags = 0;
Florin Corasb26743d2018-06-26 09:31:04 -0700912 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
Dave Barach68b0fb02017-02-28 15:15:56 -0500913
Florin Corasc8343412017-05-04 14:25:50 -0700914 if (compute_opts)
915 tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
916
Florin Corasc8343412017-05-04 14:25:50 -0700917 tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
Florin Corasb26743d2018-06-26 09:31:04 -0700918
919 if (maybe_burst)
920 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
921 else
Florin Coras47596832019-03-12 18:58:54 -0700922 advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
Florin Corasb26743d2018-06-26 09:31:04 -0700923
Florin Coras42ceddb2018-12-12 10:56:01 -0800924 if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
925 {
Florin Coras47596832019-03-12 18:58:54 -0700926 if (seq_geq (tc->psh_seq, snd_nxt)
927 && seq_lt (tc->psh_seq, snd_nxt + data_len))
Florin Coras42ceddb2018-12-12 10:56:01 -0800928 flags |= TCP_FLAG_PSH;
929 }
Florin Coras47596832019-03-12 18:58:54 -0700930 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
Dave Barach68b0fb02017-02-28 15:15:56 -0500931 tc->rcv_nxt, tcp_hdr_opts_len, flags,
932 advertise_wnd);
Dave Barach68b0fb02017-02-28 15:15:56 -0500933
Florin Corasb26743d2018-06-26 09:31:04 -0700934 if (maybe_burst)
935 {
Dave Barach178cf492018-11-13 16:34:13 -0500936 clib_memcpy_fast ((u8 *) (th + 1),
937 tm->wrk_ctx[tc->c_thread_index].cached_opts,
938 tc->snd_opts_len);
Florin Corasb26743d2018-06-26 09:31:04 -0700939 }
940 else
941 {
942 u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
943 ASSERT (len == tc->snd_opts_len);
944 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500945
Florin Coras93992a92017-05-24 18:03:56 -0700946 /*
947 * Update connection variables
948 */
949
Florin Coras47596832019-03-12 18:58:54 -0700950 if (update_snd_nxt)
951 tc->snd_nxt += data_len;
Florin Corasc28764f2017-04-26 00:08:42 -0700952 tc->rcv_las = tc->rcv_nxt;
Florin Coras3e350af2017-03-30 02:54:28 -0700953
Florin Corasedfe0ee2019-07-29 18:13:25 -0700954 tc->bytes_out += data_len;
955 tc->data_segs_out += 1;
956
Srikanth A02833ff2019-10-02 17:48:58 -0700957 th->checksum = tcp_compute_checksum (tc, b);
958
Florin Corasa436a422019-08-20 07:09:31 -0700959 TCP_EVT (TCP_EVT_PKTIZE, tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500960}
961
Florin Corasd6ae4bf2019-10-12 18:10:20 -0700962always_inline u32
963tcp_buffer_len (vlib_buffer_t * b)
964{
965 u32 data_len = b->current_length;
966 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
967 data_len += b->total_length_not_including_first_buffer;
968 return data_len;
969}
970
Florin Coras0dbd5172018-06-25 16:19:34 -0700971u32
Florin Coras14ed6df2019-03-06 21:13:42 -0800972tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
Florin Coras0dbd5172018-06-25 16:19:34 -0700973{
Florin Coras14ed6df2019-03-06 21:13:42 -0800974 tcp_connection_t *tc = (tcp_connection_t *) tconn;
Florin Coras52814732019-06-12 15:38:19 -0700975
Florin Corasd6ae4bf2019-10-12 18:10:20 -0700976 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
977 tcp_bt_track_tx (tc, tcp_buffer_len (b));
Florin Coras52814732019-06-12 15:38:19 -0700978
Florin Coras47596832019-03-12 18:58:54 -0700979 tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
980 /* update_snd_nxt */ 1);
Florin Coras52814732019-06-12 15:38:19 -0700981
Florin Coras55e556c2020-10-23 10:45:48 -0700982 tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
Florin Coras0dbd5172018-06-25 16:19:34 -0700983 /* If not tracking an ACK, start tracking */
984 if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
985 {
Florin Corasd67f1122018-05-21 17:47:40 -0700986 tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
Florin Coras0dbd5172018-06-25 16:19:34 -0700987 tc->rtt_seq = tc->snd_nxt;
988 }
989 if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
990 {
Florin Coras0765d972020-03-18 21:26:41 +0000991 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
992 tcp_retransmit_timer_set (&wrk->timer_wheel, tc);
Florin Coras0dbd5172018-06-25 16:19:34 -0700993 tc->rto_boff = 0;
994 }
Florin Coras0dbd5172018-06-25 16:19:34 -0700995 return 0;
996}
997
Dave Barach68b0fb02017-02-28 15:15:56 -0500998void
Florin Coras6792ec02017-03-13 03:49:51 -0700999tcp_send_ack (tcp_connection_t * tc)
Dave Barach68b0fb02017-02-28 15:15:56 -05001000{
Florin Corasbe72ae62018-11-01 11:23:03 -07001001 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1002 vlib_main_t *vm = wrk->vm;
Dave Barach68b0fb02017-02-28 15:15:56 -05001003 vlib_buffer_t *b;
1004 u32 bi;
1005
Florin Corasbdf7fd62019-01-31 17:31:01 -08001006 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Coras222e1f412019-02-16 20:47:32 -08001007 {
1008 tcp_update_rcv_wnd (tc);
Florin Corasd4712a82020-05-22 21:51:30 +00001009 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras222e1f412019-02-16 20:47:32 -08001010 return;
1011 }
Dave Barach68b0fb02017-02-28 15:15:56 -05001012 b = vlib_get_buffer (vm, bi);
Florin Coras24793e32018-05-09 11:34:25 -07001013 tcp_init_buffer (vm, b);
Dave Barach68b0fb02017-02-28 15:15:56 -05001014 tcp_make_ack (tc, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001015 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Dave Barach68b0fb02017-02-28 15:15:56 -05001016}
1017
Florin Coras7ac053b2018-11-05 15:57:21 -08001018void
Florin Coras26dd6de2019-07-23 23:54:47 -07001019tcp_program_ack (tcp_connection_t * tc)
Florin Coras7ac053b2018-11-05 15:57:21 -08001020{
1021 if (!(tc->flags & TCP_CONN_SNDACK))
1022 {
Florin Coras26dd6de2019-07-23 23:54:47 -07001023 session_add_self_custom_tx_evt (&tc->connection, 1);
Florin Coras7ac053b2018-11-05 15:57:21 -08001024 tc->flags |= TCP_CONN_SNDACK;
1025 }
1026}
1027
1028void
Florin Coras26dd6de2019-07-23 23:54:47 -07001029tcp_program_dupack (tcp_connection_t * tc)
Florin Coras7ac053b2018-11-05 15:57:21 -08001030{
1031 if (!(tc->flags & TCP_CONN_SNDACK))
1032 {
Florin Coras26dd6de2019-07-23 23:54:47 -07001033 session_add_self_custom_tx_evt (&tc->connection, 1);
Florin Coras7ac053b2018-11-05 15:57:21 -08001034 tc->flags |= TCP_CONN_SNDACK;
1035 }
1036 if (tc->pending_dupacks < 255)
1037 tc->pending_dupacks += 1;
1038}
1039
1040void
Florin Coras36ebcff2019-09-12 18:36:44 -07001041tcp_program_retransmit (tcp_connection_t * tc)
Florin Coras7ac053b2018-11-05 15:57:21 -08001042{
Florin Coras36ebcff2019-09-12 18:36:44 -07001043 if (!(tc->flags & TCP_CONN_RXT_PENDING))
Florin Coras7ac053b2018-11-05 15:57:21 -08001044 {
Florin Coras26dd6de2019-07-23 23:54:47 -07001045 session_add_self_custom_tx_evt (&tc->connection, 0);
Florin Coras36ebcff2019-09-12 18:36:44 -07001046 tc->flags |= TCP_CONN_RXT_PENDING;
Florin Coras7ac053b2018-11-05 15:57:21 -08001047 }
Florin Coras7ac053b2018-11-05 15:57:21 -08001048}
1049
Florin Corasb2215d62017-08-01 16:56:58 -07001050/**
Florin Coras017dc452019-08-30 11:06:35 -07001051 * Send window update ack
1052 *
1053 * Ensures that it will be sent only once, after a zero rwnd has been
1054 * advertised in a previous ack, and only if rwnd has grown beyond a
1055 * configurable value.
Vladimir Kropylev398afbd2019-06-25 00:06:52 +03001056 */
1057void
1058tcp_send_window_update_ack (tcp_connection_t * tc)
1059{
Vladimir Kropylev398afbd2019-06-25 00:06:52 +03001060 if (tcp_zero_rwnd_sent (tc))
1061 {
Florin Coras017dc452019-08-30 11:06:35 -07001062 tcp_update_rcv_wnd (tc);
1063 if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
Vladimir Kropylev398afbd2019-06-25 00:06:52 +03001064 {
1065 tcp_zero_rwnd_sent_off (tc);
Florin Coras26dd6de2019-07-23 23:54:47 -07001066 tcp_program_ack (tc);
Vladimir Kropylev398afbd2019-06-25 00:06:52 +03001067 }
1068 }
1069}
1070
1071/**
Florin Coras36ee9f12018-11-02 12:52:10 -07001072 * Allocate a new buffer and build a new tcp segment
Dave Barach68b0fb02017-02-28 15:15:56 -05001073 *
Florin Coras36ee9f12018-11-02 12:52:10 -07001074 * @param wrk tcp worker
1075 * @param tc connection for which the segment will be allocated
1076 * @param offset offset of the first byte in the tx fifo
1077 * @param max_deq_byte segment size
1078 * @param[out] b pointer to buffer allocated
1079 *
1080 * @return the number of bytes in the segment or 0 if buffer cannot be
1081 * allocated or no data available
Florin Coras93992a92017-05-24 18:03:56 -07001082 */
Florin Coras36ee9f12018-11-02 12:52:10 -07001083static int
1084tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
1085 u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
Dave Barach68b0fb02017-02-28 15:15:56 -05001086{
Florin Corasbe72ae62018-11-01 11:23:03 -07001087 u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
1088 vlib_main_t *vm = wrk->vm;
Florin Corasbdf7fd62019-01-31 17:31:01 -08001089 u32 bi, seg_size;
Florin Coras93992a92017-05-24 18:03:56 -07001090 int n_bytes = 0;
Florin Corase87216f2017-08-17 16:59:22 -07001091 u8 *data;
Dave Barach68b0fb02017-02-28 15:15:56 -05001092
Florin Coras1ee78302019-02-05 15:51:15 -08001093 seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
Florin Coras1f152cd2017-08-18 19:28:03 -07001094
Florin Corasb2215d62017-08-01 16:56:58 -07001095 /*
1096 * Prepare options
1097 */
Florin Corasc8343412017-05-04 14:25:50 -07001098 tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1099
Florin Corasb2215d62017-08-01 16:56:58 -07001100 /*
1101 * Allocate and fill in buffer(s)
1102 */
Dave Barach68b0fb02017-02-28 15:15:56 -05001103
Florin Corasb2215d62017-08-01 16:56:58 -07001104 /* Easy case, buffer size greater than mss */
Florin Corasbe72ae62018-11-01 11:23:03 -07001105 if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
Florin Corasb2215d62017-08-01 16:56:58 -07001106 {
Florin Corasbdf7fd62019-01-31 17:31:01 -08001107 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Corasd4712a82020-05-22 21:51:30 +00001108 {
1109 tcp_worker_stats_inc (wrk, no_buffer, 1);
1110 return 0;
1111 }
Florin Coras24793e32018-05-09 11:34:25 -07001112 *b = vlib_get_buffer (vm, bi);
1113 data = tcp_init_buffer (vm, *b);
Florin Coras31c99552019-03-01 13:00:58 -08001114 n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1115 max_deq_bytes);
Florin Corasb2215d62017-08-01 16:56:58 -07001116 ASSERT (n_bytes == max_deq_bytes);
1117 b[0]->current_length = n_bytes;
Florin Coras47596832019-03-12 18:58:54 -07001118 tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1119 /* burst */ 0, /* update_snd_nxt */ 0);
Florin Corasb2215d62017-08-01 16:56:58 -07001120 }
1121 /* Split mss into multiple buffers */
1122 else
1123 {
Florin Corasbdf7fd62019-01-31 17:31:01 -08001124 u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1125 u16 n_peeked, len_to_deq;
Florin Corasb2215d62017-08-01 16:56:58 -07001126 vlib_buffer_t *chain_b, *prev_b;
Florin Corasb2215d62017-08-01 16:56:58 -07001127 int i;
1128
Florin Corasb2215d62017-08-01 16:56:58 -07001129 /* Make sure we have enough buffers */
Florin Corasbe72ae62018-11-01 11:23:03 -07001130 n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
Florin Corasbdf7fd62019-01-31 17:31:01 -08001131 vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
1132 CLIB_CACHE_LINE_BYTES);
1133 n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
1134 if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
Florin Corasb2215d62017-08-01 16:56:58 -07001135 {
Florin Corasbdf7fd62019-01-31 17:31:01 -08001136 if (n_bufs)
1137 vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
Florin Corasd4712a82020-05-22 21:51:30 +00001138 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Corasbdf7fd62019-01-31 17:31:01 -08001139 return 0;
Florin Corasb2215d62017-08-01 16:56:58 -07001140 }
1141
Florin Corasbdf7fd62019-01-31 17:31:01 -08001142 *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
Florin Coras24793e32018-05-09 11:34:25 -07001143 data = tcp_init_buffer (vm, *b);
Florin Coras31c99552019-03-01 13:00:58 -08001144 n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1145 bytes_per_buffer -
1146 TRANSPORT_MAX_HDRS_LEN);
Florin Corasb2215d62017-08-01 16:56:58 -07001147 b[0]->current_length = n_bytes;
1148 b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1149 b[0]->total_length_not_including_first_buffer = 0;
Florin Corasb2215d62017-08-01 16:56:58 -07001150 max_deq_bytes -= n_bytes;
1151
1152 chain_b = *b;
1153 for (i = 1; i < n_bufs_per_seg; i++)
1154 {
1155 prev_b = chain_b;
Florin Corasbe72ae62018-11-01 11:23:03 -07001156 len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
Florin Corasbdf7fd62019-01-31 17:31:01 -08001157 chain_bi = wrk->tx_buffers[--n_bufs];
Florin Corasb2215d62017-08-01 16:56:58 -07001158 chain_b = vlib_get_buffer (vm, chain_bi);
1159 chain_b->current_data = 0;
Florin Corase87216f2017-08-17 16:59:22 -07001160 data = vlib_buffer_get_current (chain_b);
Florin Coras31c99552019-03-01 13:00:58 -08001161 n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
1162 offset + n_bytes,
1163 len_to_deq);
Florin Corasb2215d62017-08-01 16:56:58 -07001164 ASSERT (n_peeked == len_to_deq);
Florin Coras1f152cd2017-08-18 19:28:03 -07001165 n_bytes += n_peeked;
Florin Corasb2215d62017-08-01 16:56:58 -07001166 chain_b->current_length = n_peeked;
Florin Coras1f152cd2017-08-18 19:28:03 -07001167 chain_b->next_buffer = 0;
Florin Corasb2215d62017-08-01 16:56:58 -07001168
1169 /* update previous buffer */
1170 prev_b->next_buffer = chain_bi;
1171 prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
1172
Florin Corasb2215d62017-08-01 16:56:58 -07001173 max_deq_bytes -= n_peeked;
Florin Coras1f152cd2017-08-18 19:28:03 -07001174 b[0]->total_length_not_including_first_buffer += n_peeked;
Florin Corasb2215d62017-08-01 16:56:58 -07001175 }
Florin Coras1f152cd2017-08-18 19:28:03 -07001176
Florin Coras47596832019-03-12 18:58:54 -07001177 tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1178 /* burst */ 0, /* update_snd_nxt */ 0);
Florin Corasbdf7fd62019-01-31 17:31:01 -08001179
1180 if (PREDICT_FALSE (n_bufs))
1181 {
1182 clib_warning ("not all buffers consumed");
1183 vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1184 }
Florin Corasb2215d62017-08-01 16:56:58 -07001185 }
1186
Florin Coras93992a92017-05-24 18:03:56 -07001187 ASSERT (n_bytes > 0);
Florin Corasbe72ae62018-11-01 11:23:03 -07001188 ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
Florin Corasbb292f42017-05-19 09:49:19 -07001189
Florin Coras36ee9f12018-11-02 12:52:10 -07001190 return n_bytes;
1191}
1192
1193/**
1194 * Build a retransmit segment
1195 *
1196 * @return the number of bytes in the segment or 0 if there's nothing to
1197 * retransmit
1198 */
1199static u32
1200tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk,
1201 tcp_connection_t * tc, u32 offset,
1202 u32 max_deq_bytes, vlib_buffer_t ** b)
1203{
1204 u32 start, available_bytes;
1205 int n_bytes = 0;
1206
1207 ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1208 ASSERT (max_deq_bytes != 0);
1209
1210 /*
1211 * Make sure we can retransmit something
1212 */
Florin Coras31c99552019-03-01 13:00:58 -08001213 available_bytes = transport_max_tx_dequeue (&tc->connection);
Florin Coras36ee9f12018-11-02 12:52:10 -07001214 ASSERT (available_bytes >= offset);
1215 available_bytes -= offset;
1216 if (!available_bytes)
1217 return 0;
1218
1219 max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
1220 max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
1221
Florin Coras36ee9f12018-11-02 12:52:10 -07001222 start = tc->snd_una + offset;
Florin Coras81cb8e42019-10-22 19:44:45 -07001223 ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
1224
Florin Coras36ee9f12018-11-02 12:52:10 -07001225 n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
1226 if (!n_bytes)
1227 return 0;
1228
Florin Coras36ebcff2019-09-12 18:36:44 -07001229 tc->snd_rxt_bytes += n_bytes;
1230
Florin Corasbbcfaac2019-10-10 13:52:04 -07001231 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
Florin Coras36ebcff2019-09-12 18:36:44 -07001232 tcp_bt_track_rxt (tc, start, start + n_bytes);
Dave Barach68b0fb02017-02-28 15:15:56 -05001233
Florin Corasedfe0ee2019-07-29 18:13:25 -07001234 tc->bytes_retrans += n_bytes;
1235 tc->segs_retrans += 1;
Florin Coras0765d972020-03-18 21:26:41 +00001236 tcp_worker_stats_inc (wrk, rxt_segs, 1);
Florin Corasa436a422019-08-20 07:09:31 -07001237 TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
Florin Coras36ebcff2019-09-12 18:36:44 -07001238
Dave Barach68b0fb02017-02-28 15:15:56 -05001239 return n_bytes;
1240}
1241
Florin Coras36ebcff2019-09-12 18:36:44 -07001242static void
1243tcp_check_sack_reneging (tcp_connection_t * tc)
1244{
1245 sack_scoreboard_t *sb = &tc->sack_sb;
1246 sack_scoreboard_hole_t *hole;
1247
1248 hole = scoreboard_first_hole (sb);
1249 if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1250 return;
1251
1252 scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
1253}
1254
Florin Coras6792ec02017-03-13 03:49:51 -07001255/**
1256 * Reset congestion control, switch cwnd to loss window and try again.
1257 */
1258static void
Florin Corasa3c32652019-07-03 17:47:22 -07001259tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
Florin Coras6792ec02017-03-13 03:49:51 -07001260{
Florin Corasa436a422019-08-20 07:09:31 -07001261 TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
Florin Coras36ebcff2019-09-12 18:36:44 -07001262
Florin Coras93992a92017-05-24 18:03:56 -07001263 tc->prev_ssthresh = tc->ssthresh;
1264 tc->prev_cwnd = tc->cwnd;
1265
Florin Coras36ebcff2019-09-12 18:36:44 -07001266 /* If we entrered loss without fast recovery, notify cc algo of the
1267 * congestion event such that it can update ssthresh and its state */
1268 if (!tcp_in_fastrecovery (tc))
1269 tcp_cc_congestion (tc);
Florin Corasa3c32652019-07-03 17:47:22 -07001270
Florin Coras36ebcff2019-09-12 18:36:44 -07001271 /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
Florin Corasa3c32652019-07-03 17:47:22 -07001272 tcp_cc_loss (tc);
Florin Coras6792ec02017-03-13 03:49:51 -07001273
Florin Corasf1762d62017-09-24 19:43:08 -04001274 tc->rtt_ts = 0;
Florin Corasd2aab832018-05-22 11:39:59 -07001275 tc->cwnd_acc_bytes = 0;
Florin Corasedfe0ee2019-07-29 18:13:25 -07001276 tc->tr_occurences += 1;
Florin Corascc4d6d02020-07-29 23:03:39 -07001277 tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
Florin Coras3af90fc2017-05-03 21:09:42 -07001278 tcp_recovery_on (tc);
Florin Coras6792ec02017-03-13 03:49:51 -07001279}
1280
Florin Coras75c48c12019-08-02 15:17:21 -07001281void
Florin Corasaa388692020-02-14 23:41:25 +00001282tcp_timer_retransmit_handler (tcp_connection_t * tc)
Dave Barach68b0fb02017-02-28 15:15:56 -05001283{
Florin Corasaa388692020-02-14 23:41:25 +00001284 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
Florin Corasbe72ae62018-11-01 11:23:03 -07001285 vlib_main_t *vm = wrk->vm;
Florin Corasb2215d62017-08-01 16:56:58 -07001286 vlib_buffer_t *b = 0;
Florin Coras3af90fc2017-05-03 21:09:42 -07001287 u32 bi, n_bytes;
Dave Barach68b0fb02017-02-28 15:15:56 -05001288
Florin Coras0765d972020-03-18 21:26:41 +00001289 tcp_worker_stats_inc (wrk, tr_events, 1);
Florin Coras75c48c12019-08-02 15:17:21 -07001290
Florin Corasaa388692020-02-14 23:41:25 +00001291 /* Should be handled by a different handler */
1292 if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
Florin Coras75c48c12019-08-02 15:17:21 -07001293 return;
1294
Florin Coras75c48c12019-08-02 15:17:21 -07001295 /* Wait-close and retransmit could pop at the same time */
1296 if (tc->state == TCP_STATE_CLOSED)
1297 return;
Dave Barach68b0fb02017-02-28 15:15:56 -05001298
Florin Corase04c2992017-03-01 08:17:34 -08001299 if (tc->state >= TCP_STATE_ESTABLISHED)
Dave Barach68b0fb02017-02-28 15:15:56 -05001300 {
Florin Corasa436a422019-08-20 07:09:31 -07001301 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
Florin Corase55a6d72018-10-31 23:09:22 -07001302
Florin Coras93992a92017-05-24 18:03:56 -07001303 /* Lost FIN, retransmit and return */
Florin Coras222e1f412019-02-16 20:47:32 -08001304 if (tc->flags & TCP_CONN_FINSNT)
Florin Coras93992a92017-05-24 18:03:56 -07001305 {
1306 tcp_send_fin (tc);
Florin Corasf988e692017-11-27 04:34:14 -05001307 tc->rto_boff += 1;
1308 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
Florin Coras93992a92017-05-24 18:03:56 -07001309 return;
1310 }
1311
Florin Coraseaec00c2020-10-22 11:22:22 -07001312 /* Shouldn't be here */
1313 if (tc->snd_una == tc->snd_nxt)
Florin Corasa096f2d2017-09-28 23:49:42 -04001314 {
Florin Coras3ec66b02018-08-23 16:27:05 -07001315 ASSERT (!tcp_in_recovery (tc));
1316 tc->rto_boff = 0;
Florin Corasa096f2d2017-09-28 23:49:42 -04001317 return;
1318 }
1319
Florin Coras3ec66b02018-08-23 16:27:05 -07001320 /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
1321 * to persist timer timeout */
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001322 if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
1323 {
1324 tc->rto_boff = 0;
1325 tcp_update_rto (tc);
1326 }
1327
Florin Coras548f7572019-06-07 15:31:06 -07001328 /* Peer is dead or network connectivity is lost. Close connection.
1329 * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
1330 * a min rto of 0.2s we need to retry about 8 times. */
1331 if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
1332 {
1333 tcp_send_reset (tc);
1334 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1335 session_transport_closing_notify (&tc->connection);
Florin Coras52be6742019-11-14 23:32:08 -08001336 session_transport_closed_notify (&tc->connection);
Florin Coras548f7572019-06-07 15:31:06 -07001337 tcp_connection_timers_reset (tc);
Florin Coras7a3a8662020-02-22 02:27:21 +00001338 tcp_program_cleanup (wrk, tc);
Florin Coras0765d972020-03-18 21:26:41 +00001339 tcp_worker_stats_inc (wrk, tr_abort, 1);
Florin Coras548f7572019-06-07 15:31:06 -07001340 return;
1341 }
1342
Florin Coras36ebcff2019-09-12 18:36:44 -07001343 if (tcp_opts_sack_permitted (&tc->rcv_opts))
1344 tcp_check_sack_reneging (tc);
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001345
Florin Coras36ebcff2019-09-12 18:36:44 -07001346 /* Update send congestion to make sure that rxt has data to send */
1347 tc->snd_congestion = tc->snd_nxt;
Florin Corasa3c32652019-07-03 17:47:22 -07001348
Florin Coras36ebcff2019-09-12 18:36:44 -07001349 /* Send the first unacked segment. If we're short on buffers, return
1350 * as soon as possible */
Florin Coras81cb8e42019-10-22 19:44:45 -07001351 n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1352 n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
Florin Coras85a3ddd2018-12-24 16:54:34 -08001353 if (!n_bytes)
Florin Corase04c2992017-03-01 08:17:34 -08001354 {
liuyacan7e781192021-06-14 18:09:01 +08001355 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
1356 tcp_cfg.alloc_err_timeout);
Florin Corase04c2992017-03-01 08:17:34 -08001357 return;
1358 }
Dave Barach68b0fb02017-02-28 15:15:56 -05001359
Dave Barachb7f1faa2017-08-29 11:43:37 -04001360 bi = vlib_get_buffer_index (vm, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001361 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Coras75c48c12019-08-02 15:17:21 -07001362
1363 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
Florin Coraseaec00c2020-10-22 11:22:22 -07001364 tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
Florin Coras36ebcff2019-09-12 18:36:44 -07001365
1366 tc->rto_boff += 1;
1367 if (tc->rto_boff == 1)
1368 {
1369 tcp_cc_init_rxt_timeout (tc);
1370 /* Record timestamp. Eifel detection algorithm RFC3522 */
1371 tc->snd_rxt_ts = tcp_tstamp (tc);
1372 }
1373
1374 if (tcp_opts_sack_permitted (&tc->rcv_opts))
Florin Corasbe237bf2019-09-27 08:16:40 -07001375 scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
Florin Coras36ebcff2019-09-12 18:36:44 -07001376
1377 tcp_program_retransmit (tc);
Florin Coras93992a92017-05-24 18:03:56 -07001378 }
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001379 /* Retransmit SYN-ACK */
1380 else if (tc->state == TCP_STATE_SYN_RCVD)
1381 {
Florin Corasa436a422019-08-20 07:09:31 -07001382 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
Florin Corase55a6d72018-10-31 23:09:22 -07001383
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001384 tc->rtt_ts = 0;
1385
Florin Coras75c48c12019-08-02 15:17:21 -07001386 /* Passive open establish timeout */
1387 if (tc->rto > TCP_ESTABLISH_TIME >> 1)
1388 {
1389 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1390 tcp_connection_timers_reset (tc);
Florin Coras7a3a8662020-02-22 02:27:21 +00001391 tcp_program_cleanup (wrk, tc);
Florin Coras0765d972020-03-18 21:26:41 +00001392 tcp_worker_stats_inc (wrk, tr_abort, 1);
Florin Coras75c48c12019-08-02 15:17:21 -07001393 return;
1394 }
Florin Coras222e1f412019-02-16 20:47:32 -08001395
Florin Corasbdf7fd62019-01-31 17:31:01 -08001396 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Corasf988e692017-11-27 04:34:14 -05001397 {
liuyacan7e781192021-06-14 18:09:01 +08001398 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
1399 tcp_cfg.alloc_err_timeout);
Florin Corasd4712a82020-05-22 21:51:30 +00001400 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Corasf988e692017-11-27 04:34:14 -05001401 return;
1402 }
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001403
Florin Coras75c48c12019-08-02 15:17:21 -07001404 tc->rto_boff += 1;
1405 if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1406 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1407
Florin Coraseaec00c2020-10-22 11:22:22 -07001408 ASSERT (tc->snd_una != tc->snd_nxt);
1409 tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
Florin Coras75c48c12019-08-02 15:17:21 -07001410
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001411 b = vlib_get_buffer (vm, bi);
Florin Coras24793e32018-05-09 11:34:25 -07001412 tcp_init_buffer (vm, b);
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001413 tcp_make_synack (tc, b);
Florin Corasa436a422019-08-20 07:09:31 -07001414 TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001415
1416 /* Retransmit timer already updated, just enqueue to output */
Florin Corasbe72ae62018-11-01 11:23:03 -07001417 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Dave Barach68b0fb02017-02-28 15:15:56 -05001418 }
Florin Coras93992a92017-05-24 18:03:56 -07001419 else
1420 {
1421 ASSERT (tc->state == TCP_STATE_CLOSED);
Florin Coras93992a92017-05-24 18:03:56 -07001422 return;
1423 }
Dave Barach68b0fb02017-02-28 15:15:56 -05001424}
1425
Florin Coras75c48c12019-08-02 15:17:21 -07001426/**
1427 * SYN retransmit timer handler. Active open only.
1428 */
Dave Barach68b0fb02017-02-28 15:15:56 -05001429void
Florin Corasaa388692020-02-14 23:41:25 +00001430tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
Dave Barach68b0fb02017-02-28 15:15:56 -05001431{
Florin Corasaa388692020-02-14 23:41:25 +00001432 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
Florin Coras75c48c12019-08-02 15:17:21 -07001433 vlib_main_t *vm = wrk->vm;
Florin Coras75c48c12019-08-02 15:17:21 -07001434 vlib_buffer_t *b = 0;
1435 u32 bi;
Dave Barach68b0fb02017-02-28 15:15:56 -05001436
Florin Coras75c48c12019-08-02 15:17:21 -07001437 /* Note: the connection may have transitioned to ESTABLISHED... */
Florin Corasaa388692020-02-14 23:41:25 +00001438 if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
Florin Coras75c48c12019-08-02 15:17:21 -07001439 return;
1440
Florin Coras75c48c12019-08-02 15:17:21 -07001441 /* Half-open connection actually moved to established but we were
1442 * waiting for syn retransmit to pop to call cleanup from the right
1443 * thread. */
1444 if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1445 {
1446 if (tcp_half_open_connection_cleanup (tc))
1447 TCP_DBG ("could not remove half-open connection");
1448 return;
1449 }
1450
Florin Corasa436a422019-08-20 07:09:31 -07001451 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
Florin Coras75c48c12019-08-02 15:17:21 -07001452 tc->rtt_ts = 0;
1453
1454 /* Active open establish timeout */
1455 if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
1456 {
Florin Coras00e01d32019-10-21 16:07:46 -07001457 session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
Florin Coras75c48c12019-08-02 15:17:21 -07001458 tcp_connection_cleanup (tc);
1459 return;
1460 }
1461
1462 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1463 {
liuyacan7e781192021-06-14 18:09:01 +08001464 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
1465 tcp_cfg.alloc_err_timeout);
Florin Corasd4712a82020-05-22 21:51:30 +00001466 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras75c48c12019-08-02 15:17:21 -07001467 return;
1468 }
1469
1470 /* Try without increasing RTO a number of times. If this fails,
1471 * start growing RTO exponentially */
1472 tc->rto_boff += 1;
1473 if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1474 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1475
1476 b = vlib_get_buffer (vm, bi);
1477 tcp_init_buffer (vm, b);
1478 tcp_make_syn (tc, b);
1479
Florin Corasa436a422019-08-20 07:09:31 -07001480 TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
Florin Coras75c48c12019-08-02 15:17:21 -07001481
Florin Coras647acd52021-07-02 18:10:20 -07001482 tcp_enqueue_half_open (wrk, tc, b, bi);
Florin Coras75c48c12019-08-02 15:17:21 -07001483
Florin Coras0765d972020-03-18 21:26:41 +00001484 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
Florin Coras75c48c12019-08-02 15:17:21 -07001485 tc->rto * TCP_TO_TIMER_TICK);
Dave Barach68b0fb02017-02-28 15:15:56 -05001486}
1487
1488/**
Florin Coras3e350af2017-03-30 02:54:28 -07001489 * Got 0 snd_wnd from peer, try to do something about it.
1490 *
1491 */
1492void
Florin Corasaa388692020-02-14 23:41:25 +00001493tcp_timer_persist_handler (tcp_connection_t * tc)
Florin Coras3e350af2017-03-30 02:54:28 -07001494{
Florin Corasaa388692020-02-14 23:41:25 +00001495 tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
Florin Corasbe72ae62018-11-01 11:23:03 -07001496 u32 bi, max_snd_bytes, available_bytes, offset;
1497 tcp_main_t *tm = vnet_get_tcp_main ();
1498 vlib_main_t *vm = wrk->vm;
Florin Coras3e350af2017-03-30 02:54:28 -07001499 vlib_buffer_t *b;
Florin Coras93992a92017-05-24 18:03:56 -07001500 int n_bytes = 0;
Florin Coras1f152cd2017-08-18 19:28:03 -07001501 u8 *data;
Florin Coras3e350af2017-03-30 02:54:28 -07001502
Florin Coras3e350af2017-03-30 02:54:28 -07001503 /* Problem already solved or worse */
Florin Coras7e74bf32019-03-06 16:51:58 -08001504 if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1505 || (tc->flags & TCP_CONN_FINSNT))
Florin Coras70f879d2020-03-13 17:54:42 +00001506 goto update_scheduler;
Florin Coras3e350af2017-03-30 02:54:28 -07001507
Florin Coras31c99552019-03-01 13:00:58 -08001508 available_bytes = transport_max_tx_dequeue (&tc->connection);
Florin Coras47596832019-03-12 18:58:54 -07001509 offset = tc->snd_nxt - tc->snd_una;
Florin Coras50958952017-08-29 14:50:13 -07001510
1511 /* Reprogram persist if no new bytes available to send. We may have data
1512 * next time */
1513 if (!available_bytes)
1514 {
Florin Coras0765d972020-03-18 21:26:41 +00001515 tcp_persist_timer_set (&wrk->timer_wheel, tc);
Florin Coras50958952017-08-29 14:50:13 -07001516 return;
1517 }
1518
1519 if (available_bytes <= offset)
Florin Coras70f879d2020-03-13 17:54:42 +00001520 goto update_scheduler;
Florin Coras50958952017-08-29 14:50:13 -07001521
Florin Coras3e350af2017-03-30 02:54:28 -07001522 /* Increment RTO backoff */
1523 tc->rto_boff += 1;
1524 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1525
Florin Corasb2215d62017-08-01 16:56:58 -07001526 /*
1527 * Try to force the first unsent segment (or buffer)
1528 */
Florin Corasbdf7fd62019-01-31 17:31:01 -08001529 if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
Florin Coras3ec66b02018-08-23 16:27:05 -07001530 {
Florin Coras0765d972020-03-18 21:26:41 +00001531 tcp_persist_timer_set (&wrk->timer_wheel, tc);
Florin Corasd4712a82020-05-22 21:51:30 +00001532 tcp_worker_stats_inc (wrk, no_buffer, 1);
Florin Coras3ec66b02018-08-23 16:27:05 -07001533 return;
1534 }
Florin Coras70f879d2020-03-13 17:54:42 +00001535
Florin Coras3e350af2017-03-30 02:54:28 -07001536 b = vlib_get_buffer (vm, bi);
Florin Coras1f152cd2017-08-18 19:28:03 -07001537 data = tcp_init_buffer (vm, b);
Florin Coras93992a92017-05-24 18:03:56 -07001538
Florin Coras1f152cd2017-08-18 19:28:03 -07001539 tcp_validate_txf_size (tc, offset);
Florin Corasc8343412017-05-04 14:25:50 -07001540 tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
Florin Coras7e74bf32019-03-06 16:51:58 -08001541 max_snd_bytes = clib_min (tc->snd_mss,
1542 tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
1543 n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1544 max_snd_bytes);
Florin Coras3e350af2017-03-30 02:54:28 -07001545 b->current_length = n_bytes;
Florin Coras84275e92017-09-26 12:30:40 -04001546 ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
Florin Coras55e556c2020-10-23 10:45:48 -07001547 || tc->snd_una == tc->snd_nxt
Florin Coras84275e92017-09-26 12:30:40 -04001548 || tc->rto_boff > 1));
Florin Coras93992a92017-05-24 18:03:56 -07001549
Florin Corasbbcfaac2019-10-10 13:52:04 -07001550 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
Florin Corasdd60b1b2019-10-07 17:19:09 -07001551 {
1552 tcp_bt_check_app_limited (tc);
Florin Corasd6ae4bf2019-10-12 18:10:20 -07001553 tcp_bt_track_tx (tc, n_bytes);
Florin Corasdd60b1b2019-10-07 17:19:09 -07001554 }
1555
Florin Coras47596832019-03-12 18:58:54 -07001556 tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
1557 /* burst */ 0, /* update_snd_nxt */ 1);
Florin Coras55e556c2020-10-23 10:45:48 -07001558 tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
Florin Corasbe72ae62018-11-01 11:23:03 -07001559 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Coras3e350af2017-03-30 02:54:28 -07001560
Florin Coras1f152cd2017-08-18 19:28:03 -07001561 /* Just sent new data, enable retransmit */
Florin Coras0765d972020-03-18 21:26:41 +00001562 tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
Florin Coras70f879d2020-03-13 17:54:42 +00001563
Florin Coras6080e0d2020-03-13 20:39:43 +00001564 return;
1565
Florin Coras70f879d2020-03-13 17:54:42 +00001566update_scheduler:
1567
Florin Coras6080e0d2020-03-13 20:39:43 +00001568 if (tcp_is_descheduled (tc))
Florin Coras70f879d2020-03-13 17:54:42 +00001569 transport_connection_reschedule (&tc->connection);
Florin Coras3e350af2017-03-30 02:54:28 -07001570}
1571
1572/**
Florin Coras6792ec02017-03-13 03:49:51 -07001573 * Retransmit first unacked segment
1574 */
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001575int
Florin Corasbe72ae62018-11-01 11:23:03 -07001576tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
Dave Barach68b0fb02017-02-28 15:15:56 -05001577{
Florin Corasbe72ae62018-11-01 11:23:03 -07001578 vlib_main_t *vm = wrk->vm;
1579 vlib_buffer_t *b;
Florin Coras47596832019-03-12 18:58:54 -07001580 u32 bi, n_bytes;
Dave Barach68b0fb02017-02-28 15:15:56 -05001581
Florin Corasa436a422019-08-20 07:09:31 -07001582 TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001583
Florin Corasbe72ae62018-11-01 11:23:03 -07001584 n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
Florin Corasb2215d62017-08-01 16:56:58 -07001585 if (!n_bytes)
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001586 return -1;
1587
Florin Corasb2215d62017-08-01 16:56:58 -07001588 bi = vlib_get_buffer_index (vm, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001589 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001590
1591 return 0;
Florin Coras6792ec02017-03-13 03:49:51 -07001592}
1593
Florin Coras36ee9f12018-11-02 12:52:10 -07001594static int
Florin Coras36ebcff2019-09-12 18:36:44 -07001595tcp_transmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
1596 u32 burst_size)
Florin Coras36ee9f12018-11-02 12:52:10 -07001597{
Florin Coras635acbf2019-07-31 14:11:05 -07001598 u32 offset, n_segs = 0, n_written, bi, available_wnd;
Florin Coras36ee9f12018-11-02 12:52:10 -07001599 vlib_main_t *vm = wrk->vm;
1600 vlib_buffer_t *b = 0;
1601
Florin Coras47596832019-03-12 18:58:54 -07001602 offset = tc->snd_nxt - tc->snd_una;
Florin Coras635acbf2019-07-31 14:11:05 -07001603 available_wnd = tc->snd_wnd - offset;
1604 burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
1605
Florin Corasd6ae4bf2019-10-12 18:10:20 -07001606 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1607 tcp_bt_check_app_limited (tc);
1608
Florin Coras36ee9f12018-11-02 12:52:10 -07001609 while (n_segs < burst_size)
1610 {
1611 n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
1612 if (!n_written)
1613 goto done;
1614
1615 bi = vlib_get_buffer_index (vm, b);
1616 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1617 offset += n_written;
1618 n_segs += 1;
Florin Coras47596832019-03-12 18:58:54 -07001619
Florin Corasd6ae4bf2019-10-12 18:10:20 -07001620 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1621 tcp_bt_track_tx (tc, n_written);
1622
Florin Coras47596832019-03-12 18:58:54 -07001623 tc->snd_nxt += n_written;
Florin Coras36ee9f12018-11-02 12:52:10 -07001624 }
1625
1626done:
1627 return n_segs;
1628}
1629
Florin Coras36ebcff2019-09-12 18:36:44 -07001630/**
1631 * Estimate send space using proportional rate reduction (RFC6937)
1632 */
Florin Corasbe237bf2019-09-27 08:16:40 -07001633int
Florin Coras36ebcff2019-09-12 18:36:44 -07001634tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc)
1635{
1636 u32 pipe, prr_out;
1637 int space;
1638
1639 pipe = tcp_flight_size (tc);
1640 prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1641
1642 if (pipe > tc->ssthresh)
1643 {
1644 space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
1645 - prr_out;
1646 }
1647 else
1648 {
Florin Corasbe237bf2019-09-27 08:16:40 -07001649 int limit;
1650 limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
Florin Coras36ebcff2019-09-12 18:36:44 -07001651 space = clib_min (tc->ssthresh - pipe, limit);
1652 }
1653 space = clib_max (space, prr_out ? 0 : tc->snd_mss);
1654 return space;
1655}
1656
Florin Corasbe237bf2019-09-27 08:16:40 -07001657static inline u8
1658tcp_retransmit_should_retry_head (tcp_connection_t * tc,
1659 sack_scoreboard_t * sb)
1660{
1661 u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
1662 f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
1663
Florin Corasb3dce892019-10-30 09:22:14 -07001664 if (tcp_fastrecovery_first (tc))
1665 return 1;
1666
Florin Corasbe237bf2019-09-27 08:16:40 -07001667 return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1668}
1669
Florin Corascb711a42019-10-16 19:28:17 -07001670static inline u8
1671tcp_max_tx_deq (tcp_connection_t * tc)
1672{
1673 return (transport_max_tx_dequeue (&tc->connection)
1674 - (tc->snd_nxt - tc->snd_una));
1675}
1676
/* True if the scoreboard's rescue_rxt is, per seq_geq/seq_leq, at or after
 * snd_una and at or before snd_congestion. Arguments are parenthesized so
 * that any pointer expression, not just a plain identifier, can be passed. */
#define scoreboard_rescue_rxt_valid(_sb, _tc)				\
  (seq_geq ((_sb)->rescue_rxt, (_tc)->snd_una)				\
   && seq_leq ((_sb)->rescue_rxt, (_tc)->snd_congestion))
1680
Florin Coras6792ec02017-03-13 03:49:51 -07001681/**
Florin Coras36ebcff2019-09-12 18:36:44 -07001682 * Do retransmit with SACKs
Florin Coras6792ec02017-03-13 03:49:51 -07001683 */
Florin Coras36ebcff2019-09-12 18:36:44 -07001684static int
1685tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
1686 u32 burst_size)
Dave Barach68b0fb02017-02-28 15:15:56 -05001687{
Florin Coras36ebcff2019-09-12 18:36:44 -07001688 u32 n_written = 0, offset, max_bytes, n_segs = 0;
Florin Coras11e9e352019-11-13 19:09:47 -08001689 u8 snd_limited = 0, can_rescue = 0;
1690 u32 bi, max_deq, burst_bytes;
Florin Coras93992a92017-05-24 18:03:56 -07001691 sack_scoreboard_hole_t *hole;
Florin Coras36ee9f12018-11-02 12:52:10 -07001692 vlib_main_t *vm = wrk->vm;
1693 vlib_buffer_t *b = 0;
Florin Coras93992a92017-05-24 18:03:56 -07001694 sack_scoreboard_t *sb;
Florin Coras93992a92017-05-24 18:03:56 -07001695 int snd_space;
Dave Barach68b0fb02017-02-28 15:15:56 -05001696
Florin Coras36ebcff2019-09-12 18:36:44 -07001697 ASSERT (tcp_in_cong_recovery (tc));
Dave Barach68b0fb02017-02-28 15:15:56 -05001698
Florin Corasa8e71c82019-10-22 19:01:39 -07001699 burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
Florin Corasc31dc312019-10-06 14:06:14 -07001700 burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1701 if (!burst_size)
1702 {
1703 tcp_program_retransmit (tc);
1704 return 0;
1705 }
1706
Florin Coras36ebcff2019-09-12 18:36:44 -07001707 if (tcp_in_recovery (tc))
1708 snd_space = tcp_available_cc_snd_space (tc);
1709 else
1710 snd_space = tcp_fastrecovery_prr_snd_space (tc);
1711
Florin Corasca1c8f32018-05-23 21:01:30 -07001712 if (snd_space < tc->snd_mss)
Florin Coras11e9e352019-11-13 19:09:47 -08001713 goto done;
Florin Corasc31dc312019-10-06 14:06:14 -07001714
Florin Coras36ee9f12018-11-02 12:52:10 -07001715 sb = &tc->sack_sb;
Florin Corasbe237bf2019-09-27 08:16:40 -07001716
1717 /* Check if snd_una is a lost retransmit */
Florin Coras8a8b05c2019-10-16 10:07:39 -07001718 if (pool_elts (sb->holes)
1719 && seq_gt (sb->high_sacked, tc->snd_congestion)
Florin Corasbe237bf2019-09-27 08:16:40 -07001720 && tc->rxt_head != tc->snd_una
1721 && tcp_retransmit_should_retry_head (tc, sb))
1722 {
Florin Corasbf1f8b72019-11-04 14:39:33 -08001723 max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1724 n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
Florin Corasbe237bf2019-09-27 08:16:40 -07001725 if (!n_written)
1726 {
1727 tcp_program_retransmit (tc);
1728 goto done;
1729 }
1730 bi = vlib_get_buffer_index (vm, b);
1731 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1732 n_segs = 1;
1733
1734 tc->rxt_head = tc->snd_una;
1735 tc->rxt_delivered += n_written;
1736 tc->prr_delivered += n_written;
1737 ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1738 }
1739
Florin Corasb3dce892019-10-30 09:22:14 -07001740 tcp_fastrecovery_first_off (tc);
1741
Florin Corasbe237bf2019-09-27 08:16:40 -07001742 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
Florin Coras36ee9f12018-11-02 12:52:10 -07001743 hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
1744
Florin Coras31c99552019-03-01 13:00:58 -08001745 max_deq = transport_max_tx_dequeue (&tc->connection);
Florin Coras47596832019-03-12 18:58:54 -07001746 max_deq -= tc->snd_nxt - tc->snd_una;
Florin Coras36ee9f12018-11-02 12:52:10 -07001747
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001748 while (snd_space > 0 && n_segs < burst_size)
Florin Coras93992a92017-05-24 18:03:56 -07001749 {
Florin Coras81cb8e42019-10-22 19:44:45 -07001750 hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
Florin Coras36ee9f12018-11-02 12:52:10 -07001751 &snd_limited);
Florin Coras93992a92017-05-24 18:03:56 -07001752 if (!hole)
1753 {
Florin Coras36ebcff2019-09-12 18:36:44 -07001754 /* We are out of lost holes to retransmit so send some new data. */
Florin Corasbe237bf2019-09-27 08:16:40 -07001755 if (max_deq > tc->snd_mss)
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001756 {
Florin Corasc31dc312019-10-06 14:06:14 -07001757 u32 n_segs_new;
1758 int av_wnd;
1759
Florin Coras3eba7f12019-11-15 17:56:48 -08001760 /* Make sure we don't exceed available window and leave space
1761 * for one more packet, to avoid zero window acks */
Florin Corasc31dc312019-10-06 14:06:14 -07001762 av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
Florin Coras3eba7f12019-11-15 17:56:48 -08001763 av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
Florin Corasc31dc312019-10-06 14:06:14 -07001764 snd_space = clib_min (snd_space, av_wnd);
Florin Coras36ee9f12018-11-02 12:52:10 -07001765 snd_space = clib_min (max_deq, snd_space);
1766 burst_size = clib_min (burst_size - n_segs,
1767 snd_space / tc->snd_mss);
Florin Coras36ebcff2019-09-12 18:36:44 -07001768 burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
1769 n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
1770 if (max_deq > n_segs_new * tc->snd_mss)
1771 tcp_program_retransmit (tc);
1772
1773 n_segs += n_segs_new;
Florin Coras36ee9f12018-11-02 12:52:10 -07001774 goto done;
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001775 }
Florin Coras93992a92017-05-24 18:03:56 -07001776
Florin Coras36ebcff2019-09-12 18:36:44 -07001777 if (tcp_in_recovery (tc) || !can_rescue
1778 || scoreboard_rescue_rxt_valid (sb, tc))
Florin Coras36ee9f12018-11-02 12:52:10 -07001779 break;
1780
Florin Coras93992a92017-05-24 18:03:56 -07001781 /* If rescue rxt undefined or less than snd_una then one segment of
1782 * up to SMSS octets that MUST include the highest outstanding
1783 * unSACKed sequence number SHOULD be returned, and RescueRxt set to
1784 * RecoveryPoint. HighRxt MUST NOT be updated.
1785 */
Florin Coras3eba7f12019-11-15 17:56:48 -08001786 hole = scoreboard_last_hole (sb);
1787 max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
Florin Coras1f152cd2017-08-18 19:28:03 -07001788 max_bytes = clib_min (max_bytes, snd_space);
Florin Coras3eba7f12019-11-15 17:56:48 -08001789 offset = hole->end - tc->snd_una - max_bytes;
Florin Corasbe72ae62018-11-01 11:23:03 -07001790 n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
1791 max_bytes, &b);
Florin Coras24793e32018-05-09 11:34:25 -07001792 if (!n_written)
1793 goto done;
1794
Florin Coras3eba7f12019-11-15 17:56:48 -08001795 sb->rescue_rxt = tc->snd_congestion;
Florin Corasb2215d62017-08-01 16:56:58 -07001796 bi = vlib_get_buffer_index (vm, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001797 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001798 n_segs += 1;
Florin Coras93992a92017-05-24 18:03:56 -07001799 break;
1800 }
1801
Florin Coras1f152cd2017-08-18 19:28:03 -07001802 max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
1803 max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
1804 if (max_bytes == 0)
1805 break;
Florin Coras36ee9f12018-11-02 12:52:10 -07001806
Florin Coras93992a92017-05-24 18:03:56 -07001807 offset = sb->high_rxt - tc->snd_una;
Florin Corasbe72ae62018-11-01 11:23:03 -07001808 n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1809 &b);
Florin Coras36ee9f12018-11-02 12:52:10 -07001810 ASSERT (n_written <= snd_space);
Florin Coras93992a92017-05-24 18:03:56 -07001811
1812 /* Nothing left to retransmit */
1813 if (n_written == 0)
Florin Corasb2215d62017-08-01 16:56:58 -07001814 break;
Florin Coras93992a92017-05-24 18:03:56 -07001815
Florin Corasb2215d62017-08-01 16:56:58 -07001816 bi = vlib_get_buffer_index (vm, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001817 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Coras36ee9f12018-11-02 12:52:10 -07001818
1819 sb->high_rxt += n_written;
Florin Coras81cb8e42019-10-22 19:44:45 -07001820 ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
1821
Florin Coras93992a92017-05-24 18:03:56 -07001822 snd_space -= n_written;
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001823 n_segs += 1;
Florin Coras93992a92017-05-24 18:03:56 -07001824 }
1825
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001826 if (hole)
Florin Coras36ebcff2019-09-12 18:36:44 -07001827 tcp_program_retransmit (tc);
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001828
Florin Coras24793e32018-05-09 11:34:25 -07001829done:
Florin Coras36ebcff2019-09-12 18:36:44 -07001830
Florin Coras11e9e352019-11-13 19:09:47 -08001831 transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001832 return n_segs;
Florin Coras93992a92017-05-24 18:03:56 -07001833}
1834
1835/**
1836 * Fast retransmit without SACK info
1837 */
Florin Coras36ebcff2019-09-12 18:36:44 -07001838static int
1839tcp_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
1840 u32 burst_size)
Florin Coras93992a92017-05-24 18:03:56 -07001841{
Florin Corasbf1f8b72019-11-04 14:39:33 -08001842 u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
Florin Corasc31dc312019-10-06 14:06:14 -07001843 u32 burst_bytes, sent_bytes;
Florin Corasbe72ae62018-11-01 11:23:03 -07001844 vlib_main_t *vm = wrk->vm;
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001845 int snd_space, n_segs = 0;
Florin Corasc31dc312019-10-06 14:06:14 -07001846 u8 cc_limited = 0;
Florin Coras93992a92017-05-24 18:03:56 -07001847 vlib_buffer_t *b;
1848
Florin Corasbf1f8b72019-11-04 14:39:33 -08001849 ASSERT (tcp_in_cong_recovery (tc));
Florin Corasa436a422019-08-20 07:09:31 -07001850 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
Florin Coras6792ec02017-03-13 03:49:51 -07001851
Florin Corasa8e71c82019-10-22 19:01:39 -07001852 burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
Florin Corasc31dc312019-10-06 14:06:14 -07001853 burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1854 if (!burst_size)
1855 {
1856 tcp_program_retransmit (tc);
1857 return 0;
1858 }
1859
Florin Coras87a9bf82019-06-12 11:26:35 -07001860 snd_space = tcp_available_cc_snd_space (tc);
Florin Corasc31dc312019-10-06 14:06:14 -07001861 cc_limited = snd_space < burst_bytes;
Florin Coras87a9bf82019-06-12 11:26:35 -07001862
Florin Coras36ee9f12018-11-02 12:52:10 -07001863 if (!tcp_fastrecovery_first (tc))
1864 goto send_unsent;
1865
1866 /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
1867 * segment. */
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001868 while (snd_space > 0 && n_segs < burst_size)
Dave Barach68b0fb02017-02-28 15:15:56 -05001869 {
Florin Corasbf1f8b72019-11-04 14:39:33 -08001870 max_bytes = clib_min (tc->snd_mss,
1871 tc->snd_congestion - tc->snd_una - offset);
1872 if (!max_bytes)
1873 break;
1874 n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1875 &b);
Dave Barach68b0fb02017-02-28 15:15:56 -05001876
1877 /* Nothing left to retransmit */
Florin Coras6792ec02017-03-13 03:49:51 -07001878 if (n_written == 0)
Florin Corasb2215d62017-08-01 16:56:58 -07001879 break;
Dave Barach68b0fb02017-02-28 15:15:56 -05001880
Florin Corasb2215d62017-08-01 16:56:58 -07001881 bi = vlib_get_buffer_index (vm, b);
Florin Corasbe72ae62018-11-01 11:23:03 -07001882 tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
Florin Coras6792ec02017-03-13 03:49:51 -07001883 snd_space -= n_written;
Florin Coras36ee9f12018-11-02 12:52:10 -07001884 offset += n_written;
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001885 n_segs += 1;
Dave Barach68b0fb02017-02-28 15:15:56 -05001886 }
1887
Florin Coras36ee9f12018-11-02 12:52:10 -07001888 if (n_segs == burst_size)
1889 goto done;
Florin Corasbf4d5ce2018-10-19 16:26:24 -07001890
Florin Coras36ee9f12018-11-02 12:52:10 -07001891send_unsent:
1892
1893 /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
Florin Coras9ece3c02018-11-05 11:06:53 -08001894 if (snd_space < tc->snd_mss || tc->snd_mss == 0)
Florin Coras36ee9f12018-11-02 12:52:10 -07001895 goto done;
1896
Florin Coras31c99552019-03-01 13:00:58 -08001897 max_deq = transport_max_tx_dequeue (&tc->connection);
Florin Coras47596832019-03-12 18:58:54 -07001898 max_deq -= tc->snd_nxt - tc->snd_una;
Florin Coras36ee9f12018-11-02 12:52:10 -07001899 if (max_deq)
1900 {
1901 snd_space = clib_min (max_deq, snd_space);
1902 burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
Florin Coras36ebcff2019-09-12 18:36:44 -07001903 n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
Florin Corasbf1f8b72019-11-04 14:39:33 -08001904 if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
Florin Coras36ebcff2019-09-12 18:36:44 -07001905 tcp_program_retransmit (tc);
Florin Coras36ee9f12018-11-02 12:52:10 -07001906 n_segs += n_segs_now;
1907 }
1908
Florin Coras36ee9f12018-11-02 12:52:10 -07001909done:
1910 tcp_fastrecovery_first_off (tc);
Florin Coras93992a92017-05-24 18:03:56 -07001911
Florin Corasc31dc312019-10-06 14:06:14 -07001912 sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
1913 sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1914 transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
1915
1916 return n_segs;
Dave Barach68b0fb02017-02-28 15:15:56 -05001917}
Florin Coras26dd6de2019-07-23 23:54:47 -07001918
1919static int
1920tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
1921{
1922 int j, n_acks;
1923
1924 if (!tc->pending_dupacks)
1925 {
Florin Corascb711a42019-10-16 19:28:17 -07001926 if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
1927 || tc->state != TCP_STATE_ESTABLISHED)
1928 {
1929 tcp_send_ack (tc);
1930 return 1;
1931 }
1932 return 0;
Florin Coras26dd6de2019-07-23 23:54:47 -07001933 }
1934
1935 /* If we're supposed to send dupacks but have no ooo data
1936 * send only one ack */
1937 if (!vec_len (tc->snd_sacks))
1938 {
1939 tcp_send_ack (tc);
Florin Coras7fd59cc2020-03-12 15:50:57 +00001940 tc->dupacks_out += 1;
Florin Corascb711a42019-10-16 19:28:17 -07001941 tc->pending_dupacks = 0;
Florin Coras26dd6de2019-07-23 23:54:47 -07001942 return 1;
1943 }
1944
1945 /* Start with first sack block */
1946 tc->snd_sack_pos = 0;
1947
1948 /* Generate enough dupacks to cover all sack blocks. Do not generate
1949 * more sacks than the number of packets received. But do generate at
1950 * least 3, i.e., the number needed to signal congestion, if needed. */
1951 n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
1952 n_acks = clib_min (n_acks, tc->pending_dupacks);
1953 n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
1954 for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
1955 tcp_send_ack (tc);
1956
1957 if (n_acks < max_burst_size)
1958 {
1959 tc->pending_dupacks = 0;
1960 tc->snd_sack_pos = 0;
Florin Corasedfe0ee2019-07-29 18:13:25 -07001961 tc->dupacks_out += n_acks;
Florin Coras26dd6de2019-07-23 23:54:47 -07001962 return n_acks;
1963 }
1964 else
1965 {
1966 TCP_DBG ("constrained by burst size");
1967 tc->pending_dupacks = n_acks - max_burst_size;
Florin Corasedfe0ee2019-07-29 18:13:25 -07001968 tc->dupacks_out += max_burst_size;
Florin Coras26dd6de2019-07-23 23:54:47 -07001969 tcp_program_dupack (tc);
1970 return max_burst_size;
1971 }
1972}
1973
1974static int
Florin Coras36ebcff2019-09-12 18:36:44 -07001975tcp_do_retransmit (tcp_connection_t * tc, u32 max_burst_size)
Florin Coras26dd6de2019-07-23 23:54:47 -07001976{
Florin Coras26dd6de2019-07-23 23:54:47 -07001977 tcp_worker_ctx_t *wrk;
Florin Corasc31dc312019-10-06 14:06:14 -07001978 u32 n_segs;
Florin Coras26dd6de2019-07-23 23:54:47 -07001979
Florin Corasfd4c3fe2019-11-07 12:33:12 -08001980 if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
1981 return 0;
1982
Florin Coras26dd6de2019-07-23 23:54:47 -07001983 wrk = tcp_get_worker (tc->c_thread_index);
Florin Coras26dd6de2019-07-23 23:54:47 -07001984
Florin Corasc31dc312019-10-06 14:06:14 -07001985 if (tcp_opts_sack_permitted (&tc->rcv_opts))
1986 n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
1987 else
1988 n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
1989
Florin Coras26dd6de2019-07-23 23:54:47 -07001990 return n_segs;
1991}
1992
1993int
Florin Coras9f86d222020-03-23 15:34:22 +00001994tcp_session_custom_tx (void *conn, transport_send_params_t * sp)
Florin Coras26dd6de2019-07-23 23:54:47 -07001995{
1996 tcp_connection_t *tc = (tcp_connection_t *) conn;
1997 u32 n_segs = 0;
1998
Florin Coras36ebcff2019-09-12 18:36:44 -07001999 if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
Florin Coras26dd6de2019-07-23 23:54:47 -07002000 {
Florin Coras36ebcff2019-09-12 18:36:44 -07002001 tc->flags &= ~TCP_CONN_RXT_PENDING;
Florin Coras9f86d222020-03-23 15:34:22 +00002002 n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
Florin Coras26dd6de2019-07-23 23:54:47 -07002003 }
2004
2005 if (!(tc->flags & TCP_CONN_SNDACK))
2006 return n_segs;
2007
2008 tc->flags &= ~TCP_CONN_SNDACK;
2009
2010 /* We have retransmitted packets and no dupack */
2011 if (n_segs && !tc->pending_dupacks)
2012 return n_segs;
2013
Florin Coras9f86d222020-03-23 15:34:22 +00002014 if (sp->max_burst_size <= n_segs)
Florin Coras26dd6de2019-07-23 23:54:47 -07002015 {
2016 tcp_program_ack (tc);
Florin Coras9f86d222020-03-23 15:34:22 +00002017 return n_segs;
Florin Coras26dd6de2019-07-23 23:54:47 -07002018 }
2019
Florin Coras9f86d222020-03-23 15:34:22 +00002020 n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
Florin Coras26dd6de2019-07-23 23:54:47 -07002021
2022 return n_segs;
2023}
Filip Tehlare275bed2019-03-06 00:06:56 -08002024#endif /* CLIB_MARCH_VARIANT */
Dave Barach68b0fb02017-02-28 15:15:56 -05002025
Florin Corasf9d05682018-04-26 08:26:52 -07002026static void
2027tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
Florin Coras8b20bf52018-06-14 14:55:50 -07002028 u16 * next0, u32 * error0)
Florin Corasf9d05682018-04-26 08:26:52 -07002029{
2030 ip_adjacency_t *adj;
2031 adj_index_t ai;
2032
Florin Coras1c8ff632018-05-17 13:28:34 -07002033 /* Not thread safe but as long as the connection exists the adj should
2034 * not be removed */
Florin Corasf9d05682018-04-26 08:26:52 -07002035 ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
2036 tc0->sw_if_index);
2037 if (ai == ADJ_INDEX_INVALID)
2038 {
Florin Corasf9d05682018-04-26 08:26:52 -07002039 vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2040 *next0 = TCP_OUTPUT_NEXT_DROP;
2041 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2042 return;
2043 }
2044
2045 adj = adj_get (ai);
Florin Coras1c8ff632018-05-17 13:28:34 -07002046 if (PREDICT_TRUE (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE))
Florin Corasf9d05682018-04-26 08:26:52 -07002047 *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
2048 else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
2049 *next0 = TCP_OUTPUT_NEXT_IP_ARP;
Florin Coras1c8ff632018-05-17 13:28:34 -07002050 else
2051 {
2052 *next0 = TCP_OUTPUT_NEXT_DROP;
2053 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2054 }
2055 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
Florin Corasf9d05682018-04-26 08:26:52 -07002056}
2057
Florin Coras8b20bf52018-06-14 14:55:50 -07002058static void
2059tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
2060 u32 * to_next, u32 n_bufs)
Dave Barach68b0fb02017-02-28 15:15:56 -05002061{
Florin Coras8b20bf52018-06-14 14:55:50 -07002062 tcp_connection_t *tc;
2063 tcp_tx_trace_t *t;
2064 vlib_buffer_t *b;
2065 tcp_header_t *th;
2066 int i;
Dave Barach68b0fb02017-02-28 15:15:56 -05002067
Florin Coras30928f82020-01-27 19:21:28 -08002068 for (i = 0; i < n_bufs; i++)
Dave Barach68b0fb02017-02-28 15:15:56 -05002069 {
Florin Coras8b20bf52018-06-14 14:55:50 -07002070 b = vlib_get_buffer (vm, to_next[i]);
Florin Coras30928f82020-01-27 19:21:28 -08002071 if (!(b->flags & VLIB_BUFFER_IS_TRACED))
2072 continue;
Florin Coras8b20bf52018-06-14 14:55:50 -07002073 th = vlib_buffer_get_current (b);
2074 tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
2075 vm->thread_index);
2076 t = vlib_add_trace (vm, node, b, sizeof (*t));
Dave Barach178cf492018-11-13 16:34:13 -05002077 clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
2078 clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
Florin Coras8b20bf52018-06-14 14:55:50 -07002079 }
2080}
Dave Barach68b0fb02017-02-28 15:15:56 -05002081
Florin Coras0dbd5172018-06-25 16:19:34 -07002082always_inline void
Florin Coras8b20bf52018-06-14 14:55:50 -07002083tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
2084 tcp_connection_t * tc0, u8 is_ip4)
2085{
Florin Corasf4ce6ba2019-11-20 18:34:58 -08002086 TCP_EVT (TCP_EVT_OUTPUT, tc0,
2087 ((tcp_header_t *) vlib_buffer_get_current (b0))->flags,
2088 b0->current_length);
Srikanth A02833ff2019-10-02 17:48:58 -07002089
Florin Coras8b20bf52018-06-14 14:55:50 -07002090 if (is_ip4)
Florin Corasf4ce6ba2019-11-20 18:34:58 -08002091 vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
2092 IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
Florin Coras8b20bf52018-06-14 14:55:50 -07002093 else
Florin Corasf4ce6ba2019-11-20 18:34:58 -08002094 vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
2095 IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
Florin Coras8b20bf52018-06-14 14:55:50 -07002096}
Dave Barach68b0fb02017-02-28 15:15:56 -05002097
Florin Coras0dbd5172018-06-25 16:19:34 -07002098always_inline void
Simon Zhang1146ff42019-09-02 22:54:00 +08002099tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
2100{
Florin Corasbbcfaac2019-10-10 13:52:04 -07002101 if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
Simon Zhang1146ff42019-09-02 22:54:00 +08002102 return;
Florin Corasbbcfaac2019-10-10 13:52:04 -07002103
Simon Zhang8a047ed2019-09-24 21:16:56 +08002104 u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
Simon Zhang1146ff42019-09-02 22:54:00 +08002105
Simon Zhang8a047ed2019-09-24 21:16:56 +08002106 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
2107 data_len += b->total_length_not_including_first_buffer;
2108
2109 if (PREDICT_TRUE (data_len <= tc->snd_mss))
2110 return;
2111 else
Simon Zhang1146ff42019-09-02 22:54:00 +08002112 {
2113 ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2114 ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2115 b->flags |= VNET_BUFFER_F_GSO;
2116 vnet_buffer2 (b)->gso_l4_hdr_sz =
2117 sizeof (tcp_header_t) + tc->snd_opts_len;
2118 vnet_buffer2 (b)->gso_size = tc->snd_mss;
2119 }
2120}
2121
2122always_inline void
Florin Coras8b20bf52018-06-14 14:55:50 -07002123tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
Florin Corasdf36f492019-08-18 18:09:28 -07002124 vlib_node_runtime_t * error_node, u16 * next0,
2125 u8 is_ip4)
Florin Coras8b20bf52018-06-14 14:55:50 -07002126{
Florin Coras540a8da2019-06-18 08:55:14 -07002127 /* If next_index is not drop use it */
Florin Coras07beade2019-06-20 12:18:31 -07002128 if (tc0->next_node_index)
2129 {
2130 *next0 = tc0->next_node_index;
2131 vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2132 }
Florin Coras78dae002019-08-30 10:00:30 -07002133 else
2134 {
2135 *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
2136 }
Florin Coras540a8da2019-06-18 08:55:14 -07002137
Florin Coras8b20bf52018-06-14 14:55:50 -07002138 vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
2139 vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
2140
2141 if (!is_ip4)
2142 {
Florin Corasdf36f492019-08-18 18:09:28 -07002143 u32 error0 = 0;
2144
Florin Coras8b20bf52018-06-14 14:55:50 -07002145 if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
Florin Corasdf36f492019-08-18 18:09:28 -07002146 tcp_output_handle_link_local (tc0, b0, next0, &error0);
2147
2148 if (PREDICT_FALSE (error0))
2149 {
2150 b0->error = error_node->errors[error0];
2151 return;
2152 }
Florin Coras8b20bf52018-06-14 14:55:50 -07002153 }
2154
Florin Corasedfe0ee2019-07-29 18:13:25 -07002155 tc0->segs_out += 1;
Florin Coras8b20bf52018-06-14 14:55:50 -07002156}
2157
2158always_inline uword
2159tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
2160 vlib_frame_t * frame, int is_ip4)
2161{
2162 u32 n_left_from, *from, thread_index = vm->thread_index;
2163 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2164 u16 nexts[VLIB_FRAME_SIZE], *next;
2165
2166 from = vlib_frame_vector_args (frame);
2167 n_left_from = frame->n_vectors;
Florin Coras8f10b902021-04-02 18:32:00 -07002168 tcp_update_time_now (tcp_get_worker (thread_index));
Florin Coras8b20bf52018-06-14 14:55:50 -07002169
2170 if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
2171 tcp46_output_trace_frame (vm, node, from, n_left_from);
2172
2173 vlib_get_buffers (vm, from, bufs, n_left_from);
2174 b = bufs;
2175 next = nexts;
2176
2177 while (n_left_from >= 4)
2178 {
Florin Coras8b20bf52018-06-14 14:55:50 -07002179 tcp_connection_t *tc0, *tc1;
2180
2181 {
2182 vlib_prefetch_buffer_header (b[2], STORE);
2183 CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2184
2185 vlib_prefetch_buffer_header (b[3], STORE);
2186 CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2187 }
2188
Florin Coras8b20bf52018-06-14 14:55:50 -07002189 tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2190 thread_index);
2191 tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
2192 thread_index);
2193
Florin Coras78dae002019-08-30 10:00:30 -07002194 if (PREDICT_TRUE (!tc0 + !tc1 == 0))
2195 {
2196 tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2197 tcp_output_push_ip (vm, b[1], tc1, is_ip4);
Florin Coras8b20bf52018-06-14 14:55:50 -07002198
Simon Zhang1146ff42019-09-02 22:54:00 +08002199 tcp_check_if_gso (tc0, b[0]);
2200 tcp_check_if_gso (tc1, b[1]);
2201
Florin Coras573f44c2020-04-09 21:23:01 +00002202 tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2203 tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
Florin Coras78dae002019-08-30 10:00:30 -07002204 }
2205 else
2206 {
2207 if (tc0 != 0)
2208 {
2209 tcp_output_push_ip (vm, b[0], tc0, is_ip4);
Simon Zhang1146ff42019-09-02 22:54:00 +08002210 tcp_check_if_gso (tc0, b[0]);
Florin Coras573f44c2020-04-09 21:23:01 +00002211 tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
Florin Coras78dae002019-08-30 10:00:30 -07002212 }
2213 else
2214 {
Florin Coras573f44c2020-04-09 21:23:01 +00002215 b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
Florin Coras78dae002019-08-30 10:00:30 -07002216 next[0] = TCP_OUTPUT_NEXT_DROP;
2217 }
2218 if (tc1 != 0)
2219 {
2220 tcp_output_push_ip (vm, b[1], tc1, is_ip4);
Simon Zhang1146ff42019-09-02 22:54:00 +08002221 tcp_check_if_gso (tc1, b[1]);
Florin Coras573f44c2020-04-09 21:23:01 +00002222 tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
Florin Coras78dae002019-08-30 10:00:30 -07002223 }
2224 else
2225 {
Florin Coras573f44c2020-04-09 21:23:01 +00002226 b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
Florin Coras78dae002019-08-30 10:00:30 -07002227 next[1] = TCP_OUTPUT_NEXT_DROP;
2228 }
2229 }
Florin Coras8b20bf52018-06-14 14:55:50 -07002230
2231 b += 2;
2232 next += 2;
2233 n_left_from -= 2;
2234 }
2235 while (n_left_from > 0)
2236 {
Florin Coras8b20bf52018-06-14 14:55:50 -07002237 tcp_connection_t *tc0;
2238
2239 if (n_left_from > 1)
2240 {
Florin Coras91ca4622018-06-30 11:27:59 -07002241 vlib_prefetch_buffer_header (b[1], STORE);
2242 CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
Florin Coras8b20bf52018-06-14 14:55:50 -07002243 }
2244
Florin Coras8b20bf52018-06-14 14:55:50 -07002245 tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2246 thread_index);
2247
Florin Coras78dae002019-08-30 10:00:30 -07002248 if (PREDICT_TRUE (tc0 != 0))
2249 {
2250 tcp_output_push_ip (vm, b[0], tc0, is_ip4);
Simon Zhang1146ff42019-09-02 22:54:00 +08002251 tcp_check_if_gso (tc0, b[0]);
Florin Coras573f44c2020-04-09 21:23:01 +00002252 tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
Florin Coras78dae002019-08-30 10:00:30 -07002253 }
2254 else
2255 {
Florin Coras573f44c2020-04-09 21:23:01 +00002256 b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
Florin Coras78dae002019-08-30 10:00:30 -07002257 next[0] = TCP_OUTPUT_NEXT_DROP;
2258 }
Florin Coras8b20bf52018-06-14 14:55:50 -07002259
2260 b += 1;
2261 next += 1;
2262 n_left_from -= 1;
2263 }
2264
2265 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
Florin Corasdf36f492019-08-18 18:09:28 -07002266 vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
2267 TCP_ERROR_PKTS_SENT, frame->n_vectors);
Florin Coras8b20bf52018-06-14 14:55:50 -07002268 return frame->n_vectors;
Dave Barach68b0fb02017-02-28 15:15:56 -05002269}
2270
Filip Tehlare275bed2019-03-06 00:06:56 -08002271VLIB_NODE_FN (tcp4_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2272 vlib_frame_t * from_frame)
Dave Barach68b0fb02017-02-28 15:15:56 -05002273{
2274 return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2275}
2276
Filip Tehlare275bed2019-03-06 00:06:56 -08002277VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2278 vlib_frame_t * from_frame)
Dave Barach68b0fb02017-02-28 15:15:56 -05002279{
2280 return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2281}
2282
Florin Corase69f4952017-03-07 10:06:24 -08002283/* *INDENT-OFF* */
Dave Barach68b0fb02017-02-28 15:15:56 -05002284VLIB_REGISTER_NODE (tcp4_output_node) =
2285{
Dave Barachf970d2f2018-11-26 18:34:33 -05002286 .name = "tcp4-output",
2287 /* Takes a vector of packets. */
2288 .vector_size = sizeof (u32),
2289 .n_errors = TCP_N_ERROR,
Dave Barach7fff3d22018-11-27 16:52:59 -05002290 .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
Dave Barachf970d2f2018-11-26 18:34:33 -05002291 .error_strings = tcp_error_strings,
2292 .n_next_nodes = TCP_OUTPUT_N_NEXT,
2293 .next_nodes = {
Dave Barach68b0fb02017-02-28 15:15:56 -05002294#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2295 foreach_tcp4_output_next
2296#undef _
Dave Barachf970d2f2018-11-26 18:34:33 -05002297 },
2298 .format_buffer = format_tcp_header,
2299 .format_trace = format_tcp_tx_trace,
Florin Corase69f4952017-03-07 10:06:24 -08002300};
2301/* *INDENT-ON* */
Dave Barach68b0fb02017-02-28 15:15:56 -05002302
Florin Corase69f4952017-03-07 10:06:24 -08002303/* *INDENT-OFF* */
Dave Barach68b0fb02017-02-28 15:15:56 -05002304VLIB_REGISTER_NODE (tcp6_output_node) =
2305{
Florin Corase69f4952017-03-07 10:06:24 -08002306 .name = "tcp6-output",
Dave Barach68b0fb02017-02-28 15:15:56 -05002307 /* Takes a vector of packets. */
Florin Corase69f4952017-03-07 10:06:24 -08002308 .vector_size = sizeof (u32),
2309 .n_errors = TCP_N_ERROR,
Dave Barach7fff3d22018-11-27 16:52:59 -05002310 .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
Florin Corase69f4952017-03-07 10:06:24 -08002311 .error_strings = tcp_error_strings,
2312 .n_next_nodes = TCP_OUTPUT_N_NEXT,
2313 .next_nodes = {
Dave Barach68b0fb02017-02-28 15:15:56 -05002314#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2315 foreach_tcp6_output_next
2316#undef _
Florin Corase69f4952017-03-07 10:06:24 -08002317 },
2318 .format_buffer = format_tcp_header,
2319 .format_trace = format_tcp_tx_trace,
2320};
2321/* *INDENT-ON* */
Dave Barach68b0fb02017-02-28 15:15:56 -05002322
/** Next node indices for the tcp reset nodes */
typedef enum _tcp_reset_next
{
  TCP_RESET_NEXT_DROP = 0,      /* first entry of the next node list */
  TCP_RESET_NEXT_IP_LOOKUP,     /* next node used for generated resets */
  TCP_RESET_N_NEXT,             /* number of next nodes */
} tcp_reset_next_t;
2329
/* Next nodes of the ip4 reset node as (TCP_RESET_NEXT_ suffix, node name) */
#define foreach_tcp4_reset_next                 \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip4-lookup")
2333
/* Next nodes of the ip6 reset node as (TCP_RESET_NEXT_ suffix, node name) */
#define foreach_tcp6_reset_next                 \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip6-lookup")
2337
2338static uword
2339tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
2340 vlib_frame_t * from_frame, u8 is_ip4)
2341{
Florin Coras360336f2020-02-13 18:46:18 +00002342 u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
Dave Barach68b0fb02017-02-28 15:15:56 -05002343 u32 n_left_from, next_index, *from, *to_next;
Dave Barach68b0fb02017-02-28 15:15:56 -05002344
2345 from = vlib_frame_vector_args (from_frame);
2346 n_left_from = from_frame->n_vectors;
2347
2348 next_index = node->cached_next_index;
2349
2350 while (n_left_from > 0)
2351 {
2352 u32 n_left_to_next;
2353
2354 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2355
2356 while (n_left_from > 0 && n_left_to_next > 0)
2357 {
Dave Barach68b0fb02017-02-28 15:15:56 -05002358 vlib_buffer_t *b0;
Clement Durand6cf260c2017-04-13 13:27:04 +02002359 tcp_tx_trace_t *t0;
2360 tcp_header_t *th0;
Florin Coras360336f2020-02-13 18:46:18 +00002361 u32 bi0;
Dave Barach68b0fb02017-02-28 15:15:56 -05002362
2363 bi0 = from[0];
2364 to_next[0] = bi0;
2365 from += 1;
2366 to_next += 1;
2367 n_left_from -= 1;
2368 n_left_to_next -= 1;
2369
2370 b0 = vlib_get_buffer (vm, bi0);
Florin Coras360336f2020-02-13 18:46:18 +00002371 tcp_make_reset_in_place (vm, b0, is_ip4);
Dave Barach68b0fb02017-02-28 15:15:56 -05002372
2373 /* Prepare to send to IP lookup */
Florin Corasf988e692017-11-27 04:34:14 -05002374 vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
Dave Barach68b0fb02017-02-28 15:15:56 -05002375
Florin Corase69f4952017-03-07 10:06:24 -08002376 b0->error = node->errors[error0];
Damjan Marion213b5aa2017-07-13 21:19:27 +02002377 b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
Dave Barach68b0fb02017-02-28 15:15:56 -05002378 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2379 {
Clement Durand6cf260c2017-04-13 13:27:04 +02002380 th0 = vlib_buffer_get_current (b0);
2381 if (is_ip4)
2382 th0 = ip4_next_header ((ip4_header_t *) th0);
2383 else
2384 th0 = ip6_next_header ((ip6_header_t *) th0);
Clement Durand6cf260c2017-04-13 13:27:04 +02002385 t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
Dave Barach178cf492018-11-13 16:34:13 -05002386 clib_memcpy_fast (&t0->tcp_header, th0,
2387 sizeof (t0->tcp_header));
Dave Barach68b0fb02017-02-28 15:15:56 -05002388 }
2389
2390 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2391 n_left_to_next, bi0, next0);
2392 }
2393 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2394 }
2395 return from_frame->n_vectors;
2396}
2397
Filip Tehlare275bed2019-03-06 00:06:56 -08002398VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2399 vlib_frame_t * from_frame)
Dave Barach68b0fb02017-02-28 15:15:56 -05002400{
2401 return tcp46_send_reset_inline (vm, node, from_frame, 1);
2402}
2403
Filip Tehlare275bed2019-03-06 00:06:56 -08002404VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2405 vlib_frame_t * from_frame)
Dave Barach68b0fb02017-02-28 15:15:56 -05002406{
2407 return tcp46_send_reset_inline (vm, node, from_frame, 0);
2408}
2409
2410/* *INDENT-OFF* */
2411VLIB_REGISTER_NODE (tcp4_reset_node) = {
Dave Barach68b0fb02017-02-28 15:15:56 -05002412 .name = "tcp4-reset",
2413 .vector_size = sizeof (u32),
2414 .n_errors = TCP_N_ERROR,
2415 .error_strings = tcp_error_strings,
2416 .n_next_nodes = TCP_RESET_N_NEXT,
2417 .next_nodes = {
2418#define _(s,n) [TCP_RESET_NEXT_##s] = n,
2419 foreach_tcp4_reset_next
2420#undef _
2421 },
Clement Durand6cf260c2017-04-13 13:27:04 +02002422 .format_trace = format_tcp_tx_trace,
Dave Barach68b0fb02017-02-28 15:15:56 -05002423};
2424/* *INDENT-ON* */
2425
2426/* *INDENT-OFF* */
2427VLIB_REGISTER_NODE (tcp6_reset_node) = {
Dave Barach68b0fb02017-02-28 15:15:56 -05002428 .name = "tcp6-reset",
2429 .vector_size = sizeof (u32),
2430 .n_errors = TCP_N_ERROR,
2431 .error_strings = tcp_error_strings,
2432 .n_next_nodes = TCP_RESET_N_NEXT,
2433 .next_nodes = {
2434#define _(s,n) [TCP_RESET_NEXT_##s] = n,
2435 foreach_tcp6_reset_next
2436#undef _
2437 },
Clement Durand6cf260c2017-04-13 13:27:04 +02002438 .format_trace = format_tcp_tx_trace,
Dave Barach68b0fb02017-02-28 15:15:56 -05002439};
2440/* *INDENT-ON* */
2441
Dave Barach68b0fb02017-02-28 15:15:56 -05002442/*
2443 * fd.io coding-style-patch-verification: ON
2444 *
2445 * Local Variables:
2446 * eval: (c-set-style "gnu")
2447 * End:
2448 */