blob: 915675c11ed654ac36167ed2905740a522c2306d [file] [log] [blame]
Dave Barach68b0fb02017-02-28 15:15:56 -05001/*
Florin Coras222e1f412019-02-16 20:47:32 -08002 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
Dave Barach68b0fb02017-02-28 15:15:56 -05003 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
Dave Barach3bbcfab2017-08-15 19:03:44 -040016/**
17 * @file
18 * @brief TCP host stack utilities
19 */
20
Dave Barach68b0fb02017-02-28 15:15:56 -050021#include <vnet/tcp/tcp.h>
22#include <vnet/session/session.h>
23#include <vnet/fib/fib.h>
Florin Corasf6359c82017-06-19 12:26:09 -040024#include <vnet/dpo/load_balance.h>
Dave Barach3bbcfab2017-08-15 19:03:44 -040025#include <vnet/dpo/receive_dpo.h>
Neale Rannscbe25aa2019-09-30 10:53:31 +000026#include <vnet/ip-neighbor/ip_neighbor.h>
Dave Barach68b0fb02017-02-28 15:15:56 -050027#include <math.h>
28
29tcp_main_t tcp_main;
30
Florin Coras1c8ff632018-05-17 13:28:34 -070031typedef struct
32{
33 fib_protocol_t nh_proto;
34 vnet_link_t link_type;
35 ip46_address_t ip;
36 u32 sw_if_index;
37 u8 is_add;
38} tcp_add_del_adj_args_t;
39
40static void
41tcp_add_del_adj_cb (tcp_add_del_adj_args_t * args)
42{
43 u32 ai;
44 if (args->is_add)
45 {
46 adj_nbr_add_or_lock (args->nh_proto, args->link_type, &args->ip,
47 args->sw_if_index);
48 }
49 else
50 {
51 ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &args->ip,
52 args->sw_if_index);
53 if (ai != ADJ_INDEX_INVALID)
54 adj_unlock (ai);
55 }
56}
57
58static void
59tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
60{
61 tcp_add_del_adj_args_t args = {
62 .nh_proto = FIB_PROTOCOL_IP6,
63 .link_type = VNET_LINK_IP6,
64 .ip = tc->c_rmt_ip,
65 .sw_if_index = tc->sw_if_index,
66 .is_add = is_add
67 };
68 vlib_rpc_call_main_thread (tcp_add_del_adj_cb, (u8 *) & args,
69 sizeof (args));
70}
71
Florin Corasd9a145f2019-06-07 09:35:20 -070072static void
73tcp_cc_init (tcp_connection_t * tc)
74{
Florin Corasd9a145f2019-06-07 09:35:20 -070075 tc->cc_algo->init (tc);
76}
77
78static void
79tcp_cc_cleanup (tcp_connection_t * tc)
80{
81 if (tc->cc_algo->cleanup)
82 tc->cc_algo->cleanup (tc);
83}
84
85void
86tcp_cc_algo_register (tcp_cc_algorithm_type_e type,
87 const tcp_cc_algorithm_t * vft)
88{
89 tcp_main_t *tm = vnet_get_tcp_main ();
90 vec_validate (tm->cc_algos, type);
91
92 tm->cc_algos[type] = *vft;
93 hash_set_mem (tm->cc_algo_by_name, vft->name, type);
94}
95
96tcp_cc_algorithm_t *
97tcp_cc_algo_get (tcp_cc_algorithm_type_e type)
98{
99 tcp_main_t *tm = vnet_get_tcp_main ();
100 return &tm->cc_algos[type];
101}
102
Florin Coras4e116fb2019-06-10 08:33:50 -0700103tcp_cc_algorithm_type_e
104tcp_cc_algo_new_type (const tcp_cc_algorithm_t * vft)
105{
106 tcp_main_t *tm = vnet_get_tcp_main ();
107 tcp_cc_algo_register (++tm->cc_last_type, vft);
108 return tm->cc_last_type;
109}
110
Dave Barach68b0fb02017-02-28 15:15:56 -0500111static u32
Florin Coras04e53442017-07-16 17:12:15 -0700112tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
Dave Barach68b0fb02017-02-28 15:15:56 -0500113{
114 tcp_main_t *tm = &tcp_main;
115 tcp_connection_t *listener;
Florin Corascea194d2017-10-02 00:18:51 -0700116 void *iface_ip;
Dave Barach68b0fb02017-02-28 15:15:56 -0500117
118 pool_get (tm->listener_pool, listener);
Dave Barachb7b92992018-10-17 10:38:51 -0400119 clib_memset (listener, 0, sizeof (*listener));
Dave Barach68b0fb02017-02-28 15:15:56 -0500120
121 listener->c_c_index = listener - tm->listener_pool;
Florin Coras0e495682017-09-19 22:27:18 -0700122 listener->c_lcl_port = lcl->port;
Dave Barach68b0fb02017-02-28 15:15:56 -0500123
Florin Corascea194d2017-10-02 00:18:51 -0700124 /* If we are provided a sw_if_index, bind using one of its ips */
125 if (ip_is_zero (&lcl->ip, 1) && lcl->sw_if_index != ENDPOINT_INVALID_INDEX)
Florin Coras6cf30ad2017-04-04 23:08:23 -0700126 {
Florin Corascea194d2017-10-02 00:18:51 -0700127 if ((iface_ip = ip_interface_get_first_ip (lcl->sw_if_index,
128 lcl->is_ip4)))
129 ip_set (&lcl->ip, iface_ip, lcl->is_ip4);
Florin Coras6cf30ad2017-04-04 23:08:23 -0700130 }
Florin Corascea194d2017-10-02 00:18:51 -0700131 ip_copy (&listener->c_lcl_ip, &lcl->ip, lcl->is_ip4);
132 listener->c_is_ip4 = lcl->is_ip4;
Florin Coras3cbc04b2017-10-02 00:18:51 -0700133 listener->c_proto = TRANSPORT_PROTO_TCP;
Dave Barach68b0fb02017-02-28 15:15:56 -0500134 listener->c_s_index = session_index;
Florin Corascea194d2017-10-02 00:18:51 -0700135 listener->c_fib_index = lcl->fib_index;
Dave Barach68b0fb02017-02-28 15:15:56 -0500136 listener->state = TCP_STATE_LISTEN;
Florin Coras12f69362019-08-16 09:44:00 -0700137 listener->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
Dave Barach68b0fb02017-02-28 15:15:56 -0500138
Florin Corase69f4952017-03-07 10:06:24 -0800139 tcp_connection_timers_init (listener);
140
Florin Corasa436a422019-08-20 07:09:31 -0700141 TCP_EVT (TCP_EVT_BIND, listener);
Florin Corase69f4952017-03-07 10:06:24 -0800142
Dave Barach68b0fb02017-02-28 15:15:56 -0500143 return listener->c_c_index;
144}
145
Florin Coras0dbd5172018-06-25 16:19:34 -0700146static u32
Florin Coras04e53442017-07-16 17:12:15 -0700147tcp_session_bind (u32 session_index, transport_endpoint_t * tep)
Dave Barach68b0fb02017-02-28 15:15:56 -0500148{
Florin Coras04e53442017-07-16 17:12:15 -0700149 return tcp_connection_bind (session_index, tep);
Dave Barach68b0fb02017-02-28 15:15:56 -0500150}
151
152static void
Florin Corase69f4952017-03-07 10:06:24 -0800153tcp_connection_unbind (u32 listener_index)
Dave Barach68b0fb02017-02-28 15:15:56 -0500154{
155 tcp_main_t *tm = vnet_get_tcp_main ();
Dave Barach2c25a622017-06-26 11:35:07 -0400156 tcp_connection_t *tc;
157
158 tc = pool_elt_at_index (tm->listener_pool, listener_index);
159
Florin Corasa436a422019-08-20 07:09:31 -0700160 TCP_EVT (TCP_EVT_UNBIND, tc);
Dave Barach2c25a622017-06-26 11:35:07 -0400161
162 /* Poison the entry */
163 if (CLIB_DEBUG > 0)
Dave Barachb7b92992018-10-17 10:38:51 -0400164 clib_memset (tc, 0xFA, sizeof (*tc));
Dave Barach2c25a622017-06-26 11:35:07 -0400165
Dave Barach68b0fb02017-02-28 15:15:56 -0500166 pool_put_index (tm->listener_pool, listener_index);
167}
168
Florin Coras0dbd5172018-06-25 16:19:34 -0700169static u32
Florin Corase69f4952017-03-07 10:06:24 -0800170tcp_session_unbind (u32 listener_index)
Dave Barach68b0fb02017-02-28 15:15:56 -0500171{
Florin Corase69f4952017-03-07 10:06:24 -0800172 tcp_connection_unbind (listener_index);
Dave Barach68b0fb02017-02-28 15:15:56 -0500173 return 0;
174}
175
Florin Coras0dbd5172018-06-25 16:19:34 -0700176static transport_connection_t *
Dave Barach68b0fb02017-02-28 15:15:56 -0500177tcp_session_get_listener (u32 listener_index)
178{
179 tcp_main_t *tm = vnet_get_tcp_main ();
180 tcp_connection_t *tc;
181 tc = pool_elt_at_index (tm->listener_pool, listener_index);
182 return &tc->connection;
183}
184
Florin Coras68810622017-07-24 17:40:28 -0700185/**
186 * Cleanup half-open connection
187 *
188 */
Florin Coras0dbd5172018-06-25 16:19:34 -0700189static void
Florin Coras68810622017-07-24 17:40:28 -0700190tcp_half_open_connection_del (tcp_connection_t * tc)
191{
192 tcp_main_t *tm = vnet_get_tcp_main ();
193 clib_spinlock_lock_if_init (&tm->half_open_lock);
Florin Coras68810622017-07-24 17:40:28 -0700194 if (CLIB_DEBUG)
Dave Barachb7b92992018-10-17 10:38:51 -0400195 clib_memset (tc, 0xFA, sizeof (*tc));
Benoît Ganned4aeb842019-07-18 18:38:42 +0200196 pool_put (tm->half_open_connections, tc);
Florin Coras68810622017-07-24 17:40:28 -0700197 clib_spinlock_unlock_if_init (&tm->half_open_lock);
198}
199
200/**
201 * Try to cleanup half-open connection
202 *
203 * If called from a thread that doesn't own tc, the call won't have any
204 * effect.
205 *
206 * @param tc - connection to be cleaned up
207 * @return non-zero if cleanup failed.
208 */
209int
210tcp_half_open_connection_cleanup (tcp_connection_t * tc)
211{
212 /* Make sure this is the owning thread */
213 if (tc->c_thread_index != vlib_get_thread_index ())
214 return 1;
Florin Coras68810622017-07-24 17:40:28 -0700215 tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT_SYN);
216 tcp_half_open_connection_del (tc);
217 return 0;
218}
219
Florin Coras0dbd5172018-06-25 16:19:34 -0700220static tcp_connection_t *
Florin Coras68810622017-07-24 17:40:28 -0700221tcp_half_open_connection_new (void)
222{
223 tcp_main_t *tm = vnet_get_tcp_main ();
224 tcp_connection_t *tc = 0;
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400225 ASSERT (vlib_get_thread_index () == 0);
Florin Coras68810622017-07-24 17:40:28 -0700226 pool_get (tm->half_open_connections, tc);
Dave Barachb7b92992018-10-17 10:38:51 -0400227 clib_memset (tc, 0, sizeof (*tc));
Florin Coras68810622017-07-24 17:40:28 -0700228 tc->c_c_index = tc - tm->half_open_connections;
229 return tc;
230}
231
Dave Barach68b0fb02017-02-28 15:15:56 -0500232/**
233 * Cleans up connection state.
234 *
235 * No notifications.
236 */
237void
238tcp_connection_cleanup (tcp_connection_t * tc)
239{
240 tcp_main_t *tm = &tcp_main;
Dave Barach68b0fb02017-02-28 15:15:56 -0500241
Florin Corasa436a422019-08-20 07:09:31 -0700242 TCP_EVT (TCP_EVT_DELETE, tc);
Florin Coras070fd4b2019-04-02 19:03:23 -0700243
Dave Barach68b0fb02017-02-28 15:15:56 -0500244 /* Cleanup local endpoint if this was an active connect */
Florin Coras3cbc04b2017-10-02 00:18:51 -0700245 transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
246 tc->c_lcl_port);
Dave Barach68b0fb02017-02-28 15:15:56 -0500247
Florin Coras68810622017-07-24 17:40:28 -0700248 /* Check if connection is not yet fully established */
Dave Barach68b0fb02017-02-28 15:15:56 -0500249 if (tc->state == TCP_STATE_SYN_SENT)
Dave Barach2c25a622017-06-26 11:35:07 -0400250 {
Florin Coras68810622017-07-24 17:40:28 -0700251 /* Try to remove the half-open connection. If this is not the owning
252 * thread, tc won't be removed. Retransmit or establish timers will
253 * eventually expire and call again cleanup on the right thread. */
Florin Corasc5347d92018-10-17 10:41:28 -0700254 if (tcp_half_open_connection_cleanup (tc))
255 tc->flags |= TCP_CONN_HALF_OPEN_DONE;
Dave Barach2c25a622017-06-26 11:35:07 -0400256 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500257 else
Dave Barach2c25a622017-06-26 11:35:07 -0400258 {
259 int thread_index = tc->c_thread_index;
Florin Coras68810622017-07-24 17:40:28 -0700260
261 /* Make sure all timers are cleared */
262 tcp_connection_timers_reset (tc);
263
Florin Coras1c8ff632018-05-17 13:28:34 -0700264 if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
265 tcp_add_del_adjacency (tc, 0);
266
Florin Corasd9a145f2019-06-07 09:35:20 -0700267 tcp_cc_cleanup (tc);
Florin Corasb691f762019-02-22 09:07:20 -0800268 vec_free (tc->snd_sacks);
269 vec_free (tc->snd_sacks_fl);
Florin Coras558e3e02019-09-06 12:56:58 -0700270 vec_free (tc->rcv_opts.sacks);
271 pool_free (tc->sack_sb.holes);
Florin Corasb691f762019-02-22 09:07:20 -0800272
Florin Corasbbcfaac2019-10-10 13:52:04 -0700273 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
Florin Coras52814732019-06-12 15:38:19 -0700274 tcp_bt_cleanup (tc);
275
Dave Barach2c25a622017-06-26 11:35:07 -0400276 /* Poison the entry */
277 if (CLIB_DEBUG > 0)
Dave Barachb7b92992018-10-17 10:38:51 -0400278 clib_memset (tc, 0xFA, sizeof (*tc));
Dave Barach2c25a622017-06-26 11:35:07 -0400279 pool_put (tm->connections[thread_index], tc);
280 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500281}
282
283/**
284 * Connection removal.
285 *
286 * This should be called only once connection enters CLOSED state. Note
287 * that it notifies the session of the removal event, so if the goal is to
288 * just remove the connection, call tcp_connection_cleanup instead.
289 */
290void
291tcp_connection_del (tcp_connection_t * tc)
292{
Florin Coras5a2ec8f2018-12-27 11:53:11 -0800293 session_transport_delete_notify (&tc->connection);
Dave Barach68b0fb02017-02-28 15:15:56 -0500294 tcp_connection_cleanup (tc);
295}
296
Florin Coras6534b7a2017-07-18 05:38:03 -0400297tcp_connection_t *
Florin Coras8124cb72018-12-16 20:57:29 -0800298tcp_connection_alloc (u8 thread_index)
Florin Coras6534b7a2017-07-18 05:38:03 -0400299{
300 tcp_main_t *tm = vnet_get_tcp_main ();
301 tcp_connection_t *tc;
302
303 pool_get (tm->connections[thread_index], tc);
Dave Barachb7b92992018-10-17 10:38:51 -0400304 clib_memset (tc, 0, sizeof (*tc));
Florin Coras6534b7a2017-07-18 05:38:03 -0400305 tc->c_c_index = tc - tm->connections[thread_index];
306 tc->c_thread_index = thread_index;
307 return tc;
308}
309
Florin Coras12f69362019-08-16 09:44:00 -0700310tcp_connection_t *
311tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base)
312{
313 tcp_main_t *tm = vnet_get_tcp_main ();
314 tcp_connection_t *tc;
315
316 pool_get (tm->connections[thread_index], tc);
317 clib_memcpy_fast (tc, base, sizeof (*tc));
318 tc->c_c_index = tc - tm->connections[thread_index];
319 tc->c_thread_index = thread_index;
320 return tc;
321}
322
Florin Coras8124cb72018-12-16 20:57:29 -0800323void
324tcp_connection_free (tcp_connection_t * tc)
325{
326 tcp_main_t *tm = &tcp_main;
Florin Coras6416e622019-04-03 17:52:43 -0700327 if (CLIB_DEBUG)
328 {
329 u8 thread_index = tc->c_thread_index;
330 clib_memset (tc, 0xFA, sizeof (*tc));
331 pool_put (tm->connections[thread_index], tc);
332 return;
333 }
Florin Coras8124cb72018-12-16 20:57:29 -0800334 pool_put (tm->connections[tc->c_thread_index], tc);
Florin Coras8124cb72018-12-16 20:57:29 -0800335}
336
Florin Corasd79b41e2017-03-04 05:37:52 -0800337/** Notify session that connection has been reset.
338 *
339 * Switch state to closed and wait for session to call cleanup.
340 */
341void
342tcp_connection_reset (tcp_connection_t * tc)
343{
Florin Corasa436a422019-08-20 07:09:31 -0700344 TCP_EVT (TCP_EVT_RST_RCVD, tc);
Florin Coras11c05492017-05-10 12:29:14 -0700345 switch (tc->state)
346 {
347 case TCP_STATE_SYN_RCVD:
348 /* Cleanup everything. App wasn't notified yet */
Florin Coras5a2ec8f2018-12-27 11:53:11 -0800349 session_transport_delete_notify (&tc->connection);
Florin Coras11c05492017-05-10 12:29:14 -0700350 tcp_connection_cleanup (tc);
351 break;
352 case TCP_STATE_SYN_SENT:
Florin Coras3cbc04b2017-10-02 00:18:51 -0700353 session_stream_connect_notify (&tc->connection, 1 /* fail */ );
Florin Coras03afb6d2019-01-04 08:45:22 -0800354 tcp_connection_cleanup (tc);
Florin Coras6534b7a2017-07-18 05:38:03 -0400355 break;
Florin Coras11c05492017-05-10 12:29:14 -0700356 case TCP_STATE_ESTABLISHED:
Florin Coras25579b42018-06-06 17:55:02 -0700357 tcp_connection_timers_reset (tc);
358 /* Set the cleanup timer, in case the session layer/app don't
359 * cleanly close the connection */
Florin Coras9094b5c2019-08-12 14:17:47 -0700360 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
Florin Coras5a2ec8f2018-12-27 11:53:11 -0800361 session_transport_reset_notify (&tc->connection);
Florin Corasfd4c3fe2019-11-07 12:33:12 -0800362 tcp_cong_recovery_off (tc);
Florin Coras3c514d52018-12-22 11:39:33 -0800363 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Corasa0904f02019-07-22 20:55:11 -0700364 session_transport_closed_notify (&tc->connection);
Florin Coras25579b42018-06-06 17:55:02 -0700365 break;
Florin Coras11c05492017-05-10 12:29:14 -0700366 case TCP_STATE_CLOSE_WAIT:
367 case TCP_STATE_FIN_WAIT_1:
368 case TCP_STATE_FIN_WAIT_2:
369 case TCP_STATE_CLOSING:
Florin Coras54ddf432018-12-21 13:54:09 -0800370 case TCP_STATE_LAST_ACK:
Florin Coras11c05492017-05-10 12:29:14 -0700371 tcp_connection_timers_reset (tc);
Florin Coras9094b5c2019-08-12 14:17:47 -0700372 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
Florin Corasfd4c3fe2019-11-07 12:33:12 -0800373 tcp_cong_recovery_off (tc);
Florin Coras3c514d52018-12-22 11:39:33 -0800374 /* Make sure we mark the session as closed. In some states we may
375 * be still trying to send data */
Florin Coras3c514d52018-12-22 11:39:33 -0800376 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Corasa0904f02019-07-22 20:55:11 -0700377 session_transport_closed_notify (&tc->connection);
Florin Coras11c05492017-05-10 12:29:14 -0700378 break;
379 case TCP_STATE_CLOSED:
Florin Corasb0f662f2018-12-27 14:51:46 -0800380 case TCP_STATE_TIME_WAIT:
Florin Coras1af9ab52018-12-20 10:16:01 -0800381 break;
Florin Coras85a3ddd2018-12-24 16:54:34 -0800382 default:
383 TCP_DBG ("reset state: %u", tc->state);
Florin Coras11c05492017-05-10 12:29:14 -0700384 }
Florin Corasd79b41e2017-03-04 05:37:52 -0800385}
386
Dave Barach68b0fb02017-02-28 15:15:56 -0500387/**
388 * Begin connection closing procedure.
389 *
390 * If at the end the connection is not in CLOSED state, it is not removed.
391 * Instead, we rely on on TCP to advance through state machine to either
392 * 1) LAST_ACK (passive close) whereby when the last ACK is received
393 * tcp_connection_del is called. This notifies session of the delete and
394 * calls cleanup.
395 * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers
396 * and cleanup is called.
Florin Corasd79b41e2017-03-04 05:37:52 -0800397 *
398 * N.B. Half-close connections are not supported
Dave Barach68b0fb02017-02-28 15:15:56 -0500399 */
400void
401tcp_connection_close (tcp_connection_t * tc)
402{
Florin Corasa436a422019-08-20 07:09:31 -0700403 TCP_EVT (TCP_EVT_CLOSE, tc);
Florin Corase69f4952017-03-07 10:06:24 -0800404
Florin Corasb2215d62017-08-01 16:56:58 -0700405 /* Send/Program FIN if needed and switch state */
406 switch (tc->state)
407 {
408 case TCP_STATE_SYN_SENT:
Florin Coras85a3ddd2018-12-24 16:54:34 -0800409 /* Try to cleanup. If not on the right thread, mark as half-open done.
410 * Connection will be cleaned up when establish timer pops */
411 tcp_connection_cleanup (tc);
Florin Corasb2215d62017-08-01 16:56:58 -0700412 break;
413 case TCP_STATE_SYN_RCVD:
Florin Corasc5347d92018-10-17 10:41:28 -0700414 tcp_connection_timers_reset (tc);
Florin Corasb2215d62017-08-01 16:56:58 -0700415 tcp_send_fin (tc);
Florin Coras3c514d52018-12-22 11:39:33 -0800416 tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
Florin Coras9094b5c2019-08-12 14:17:47 -0700417 tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
Florin Corasb2215d62017-08-01 16:56:58 -0700418 break;
419 case TCP_STATE_ESTABLISHED:
Florin Corasf65074e2019-03-31 17:17:11 -0700420 /* If closing with unread data, reset the connection */
421 if (transport_max_rx_dequeue (&tc->connection))
422 {
423 tcp_send_reset (tc);
424 tcp_connection_timers_reset (tc);
425 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras9094b5c2019-08-12 14:17:47 -0700426 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
Florin Corasa0904f02019-07-22 20:55:11 -0700427 session_transport_closed_notify (&tc->connection);
Florin Corasf65074e2019-03-31 17:17:11 -0700428 break;
429 }
Florin Coras31c99552019-03-01 13:00:58 -0800430 if (!transport_max_tx_dequeue (&tc->connection))
Florin Corasb2215d62017-08-01 16:56:58 -0700431 tcp_send_fin (tc);
432 else
433 tc->flags |= TCP_CONN_FINPNDG;
Florin Coras3c514d52018-12-22 11:39:33 -0800434 tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
Florin Corase96bf632018-12-18 22:44:27 -0800435 /* Set a timer in case the peer stops responding. Otherwise the
436 * connection will be stuck here forever. */
Florin Coras85a3ddd2018-12-24 16:54:34 -0800437 ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID);
Florin Coras9094b5c2019-08-12 14:17:47 -0700438 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
Florin Corasb2215d62017-08-01 16:56:58 -0700439 break;
440 case TCP_STATE_CLOSE_WAIT:
Florin Coras31c99552019-03-01 13:00:58 -0800441 if (!transport_max_tx_dequeue (&tc->connection))
Florin Coras25579b42018-06-06 17:55:02 -0700442 {
443 tcp_send_fin (tc);
444 tcp_connection_timers_reset (tc);
Florin Coras3c514d52018-12-22 11:39:33 -0800445 tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
Florin Coras9094b5c2019-08-12 14:17:47 -0700446 tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
Florin Coras25579b42018-06-06 17:55:02 -0700447 }
448 else
449 tc->flags |= TCP_CONN_FINPNDG;
Florin Corasb2215d62017-08-01 16:56:58 -0700450 break;
Florin Corasc87c91d2017-08-16 19:55:49 -0700451 case TCP_STATE_FIN_WAIT_1:
Florin Coras9094b5c2019-08-12 14:17:47 -0700452 tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
Florin Corasc87c91d2017-08-16 19:55:49 -0700453 break;
Florin Coras4af830c2018-12-04 09:21:36 -0800454 case TCP_STATE_CLOSED:
455 tcp_connection_timers_reset (tc);
Florin Coras85a3ddd2018-12-24 16:54:34 -0800456 /* Delete connection but instead of doing it now wait until next
457 * dispatch cycle to give the session layer a chance to clear
458 * unhandled events */
Florin Coras9094b5c2019-08-12 14:17:47 -0700459 tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
Florin Coras4af830c2018-12-04 09:21:36 -0800460 break;
Florin Corasb2215d62017-08-01 16:56:58 -0700461 default:
Florin Corasa096f2d2017-09-28 23:49:42 -0400462 TCP_DBG ("state: %u", tc->state);
Florin Corasb2215d62017-08-01 16:56:58 -0700463 }
Dave Barach68b0fb02017-02-28 15:15:56 -0500464}
465
Florin Coras0dbd5172018-06-25 16:19:34 -0700466static void
Dave Barach68b0fb02017-02-28 15:15:56 -0500467tcp_session_close (u32 conn_index, u32 thread_index)
468{
469 tcp_connection_t *tc;
470 tc = tcp_connection_get (conn_index, thread_index);
471 tcp_connection_close (tc);
472}
473
Florin Coras0dbd5172018-06-25 16:19:34 -0700474static void
Dave Barach68b0fb02017-02-28 15:15:56 -0500475tcp_session_cleanup (u32 conn_index, u32 thread_index)
476{
477 tcp_connection_t *tc;
478 tc = tcp_connection_get (conn_index, thread_index);
Florin Coras54c93cf2019-09-24 12:45:14 -0700479 if (!tc)
480 return;
Florin Coras85a3ddd2018-12-24 16:54:34 -0800481 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras25579b42018-06-06 17:55:02 -0700482 tcp_connection_cleanup (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500483}
484
Florin Corasdfb3b872019-08-16 17:48:44 -0700485static void
486tcp_session_reset (u32 conn_index, u32 thread_index)
487{
488 tcp_connection_t *tc;
489 tc = tcp_connection_get (conn_index, thread_index);
490 session_transport_closed_notify (&tc->connection);
491 tcp_send_reset (tc);
492 tcp_connection_timers_reset (tc);
Florin Corasfd4c3fe2019-11-07 12:33:12 -0800493 tcp_cong_recovery_off (tc);
Florin Corasdfb3b872019-08-16 17:48:44 -0700494 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
495 tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
496}
497
Dave Barach68b0fb02017-02-28 15:15:56 -0500498/**
499 * Initialize all connection timers as invalid
500 */
501void
502tcp_connection_timers_init (tcp_connection_t * tc)
503{
504 int i;
505
506 /* Set all to invalid */
507 for (i = 0; i < TCP_N_TIMERS; i++)
508 {
509 tc->timers[i] = TCP_TIMER_HANDLE_INVALID;
510 }
511
512 tc->rto = TCP_RTO_INIT;
513}
514
515/**
516 * Stop all connection timers
517 */
518void
519tcp_connection_timers_reset (tcp_connection_t * tc)
520{
521 int i;
522 for (i = 0; i < TCP_N_TIMERS; i++)
523 {
524 tcp_timer_reset (tc, i);
525 }
526}
527
#if 0
/* Compiled out: helpers that stack a connection on its peer's fib entry. */

/** IPv4 + TCP header pair, used only as input to the flow hash */
typedef struct ip4_tcp_hdr
{
  ip4_header_t ip;
  tcp_header_t tcp;
} ip4_tcp_hdr_t;

/** IPv6 + TCP header pair, used only as input to the flow hash */
typedef struct ip6_tcp_hdr
{
  ip6_header_t ip;
  tcp_header_t tcp;
} ip6_tcp_hdr_t;

/**
 * Pick the load-balance bucket for a connection.
 *
 * A header is synthesized from the connection's addresses and ports, the
 * flow hash is computed over it and the matching bucket is copied into
 * @a result.
 */
static void
tcp_connection_select_lb_bucket (tcp_connection_t * tc, const dpo_id_t * dpo,
                                 dpo_id_t * result)
{
  const dpo_id_t *bucket;
  load_balance_t *lb;
  int flow_hash;

  lb = load_balance_get (dpo->dpoi_index);
  if (tc->c_is_ip4)
    {
      ip4_tcp_hdr_t hdr4;

      clib_memset (&hdr4, 0, sizeof (hdr4));
      hdr4.ip.protocol = IP_PROTOCOL_TCP;
      hdr4.ip.address_pair.src.as_u32 = tc->c_lcl_ip.ip4.as_u32;
      hdr4.ip.address_pair.dst.as_u32 = tc->c_rmt_ip.ip4.as_u32;
      hdr4.tcp.src_port = tc->c_lcl_port;
      hdr4.tcp.dst_port = tc->c_rmt_port;
      flow_hash = ip4_compute_flow_hash (&hdr4.ip, lb->lb_hash_config);
    }
  else
    {
      ip6_tcp_hdr_t hdr6;

      clib_memset (&hdr6, 0, sizeof (hdr6));
      hdr6.ip.protocol = IP_PROTOCOL_TCP;
      clib_memcpy_fast (&hdr6.ip.src_address, &tc->c_lcl_ip.ip6,
                        sizeof (ip6_address_t));
      clib_memcpy_fast (&hdr6.ip.dst_address, &tc->c_rmt_ip.ip6,
                        sizeof (ip6_address_t));
      hdr6.tcp.src_port = tc->c_lcl_port;
      hdr6.tcp.dst_port = tc->c_rmt_port;
      flow_hash = ip6_compute_flow_hash (&hdr6.ip, lb->lb_hash_config);
    }

  bucket =
    load_balance_get_bucket_i (lb, flow_hash & lb->lb_n_buckets_minus_1);
  dpo_copy (result, bucket);
}

/** Look up the connection's remote address as a host prefix in its fib. */
fib_node_index_t
tcp_lookup_rmt_in_fib (tcp_connection_t * tc)
{
  fib_prefix_t pfx;
  u32 fib_index;

  clib_memcpy_fast (&pfx.fp_addr, &tc->c_rmt_ip, sizeof (pfx.fp_addr));
  if (tc->c_is_ip4)
    {
      pfx.fp_proto = FIB_PROTOCOL_IP4;
      pfx.fp_len = 32;
    }
  else
    {
      pfx.fp_proto = FIB_PROTOCOL_IP6;
      pfx.fp_len = 128;
    }

  fib_index = fib_table_find (pfx.fp_proto, tc->c_fib_index);
  return fib_table_lookup (fib_index, &pfx);
}

/**
 * Stack the connection's dpo on the bucket chosen from its fib entry.
 *
 * @return 0 on success, -1 if the fib entry is not a load-balance dpo
 */
static int
tcp_connection_stack_on_fib_entry (tcp_connection_t * tc)
{
  dpo_id_t bucket = DPO_INVALID;
  u32 output_node_index;
  fib_entry_t *fe;

  fe = fib_entry_get (tc->c_rmt_fei);
  if (fe->fe_lb.dpoi_type != DPO_LOAD_BALANCE)
    return -1;

  tcp_connection_select_lb_bucket (tc, &fe->fe_lb, &bucket);

  if (tc->c_is_ip4)
    output_node_index = tcp4_output_node.index;
  else
    output_node_index = tcp6_output_node.index;

  dpo_stack_from_node (output_node_index, &tc->c_rmt_dpo, &bucket);
  return 0;
}

/** Stack tcp connection on peer's fib entry.
 *
 * This ultimately populates the dpo the connection will use to send packets.
 */
static void
tcp_connection_fib_attach (tcp_connection_t * tc)
{
  tc->c_rmt_fei = tcp_lookup_rmt_in_fib (tc);

  ASSERT (tc->c_rmt_fei != FIB_NODE_INDEX_INVALID);

  tcp_connection_stack_on_fib_entry (tc);
}
#endif /* 0 */
Florin Corasf6359c82017-06-19 12:26:09 -0400624
Florin Coras18e0d4f2019-01-02 12:22:02 -0800625/**
626 * Generate random iss as per rfc6528
627 */
628static u32
629tcp_generate_random_iss (tcp_connection_t * tc)
630{
631 tcp_main_t *tm = &tcp_main;
632 u64 tmp;
633
634 if (tc->c_is_ip4)
635 tmp = (u64) tc->c_lcl_ip.ip4.as_u32 << 32 | (u64) tc->c_rmt_ip.ip4.as_u32;
636 else
637 tmp = tc->c_lcl_ip.ip6.as_u64[0] ^ tc->c_lcl_ip.ip6.as_u64[1]
638 ^ tc->c_rmt_ip.ip6.as_u64[0] ^ tc->c_rmt_ip.ip6.as_u64[1];
639
640 tmp ^= tm->iss_seed.first | ((u64) tc->c_lcl_port << 16 | tc->c_rmt_port);
641 tmp ^= tm->iss_seed.second;
642 tmp = clib_xxhash (tmp) + clib_cpu_time_now ();
643 return ((tmp >> 32) ^ (tmp & 0xffffffff));
644}
Florin Coras0dbd5172018-06-25 16:19:34 -0700645
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400646/**
Florin Corascedcf602019-08-27 12:15:43 -0700647 * Initialize max segment size we're able to process.
648 *
649 * The value is constrained by the output interface's MTU and by the size
650 * of the IP and TCP headers (see RFC6691). It is also what we advertise
651 * to our peer.
652 */
653static void
654tcp_init_rcv_mss (tcp_connection_t * tc)
655{
656 u8 ip_hdr_len;
657
658 ip_hdr_len = tc->c_is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
659 tc->mss = tcp_cfg.default_mtu - sizeof (tcp_header_t) - ip_hdr_len;
660}
661
662static void
663tcp_init_mss (tcp_connection_t * tc)
664{
665 u16 default_min_mss = 536;
666
667 tcp_init_rcv_mss (tc);
668
669 /* TODO consider PMTU discovery */
670 tc->snd_mss = clib_min (tc->rcv_opts.mss, tc->mss);
671
672 if (tc->snd_mss < 45)
673 {
674 /* Assume that at least the min default mss works */
675 tc->snd_mss = default_min_mss;
676 tc->rcv_opts.mss = default_min_mss;
677 }
678
679 /* We should have enough space for 40 bytes of options */
680 ASSERT (tc->snd_mss > 45);
681
682 /* If we use timestamp option, account for it */
683 if (tcp_opts_tstamp (&tc->rcv_opts))
684 tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
685}
686
687/**
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400688 * Initialize connection send variables.
689 */
690void
691tcp_init_snd_vars (tcp_connection_t * tc)
692{
Florin Coras3cbc04b2017-10-02 00:18:51 -0700693 /*
694 * We use the time to randomize iss and for setting up the initial
695 * timestamp. Make sure it's updated otherwise syn and ack in the
696 * handshake may make it look as if time has flown in the opposite
697 * direction for us.
698 */
Florin Corasbe72ae62018-11-01 11:23:03 -0700699 tcp_set_time_now (tcp_get_worker (vlib_get_thread_index ()));
Florin Coras3cbc04b2017-10-02 00:18:51 -0700700
Florin Corascedcf602019-08-27 12:15:43 -0700701 tcp_init_rcv_mss (tc);
Florin Coras18e0d4f2019-01-02 12:22:02 -0800702 tc->iss = tcp_generate_random_iss (tc);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400703 tc->snd_una = tc->iss;
704 tc->snd_nxt = tc->iss + 1;
705 tc->snd_una_max = tc->snd_nxt;
Florin Coras36ebcff2019-09-12 18:36:44 -0700706 tc->srtt = 100; /* 100 ms */
Florin Corasf4ce6ba2019-11-20 18:34:58 -0800707
708 if (!tcp_cfg.csum_offload)
709 tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD;
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400710}
711
Florin Corasd67f1122018-05-21 17:47:40 -0700712void
713tcp_enable_pacing (tcp_connection_t * tc)
714{
Florin Coras36ebcff2019-09-12 18:36:44 -0700715 u32 byte_rate;
716 byte_rate = tc->cwnd / (tc->srtt * TCP_TICK);
717 transport_connection_tx_pacer_init (&tc->connection, byte_rate, tc->cwnd);
Florin Corasd67f1122018-05-21 17:47:40 -0700718 tc->mrtt_us = (u32) ~ 0;
719}
720
Dave Barach68b0fb02017-02-28 15:15:56 -0500721/** Initialize tcp connection variables
722 *
723 * Should be called after having received a msg from the peer, i.e., a SYN or
724 * a SYNACK, such that connection options have already been exchanged. */
725void
726tcp_connection_init_vars (tcp_connection_t * tc)
727{
728 tcp_connection_timers_init (tc);
Florin Corasc8343412017-05-04 14:25:50 -0700729 tcp_init_mss (tc);
Florin Coras6792ec02017-03-13 03:49:51 -0700730 scoreboard_init (&tc->sack_sb);
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400731 if (tc->state == TCP_STATE_SYN_RCVD)
732 tcp_init_snd_vars (tc);
733
Florin Coras36ebcff2019-09-12 18:36:44 -0700734 tcp_cc_init (tc);
735
Florin Coras1c8ff632018-05-17 13:28:34 -0700736 if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
737 tcp_add_del_adjacency (tc, 1);
738
Florin Corasd67f1122018-05-21 17:47:40 -0700739 /* tcp_connection_fib_attach (tc); */
740
741 if (transport_connection_is_tx_paced (&tc->connection)
Florin Coras9094b5c2019-08-12 14:17:47 -0700742 || tcp_cfg.enable_tx_pacing)
Florin Corasd67f1122018-05-21 17:47:40 -0700743 tcp_enable_pacing (tc);
Florin Coras52814732019-06-12 15:38:19 -0700744
Florin Corasbbcfaac2019-10-10 13:52:04 -0700745 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
Florin Coras52814732019-06-12 15:38:19 -0700746 tcp_bt_init (tc);
Florin Corasedfe0ee2019-07-29 18:13:25 -0700747
Florin Corasbbcfaac2019-10-10 13:52:04 -0700748 if (!tcp_cfg.allow_tso)
749 tc->cfg_flags |= TCP_CFG_F_NO_TSO;
750
Florin Corasedfe0ee2019-07-29 18:13:25 -0700751 tc->start_ts = tcp_time_now_us (tc->c_thread_index);
Dave Barach68b0fb02017-02-28 15:15:56 -0500752}
753
Florin Coras3cbc04b2017-10-02 00:18:51 -0700754static int
755tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
756 u16 * lcl_port, u8 is_ip4)
757{
758 int index, port;
759 if (is_ip4)
760 {
Florin Coras9094b5c2019-08-12 14:17:47 -0700761 index = tm->last_v4_addr_rotor++;
762 if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
763 tm->last_v4_addr_rotor = 0;
764 lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
Florin Coras3cbc04b2017-10-02 00:18:51 -0700765 }
766 else
767 {
Florin Coras9094b5c2019-08-12 14:17:47 -0700768 index = tm->last_v6_addr_rotor++;
769 if (tm->last_v6_addr_rotor >= vec_len (tcp_cfg.ip6_src_addrs))
770 tm->last_v6_addr_rotor = 0;
771 clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
Dave Barach178cf492018-11-13 16:34:13 -0500772 sizeof (ip6_address_t));
Florin Coras3cbc04b2017-10-02 00:18:51 -0700773 }
774 port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
775 if (port < 1)
776 {
777 clib_warning ("Failed to allocate src port");
778 return -1;
779 }
780 *lcl_port = port;
781 return 0;
782}
783
Florin Coras0dbd5172018-06-25 16:19:34 -0700784static int
Florin Coras5665ced2018-10-25 18:03:45 -0700785tcp_session_open (transport_endpoint_cfg_t * rmt)
Dave Barach68b0fb02017-02-28 15:15:56 -0500786{
787 tcp_main_t *tm = vnet_get_tcp_main ();
788 tcp_connection_t *tc;
Dave Barach68b0fb02017-02-28 15:15:56 -0500789 ip46_address_t lcl_addr;
Florin Coras3cbc04b2017-10-02 00:18:51 -0700790 u16 lcl_port;
791 int rv;
Dave Barach68b0fb02017-02-28 15:15:56 -0500792
793 /*
Florin Coras3cbc04b2017-10-02 00:18:51 -0700794 * Allocate local endpoint
Dave Barach68b0fb02017-02-28 15:15:56 -0500795 */
Florin Coras9094b5c2019-08-12 14:17:47 -0700796 if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
797 || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
Florin Coras3cbc04b2017-10-02 00:18:51 -0700798 rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
799 rmt->is_ip4);
Dave Barach68b0fb02017-02-28 15:15:56 -0500800 else
Florin Coras3cbc04b2017-10-02 00:18:51 -0700801 rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP,
802 rmt, &lcl_addr, &lcl_port);
Dave Barach2c25a622017-06-26 11:35:07 -0400803
Florin Coras3cbc04b2017-10-02 00:18:51 -0700804 if (rv)
805 return -1;
Dave Barach68b0fb02017-02-28 15:15:56 -0500806
807 /*
808 * Create connection and send SYN
809 */
Florin Coras68810622017-07-24 17:40:28 -0700810 clib_spinlock_lock_if_init (&tm->half_open_lock);
Florin Coras04e53442017-07-16 17:12:15 -0700811 tc = tcp_half_open_connection_new ();
Florin Coras3cbc04b2017-10-02 00:18:51 -0700812 ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
813 ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
Florin Coras0e495682017-09-19 22:27:18 -0700814 tc->c_rmt_port = rmt->port;
Dave Barach68b0fb02017-02-28 15:15:56 -0500815 tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
Florin Coras04e53442017-07-16 17:12:15 -0700816 tc->c_is_ip4 = rmt->is_ip4;
Florin Coras3cbc04b2017-10-02 00:18:51 -0700817 tc->c_proto = TRANSPORT_PROTO_TCP;
Florin Corascea194d2017-10-02 00:18:51 -0700818 tc->c_fib_index = rmt->fib_index;
Florin Coras12f69362019-08-16 09:44:00 -0700819 tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
Dave Barach68b0fb02017-02-28 15:15:56 -0500820 /* The other connection vars will be initialized after SYN ACK */
821 tcp_connection_timers_init (tc);
822
Florin Corasa436a422019-08-20 07:09:31 -0700823 TCP_EVT (TCP_EVT_OPEN, tc);
Florin Coras6534b7a2017-07-18 05:38:03 -0400824 tc->state = TCP_STATE_SYN_SENT;
Florin Coras4eeeaaf2017-09-05 14:03:37 -0400825 tcp_init_snd_vars (tc);
Florin Coras6534b7a2017-07-18 05:38:03 -0400826 tcp_send_syn (tc);
Florin Coras68810622017-07-24 17:40:28 -0700827 clib_spinlock_unlock_if_init (&tm->half_open_lock);
Florin Corase69f4952017-03-07 10:06:24 -0800828
Dave Barach68b0fb02017-02-28 15:15:56 -0500829 return tc->c_c_index;
830}
831
Florin Corase69f4952017-03-07 10:06:24 -0800832const char *tcp_fsm_states[] = {
833#define _(sym, str) str,
834 foreach_tcp_fsm_state
835#undef _
836};
837
Dave Barach68b0fb02017-02-28 15:15:56 -0500838u8 *
Florin Corase69f4952017-03-07 10:06:24 -0800839format_tcp_state (u8 * s, va_list * args)
840{
Florin Corasbb292f42017-05-19 09:49:19 -0700841 u32 state = va_arg (*args, u32);
Florin Corase69f4952017-03-07 10:06:24 -0800842
Florin Corasbb292f42017-05-19 09:49:19 -0700843 if (state < TCP_N_STATES)
844 s = format (s, "%s", tcp_fsm_states[state]);
Florin Corase69f4952017-03-07 10:06:24 -0800845 else
Florin Corasbb292f42017-05-19 09:49:19 -0700846 s = format (s, "UNKNOWN (%d (0x%x))", state, state);
Florin Corase69f4952017-03-07 10:06:24 -0800847 return s;
848}
849
Florin Corasbbcfaac2019-10-10 13:52:04 -0700850const char *tcp_cfg_flags_str[] = {
851#define _(sym, str) str,
852 foreach_tcp_cfg_flag
853#undef _
854};
855
856static u8 *
857format_tcp_cfg_flags (u8 * s, va_list * args)
858{
859 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
860 int i, last = -1;
861
862 for (i = 0; i < TCP_CFG_N_FLAG_BITS; i++)
863 if (tc->cfg_flags & (1 << i))
864 last = i;
865 for (i = 0; i < last; i++)
866 {
867 if (tc->cfg_flags & (1 << i))
868 s = format (s, "%s, ", tcp_cfg_flags_str[i]);
869 }
870 if (last >= 0)
871 s = format (s, "%s", tcp_cfg_flags_str[last]);
872 return s;
873}
874
Florin Corasa096f2d2017-09-28 23:49:42 -0400875const char *tcp_connection_flags_str[] = {
876#define _(sym, str) str,
877 foreach_tcp_connection_flag
878#undef _
879};
880
Florin Coras0dbd5172018-06-25 16:19:34 -0700881static u8 *
Florin Corasa096f2d2017-09-28 23:49:42 -0400882format_tcp_connection_flags (u8 * s, va_list * args)
883{
884 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
885 int i, last = -1;
886
887 for (i = 0; i < TCP_CONN_N_FLAG_BITS; i++)
888 if (tc->flags & (1 << i))
889 last = i;
890 for (i = 0; i < last; i++)
891 {
892 if (tc->flags & (1 << i))
893 s = format (s, "%s, ", tcp_connection_flags_str[i]);
894 }
895 if (last >= 0)
896 s = format (s, "%s", tcp_connection_flags_str[last]);
897 return s;
898}
899
Florin Corase69f4952017-03-07 10:06:24 -0800900const char *tcp_conn_timers[] = {
901#define _(sym, str) str,
902 foreach_tcp_timer
903#undef _
904};
905
Florin Coras0dbd5172018-06-25 16:19:34 -0700906static u8 *
Florin Corase69f4952017-03-07 10:06:24 -0800907format_tcp_timers (u8 * s, va_list * args)
908{
909 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Coras93992a92017-05-24 18:03:56 -0700910 int i, last = -1;
Florin Corase69f4952017-03-07 10:06:24 -0800911
912 for (i = 0; i < TCP_N_TIMERS; i++)
913 if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID)
914 last = i;
915
Florin Corase69f4952017-03-07 10:06:24 -0800916 for (i = 0; i < last; i++)
917 {
918 if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID)
919 s = format (s, "%s,", tcp_conn_timers[i]);
920 }
921
Florin Coras93992a92017-05-24 18:03:56 -0700922 if (last >= 0)
Florin Corasde9a8492018-10-24 22:18:58 -0700923 s = format (s, "%s", tcp_conn_timers[i]);
Florin Corase69f4952017-03-07 10:06:24 -0800924
925 return s;
926}
927
Florin Coras0dbd5172018-06-25 16:19:34 -0700928static u8 *
Florin Corasbb292f42017-05-19 09:49:19 -0700929format_tcp_congestion_status (u8 * s, va_list * args)
930{
931 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
932 if (tcp_in_recovery (tc))
933 s = format (s, "recovery");
934 else if (tcp_in_fastrecovery (tc))
935 s = format (s, "fastrecovery");
936 else
937 s = format (s, "none");
938 return s;
939}
940
Florin Coras0dbd5172018-06-25 16:19:34 -0700941static i32
942tcp_rcv_wnd_available (tcp_connection_t * tc)
943{
944 return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
945}
946
947static u8 *
Florin Corasde9a8492018-10-24 22:18:58 -0700948format_tcp_congestion (u8 * s, va_list * args)
949{
950 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Corasbe237bf2019-09-27 08:16:40 -0700951 u32 indent = format_get_indent (s), prr_space = 0;
Florin Corasde9a8492018-10-24 22:18:58 -0700952
953 s = format (s, "%U ", format_tcp_congestion_status, tc);
Florin Coras0147c0f2019-03-11 21:59:00 -0700954 s = format (s, "algo %s cwnd %u ssthresh %u bytes_acked %u\n",
955 tc->cc_algo->name, tc->cwnd, tc->ssthresh, tc->bytes_acked);
Florin Corasbe237bf2019-09-27 08:16:40 -0700956 s = format (s, "%Ucc space %u prev_cwnd %u prev_ssthresh %u\n",
Florin Coras0147c0f2019-03-11 21:59:00 -0700957 format_white_space, indent, tcp_available_cc_snd_space (tc),
Florin Corasbe237bf2019-09-27 08:16:40 -0700958 tc->prev_cwnd, tc->prev_ssthresh);
959 s = format (s, "%Usnd_cong %u dupack %u limited_tx %u\n",
Florin Coras0147c0f2019-03-11 21:59:00 -0700960 format_white_space, indent, tc->snd_congestion - tc->iss,
961 tc->rcv_dupacks, tc->limited_transmit - tc->iss);
Florin Corasbe237bf2019-09-27 08:16:40 -0700962 s = format (s, "%Urxt_bytes %u rxt_delivered %u rxt_head %u rxt_ts %u\n",
963 format_white_space, indent, tc->snd_rxt_bytes,
964 tc->rxt_delivered, tc->rxt_head - tc->iss,
965 tcp_time_now_w_thread (tc->c_thread_index) - tc->snd_rxt_ts);
966 if (tcp_in_fastrecovery (tc))
967 prr_space = tcp_fastrecovery_prr_snd_space (tc);
968 s = format (s, "%Uprr_start %u prr_delivered %u prr space %u\n",
969 format_white_space, indent, tc->prr_start - tc->iss,
970 tc->prr_delivered, prr_space);
Florin Corasde9a8492018-10-24 22:18:58 -0700971 return s;
972}
973
974static u8 *
Florin Corasedfe0ee2019-07-29 18:13:25 -0700975format_tcp_stats (u8 * s, va_list * args)
976{
977 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
978 u32 indent = format_get_indent (s);
979 s = format (s, "in segs %lu dsegs %lu bytes %lu dupacks %u\n",
980 tc->segs_in, tc->data_segs_in, tc->bytes_in, tc->dupacks_in);
981 s = format (s, "%Uout segs %lu dsegs %lu bytes %lu dupacks %u\n",
982 format_white_space, indent, tc->segs_out,
983 tc->data_segs_out, tc->bytes_out, tc->dupacks_out);
984 s = format (s, "%Ufr %u tr %u rxt segs %lu bytes %lu duration %.3f\n",
985 format_white_space, indent, tc->fr_occurences,
986 tc->tr_occurences, tc->segs_retrans, tc->bytes_retrans,
987 tcp_time_now_us (tc->c_thread_index) - tc->start_ts);
988 s = format (s, "%Uerr wnd data below %u above %u ack below %u above %u",
989 format_white_space, indent, tc->errors.below_data_wnd,
990 tc->errors.above_data_wnd, tc->errors.below_ack_wnd,
991 tc->errors.above_ack_wnd);
992 return s;
993}
994
995static u8 *
Florin Corasbb292f42017-05-19 09:49:19 -0700996format_tcp_vars (u8 * s, va_list * args)
997{
998 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Corasbbcfaac2019-10-10 13:52:04 -0700999 s = format (s, " index: %u cfg: %U flags: %U timers: %U\n", tc->c_c_index,
1000 format_tcp_cfg_flags, tc, format_tcp_connection_flags, tc,
1001 format_tcp_timers, tc);
Dave Barach2c25a622017-06-26 11:35:07 -04001002 s = format (s, " snd_una %u snd_nxt %u snd_una_max %u",
Florin Corasbb292f42017-05-19 09:49:19 -07001003 tc->snd_una - tc->iss, tc->snd_nxt - tc->iss,
1004 tc->snd_una_max - tc->iss);
1005 s = format (s, " rcv_nxt %u rcv_las %u\n",
1006 tc->rcv_nxt - tc->irs, tc->rcv_las - tc->irs);
Florin Corasca031862018-09-24 13:58:05 -07001007 s = format (s, " snd_wnd %u rcv_wnd %u rcv_wscale %u ",
1008 tc->snd_wnd, tc->rcv_wnd, tc->rcv_wscale);
1009 s = format (s, "snd_wl1 %u snd_wl2 %u\n", tc->snd_wl1 - tc->irs,
Florin Corasbb292f42017-05-19 09:49:19 -07001010 tc->snd_wl2 - tc->iss);
Florin Coras0147c0f2019-03-11 21:59:00 -07001011 s = format (s, " flight size %u out space %u rcv_wnd_av %u",
Florin Coras4eeeaaf2017-09-05 14:03:37 -04001012 tcp_flight_size (tc), tcp_available_output_snd_space (tc),
Florin Corasde9a8492018-10-24 22:18:58 -07001013 tcp_rcv_wnd_available (tc));
Florin Coras0147c0f2019-03-11 21:59:00 -07001014 s = format (s, " tsval_recent %u\n", tc->tsval_recent);
Florin Coras537e85d2019-03-22 15:42:18 -07001015 s = format (s, " tsecr %u tsecr_last_ack %u tsval_recent_age %u",
Florin Coras0147c0f2019-03-11 21:59:00 -07001016 tc->rcv_opts.tsecr, tc->tsecr_last_ack,
Dave Barach2c25a622017-06-26 11:35:07 -04001017 tcp_time_now () - tc->tsval_recent_age);
Florin Coras537e85d2019-03-22 15:42:18 -07001018 s = format (s, " snd_mss %u\n", tc->snd_mss);
Florin Corase5b17912019-02-21 16:46:24 -08001019 s = format (s, " rto %u rto_boff %u srtt %u us %.3f rttvar %u rtt_ts %.4f",
Florin Corasefefc6b2018-11-07 12:49:19 -08001020 tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar,
1021 tc->rtt_ts);
1022 s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss);
Florin Corasd9035a42019-11-19 18:22:41 -08001023 s = format (s, " next_node %u opaque 0x%x\n", tc->next_node_index,
1024 tc->next_node_opaque);
Florin Corasde9a8492018-10-24 22:18:58 -07001025 s = format (s, " cong: %U", format_tcp_congestion, tc);
1026
Florin Corasde706082017-10-11 01:43:15 -07001027 if (tc->state >= TCP_STATE_ESTABLISHED)
Florin Corasd67f1122018-05-21 17:47:40 -07001028 {
Florin Corasde9a8492018-10-24 22:18:58 -07001029 s = format (s, " sboard: %U\n", format_tcp_scoreboard, &tc->sack_sb,
Florin Corasd67f1122018-05-21 17:47:40 -07001030 tc);
Florin Corasedfe0ee2019-07-29 18:13:25 -07001031 s = format (s, " stats: %U\n", format_tcp_stats, tc);
Florin Corasd67f1122018-05-21 17:47:40 -07001032 }
Florin Corasbb292f42017-05-19 09:49:19 -07001033 if (vec_len (tc->snd_sacks))
1034 s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc);
1035
1036 return s;
1037}
1038
Florin Coras0dbd5172018-06-25 16:19:34 -07001039static u8 *
Florin Corasbb292f42017-05-19 09:49:19 -07001040format_tcp_connection_id (u8 * s, va_list * args)
Florin Corase69f4952017-03-07 10:06:24 -08001041{
1042 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Corasa5464812017-04-19 13:00:05 -07001043 if (!tc)
1044 return s;
Florin Corase69f4952017-03-07 10:06:24 -08001045 if (tc->c_is_ip4)
1046 {
Florin Corasde9a8492018-10-24 22:18:58 -07001047 s = format (s, "[%d:%d][%s] %U:%d->%U:%d", tc->c_thread_index,
1048 tc->c_s_index, "T", format_ip4_address, &tc->c_lcl_ip4,
Florin Corase69f4952017-03-07 10:06:24 -08001049 clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address,
1050 &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port));
1051 }
1052 else
1053 {
Florin Corasde9a8492018-10-24 22:18:58 -07001054 s = format (s, "[%d:%d][%s] %U:%d->%U:%d", tc->c_thread_index,
1055 tc->c_s_index, "T", format_ip6_address, &tc->c_lcl_ip6,
Florin Corase69f4952017-03-07 10:06:24 -08001056 clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address,
1057 &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port));
1058 }
1059
1060 return s;
1061}
1062
1063u8 *
Florin Corasbb292f42017-05-19 09:49:19 -07001064format_tcp_connection (u8 * s, va_list * args)
Florin Corase69f4952017-03-07 10:06:24 -08001065{
1066 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Corasbb292f42017-05-19 09:49:19 -07001067 u32 verbose = va_arg (*args, u32);
1068
Florin Corasc87c91d2017-08-16 19:55:49 -07001069 if (!tc)
1070 return s;
Florin Corasbb292f42017-05-19 09:49:19 -07001071 s = format (s, "%-50U", format_tcp_connection_id, tc);
1072 if (verbose)
1073 {
1074 s = format (s, "%-15U", format_tcp_state, tc->state);
1075 if (verbose > 1)
Florin Corasa096f2d2017-09-28 23:49:42 -04001076 s = format (s, "\n%U", format_tcp_vars, tc);
Florin Corasbb292f42017-05-19 09:49:19 -07001077 }
Florin Coras3eb50622017-07-13 01:24:57 -04001078
Florin Corase69f4952017-03-07 10:06:24 -08001079 return s;
1080}
1081
Florin Coras0dbd5172018-06-25 16:19:34 -07001082static u8 *
Florin Corase69f4952017-03-07 10:06:24 -08001083format_tcp_session (u8 * s, va_list * args)
Dave Barach68b0fb02017-02-28 15:15:56 -05001084{
1085 u32 tci = va_arg (*args, u32);
1086 u32 thread_index = va_arg (*args, u32);
Florin Corasbb292f42017-05-19 09:49:19 -07001087 u32 verbose = va_arg (*args, u32);
Dave Barach68b0fb02017-02-28 15:15:56 -05001088 tcp_connection_t *tc;
1089
1090 tc = tcp_connection_get (tci, thread_index);
Florin Coras6cf30ad2017-04-04 23:08:23 -07001091 if (tc)
Florin Coras93992a92017-05-24 18:03:56 -07001092 s = format (s, "%U", format_tcp_connection, tc, verbose);
Florin Coras6cf30ad2017-04-04 23:08:23 -07001093 else
Florin Coras1f152cd2017-08-18 19:28:03 -07001094 s = format (s, "empty\n");
Florin Coras93992a92017-05-24 18:03:56 -07001095 return s;
Dave Barach68b0fb02017-02-28 15:15:56 -05001096}
1097
Florin Coras0dbd5172018-06-25 16:19:34 -07001098static u8 *
Florin Corase69f4952017-03-07 10:06:24 -08001099format_tcp_listener_session (u8 * s, va_list * args)
Dave Barach68b0fb02017-02-28 15:15:56 -05001100{
1101 u32 tci = va_arg (*args, u32);
Aloys Augustina0abbff2019-07-12 12:16:16 +02001102 u32 __clib_unused thread_index = va_arg (*args, u32);
Florin Coras3389dd22019-02-01 18:00:05 -08001103 u32 verbose = va_arg (*args, u32);
Dave Barach68b0fb02017-02-28 15:15:56 -05001104 tcp_connection_t *tc = tcp_listener_get (tci);
Florin Coras3389dd22019-02-01 18:00:05 -08001105 s = format (s, "%-50U", format_tcp_connection_id, tc);
1106 if (verbose)
1107 s = format (s, "%-15U", format_tcp_state, tc->state);
1108 return s;
Dave Barach68b0fb02017-02-28 15:15:56 -05001109}
1110
Florin Coras0dbd5172018-06-25 16:19:34 -07001111static u8 *
Florin Corase69f4952017-03-07 10:06:24 -08001112format_tcp_half_open_session (u8 * s, va_list * args)
Dave Barach68b0fb02017-02-28 15:15:56 -05001113{
1114 u32 tci = va_arg (*args, u32);
Aloys Augustina0abbff2019-07-12 12:16:16 +02001115 u32 __clib_unused thread_index = va_arg (*args, u32);
Dave Barach68b0fb02017-02-28 15:15:56 -05001116 tcp_connection_t *tc = tcp_half_open_connection_get (tci);
Florin Corasbb292f42017-05-19 09:49:19 -07001117 return format (s, "%U", format_tcp_connection_id, tc);
Dave Barach68b0fb02017-02-28 15:15:56 -05001118}
1119
Florin Coras06d11012017-05-17 14:21:51 -07001120u8 *
1121format_tcp_sacks (u8 * s, va_list * args)
1122{
1123 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
1124 sack_block_t *sacks = tc->snd_sacks;
1125 sack_block_t *block;
Dave Barach2c25a622017-06-26 11:35:07 -04001126 int i, len = 0;
1127
1128 len = vec_len (sacks);
1129 for (i = 0; i < len - 1; i++)
1130 {
1131 block = &sacks[i];
1132 s = format (s, " start %u end %u\n", block->start - tc->irs,
1133 block->end - tc->irs);
1134 }
1135 if (len)
1136 {
1137 block = &sacks[len - 1];
1138 s = format (s, " start %u end %u", block->start - tc->irs,
1139 block->end - tc->irs);
1140 }
Florin Coras06d11012017-05-17 14:21:51 -07001141 return s;
1142}
1143
1144u8 *
Florin Coras3eb50622017-07-13 01:24:57 -04001145format_tcp_rcv_sacks (u8 * s, va_list * args)
1146{
1147 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
1148 sack_block_t *sacks = tc->rcv_opts.sacks;
1149 sack_block_t *block;
1150 int i, len = 0;
1151
1152 len = vec_len (sacks);
1153 for (i = 0; i < len - 1; i++)
1154 {
1155 block = &sacks[i];
1156 s = format (s, " start %u end %u\n", block->start - tc->iss,
1157 block->end - tc->iss);
1158 }
1159 if (len)
1160 {
1161 block = &sacks[len - 1];
1162 s = format (s, " start %u end %u", block->start - tc->iss,
1163 block->end - tc->iss);
1164 }
1165 return s;
1166}
1167
Florin Coras0dbd5172018-06-25 16:19:34 -07001168static u8 *
Florin Coras06d11012017-05-17 14:21:51 -07001169format_tcp_sack_hole (u8 * s, va_list * args)
1170{
1171 sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *);
Florin Coras1f152cd2017-08-18 19:28:03 -07001172 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
1173 if (tc)
1174 s = format (s, " [%u, %u]", hole->start - tc->iss, hole->end - tc->iss);
1175 else
1176 s = format (s, " [%u, %u]", hole->start, hole->end);
Florin Coras06d11012017-05-17 14:21:51 -07001177 return s;
1178}
1179
1180u8 *
1181format_tcp_scoreboard (u8 * s, va_list * args)
1182{
1183 sack_scoreboard_t *sb = va_arg (*args, sack_scoreboard_t *);
Florin Coras1f152cd2017-08-18 19:28:03 -07001184 tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
Florin Coras06d11012017-05-17 14:21:51 -07001185 sack_scoreboard_hole_t *hole;
Florin Corasde9a8492018-10-24 22:18:58 -07001186 u32 indent = format_get_indent (s);
1187
Florin Corasbe237bf2019-09-27 08:16:40 -07001188 s = format (s, "sacked %u last_sacked %u lost %u last_lost %u"
1189 " rxt_sacked %u\n",
Florin Coras36ebcff2019-09-12 18:36:44 -07001190 sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes,
Florin Corasbe237bf2019-09-27 08:16:40 -07001191 sb->last_lost_bytes, sb->rxt_sacked);
1192 s = format (s, "%Ulast_delivered %u high_sacked %u is_reneging %u\n",
Florin Corasde9a8492018-10-24 22:18:58 -07001193 format_white_space, indent, sb->last_bytes_delivered,
Florin Coras558e3e02019-09-06 12:56:58 -07001194 sb->high_sacked - tc->iss, sb->is_reneging);
Florin Corasde9a8492018-10-24 22:18:58 -07001195 s = format (s, "%Ucur_rxt_hole %u high_rxt %u rescue_rxt %u",
1196 format_white_space, indent, sb->cur_rxt_hole,
1197 sb->high_rxt - tc->iss, sb->rescue_rxt - tc->iss);
Florin Coras93992a92017-05-24 18:03:56 -07001198
Florin Coras06d11012017-05-17 14:21:51 -07001199 hole = scoreboard_first_hole (sb);
Florin Coras93992a92017-05-24 18:03:56 -07001200 if (hole)
Florin Coras36ee9f12018-11-02 12:52:10 -07001201 s = format (s, "\n%Uhead %u tail %u %u holes:\n%U", format_white_space,
1202 indent, sb->head, sb->tail, pool_elts (sb->holes),
1203 format_white_space, indent);
Florin Coras93992a92017-05-24 18:03:56 -07001204
Florin Coras06d11012017-05-17 14:21:51 -07001205 while (hole)
1206 {
Florin Coras36ee9f12018-11-02 12:52:10 -07001207 s = format (s, "%U", format_tcp_sack_hole, hole, tc);
Florin Coras06d11012017-05-17 14:21:51 -07001208 hole = scoreboard_next_hole (sb, hole);
1209 }
Florin Coras3eb50622017-07-13 01:24:57 -04001210
Florin Coras06d11012017-05-17 14:21:51 -07001211 return s;
1212}
1213
Florin Coras0dbd5172018-06-25 16:19:34 -07001214static transport_connection_t *
Dave Barach68b0fb02017-02-28 15:15:56 -05001215tcp_session_get_transport (u32 conn_index, u32 thread_index)
1216{
1217 tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
Florin Coras5bb23ec2019-08-31 09:45:13 -07001218 if (PREDICT_FALSE (!tc))
1219 return 0;
Dave Barach68b0fb02017-02-28 15:15:56 -05001220 return &tc->connection;
1221}
1222
Florin Coras0dbd5172018-06-25 16:19:34 -07001223static transport_connection_t *
Dave Barach68b0fb02017-02-28 15:15:56 -05001224tcp_half_open_session_get_transport (u32 conn_index)
1225{
1226 tcp_connection_t *tc = tcp_half_open_connection_get (conn_index);
1227 return &tc->connection;
1228}
1229
Simon Zhang1146ff42019-09-02 22:54:00 +08001230static u16
1231tcp_session_cal_goal_size (tcp_connection_t * tc)
1232{
1233 u16 goal_size = tc->snd_mss;
1234
1235 goal_size = TCP_MAX_GSO_SZ - tc->snd_mss % TCP_MAX_GSO_SZ;
1236 goal_size = clib_min (goal_size, tc->snd_wnd / 2);
1237
Simon Zhang8a047ed2019-09-24 21:16:56 +08001238 return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
Simon Zhang1146ff42019-09-02 22:54:00 +08001239}
1240
Florin Corasc8343412017-05-04 14:25:50 -07001241/**
1242 * Compute maximum segment size for session layer.
1243 *
1244 * Since the result needs to be the actual data length, it first computes
1245 * the tcp options to be used in the next burst and subtracts their
1246 * length from the connection's snd_mss.
1247 */
Florin Coras0dbd5172018-06-25 16:19:34 -07001248static u16
Dave Barach68b0fb02017-02-28 15:15:56 -05001249tcp_session_send_mss (transport_connection_t * trans_conn)
1250{
1251 tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
Florin Corasc8343412017-05-04 14:25:50 -07001252
1253 /* Ensure snd_mss does accurately reflect the amount of data we can push
1254 * in a segment. This also makes sure that options are updated according to
1255 * the current state of the connection. */
Florin Corasb26743d2018-06-26 09:31:04 -07001256 tcp_update_burst_snd_vars (tc);
Florin Corasc8343412017-05-04 14:25:50 -07001257
Florin Corasbbcfaac2019-10-10 13:52:04 -07001258 if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO))
1259 return tcp_session_cal_goal_size (tc);
Simon Zhang1146ff42019-09-02 22:54:00 +08001260
Dave Barach68b0fb02017-02-28 15:15:56 -05001261 return tc->snd_mss;
1262}
1263
Florin Coras3af90fc2017-05-03 21:09:42 -07001264always_inline u32
1265tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
1266{
Dave Barach2c25a622017-06-26 11:35:07 -04001267 if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
Florin Coras3af90fc2017-05-03 21:09:42 -07001268 {
Florin Corasdb84e572017-05-09 18:54:52 -07001269 return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
Florin Coras3af90fc2017-05-03 21:09:42 -07001270 }
1271
Florin Coras1f152cd2017-08-18 19:28:03 -07001272 /* If not snd_wnd constrained and we can't write at least a segment,
1273 * don't try at all */
Dave Barach2c25a622017-06-26 11:35:07 -04001274 if (PREDICT_FALSE (snd_space < tc->snd_mss))
Florin Coras9d063042017-09-14 03:08:00 -04001275 return snd_space < tc->cwnd ? 0 : snd_space;
Florin Coras3af90fc2017-05-03 21:09:42 -07001276
1277 /* round down to mss multiple */
1278 return snd_space - (snd_space % tc->snd_mss);
1279}
1280
Florin Coras6792ec02017-03-13 03:49:51 -07001281/**
1282 * Compute tx window session is allowed to fill.
Florin Corasbb292f42017-05-19 09:49:19 -07001283 *
1284 * Takes into account available send space, snd_mss and the congestion
1285 * state of the connection. If possible, the value returned is a multiple
1286 * of snd_mss.
1287 *
1288 * @param tc tcp connection
1289 * @return number of bytes session is allowed to write
Florin Coras6792ec02017-03-13 03:49:51 -07001290 */
Florin Coras45ca73f2018-09-27 09:19:29 -07001291static inline u32
1292tcp_snd_space_inline (tcp_connection_t * tc)
Dave Barach68b0fb02017-02-28 15:15:56 -05001293{
Florin Coras36ebcff2019-09-12 18:36:44 -07001294 int snd_space;
Florin Coras6792ec02017-03-13 03:49:51 -07001295
Florin Coras4af830c2018-12-04 09:21:36 -08001296 if (PREDICT_FALSE (tcp_in_fastrecovery (tc)
1297 || tc->state == TCP_STATE_CLOSED))
Florin Coras36ee9f12018-11-02 12:52:10 -07001298 return 0;
1299
1300 snd_space = tcp_available_output_snd_space (tc);
1301
Florin Coras36ebcff2019-09-12 18:36:44 -07001302 /* If we got dupacks or sacked bytes but we're not yet in recovery, try
1303 * to force the peer to send enough dupacks to start retransmitting as
1304 * per Limited Transmit (RFC3042)
1305 */
1306 if (PREDICT_FALSE (tc->rcv_dupacks != 0 || tc->sack_sb.sacked_bytes))
Florin Coras6792ec02017-03-13 03:49:51 -07001307 {
Florin Coras36ebcff2019-09-12 18:36:44 -07001308 if (tc->limited_transmit != tc->snd_nxt
1309 && (seq_lt (tc->limited_transmit, tc->snd_nxt - 2 * tc->snd_mss)
1310 || seq_gt (tc->limited_transmit, tc->snd_nxt)))
Florin Coras36ee9f12018-11-02 12:52:10 -07001311 tc->limited_transmit = tc->snd_nxt;
Florin Coras36ebcff2019-09-12 18:36:44 -07001312
Florin Coras36ee9f12018-11-02 12:52:10 -07001313 ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt));
Florin Corasf03a59a2017-06-09 21:07:32 -07001314
Florin Coras36ebcff2019-09-12 18:36:44 -07001315 int snt_limited = tc->snd_nxt - tc->limited_transmit;
1316 snd_space = clib_max ((int) 2 * tc->snd_mss - snt_limited, 0);
Florin Coras3af90fc2017-05-03 21:09:42 -07001317 }
Florin Coras36ee9f12018-11-02 12:52:10 -07001318 return tcp_round_snd_space (tc, snd_space);
Dave Barach68b0fb02017-02-28 15:15:56 -05001319}
1320
Florin Coras45ca73f2018-09-27 09:19:29 -07001321u32
1322tcp_snd_space (tcp_connection_t * tc)
1323{
1324 return tcp_snd_space_inline (tc);
1325}
1326
Florin Coras0dbd5172018-06-25 16:19:34 -07001327static u32
Florin Corasbb292f42017-05-19 09:49:19 -07001328tcp_session_send_space (transport_connection_t * trans_conn)
1329{
1330 tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
Florin Coras45ca73f2018-09-27 09:19:29 -07001331 return clib_min (tcp_snd_space_inline (tc),
Florin Coras1f152cd2017-08-18 19:28:03 -07001332 tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
Florin Corasbb292f42017-05-19 09:49:19 -07001333}
1334
Florin Coras0dbd5172018-06-25 16:19:34 -07001335static u32
Florin Corasd79b41e2017-03-04 05:37:52 -08001336tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
Dave Barach68b0fb02017-02-28 15:15:56 -05001337{
1338 tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
Florin Coras6792ec02017-03-13 03:49:51 -07001339
1340 ASSERT (seq_geq (tc->snd_nxt, tc->snd_una));
1341
1342 /* This still works if fast retransmit is on */
Florin Corasd79b41e2017-03-04 05:37:52 -08001343 return (tc->snd_nxt - tc->snd_una);
Dave Barach68b0fb02017-02-28 15:15:56 -05001344}
1345
Florin Coras0dbd5172018-06-25 16:19:34 -07001346static void
Florin Coras561af9b2017-12-09 10:19:43 -08001347tcp_update_time (f64 now, u8 thread_index)
1348{
Florin Corasbe72ae62018-11-01 11:23:03 -07001349 tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
1350
1351 tcp_set_time_now (wrk);
1352 tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
Florin Corasbe72ae62018-11-01 11:23:03 -07001353 tcp_flush_frames_to_output (wrk);
Florin Coras561af9b2017-12-09 10:19:43 -08001354}
1355
Florin Coras42ceddb2018-12-12 10:56:01 -08001356static void
1357tcp_session_flush_data (transport_connection_t * tconn)
1358{
1359 tcp_connection_t *tc = (tcp_connection_t *) tconn;
1360 if (tc->flags & TCP_CONN_PSH_PENDING)
1361 return;
1362 tc->flags |= TCP_CONN_PSH_PENDING;
Florin Coras47596832019-03-12 18:58:54 -07001363 tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1;
Florin Coras42ceddb2018-12-12 10:56:01 -08001364}
1365
Dave Barach68b0fb02017-02-28 15:15:56 -05001366/* *INDENT-OFF* */
Florin Coras04e53442017-07-16 17:12:15 -07001367const static transport_proto_vft_t tcp_proto = {
Florin Coras561af9b2017-12-09 10:19:43 -08001368 .enable = vnet_tcp_enable_disable,
Florin Coras1ee78302019-02-05 15:51:15 -08001369 .start_listen = tcp_session_bind,
1370 .stop_listen = tcp_session_unbind,
Florin Coras8b20bf52018-06-14 14:55:50 -07001371 .push_header = tcp_session_push_header,
Dave Barach68b0fb02017-02-28 15:15:56 -05001372 .get_connection = tcp_session_get_transport,
1373 .get_listener = tcp_session_get_listener,
1374 .get_half_open = tcp_half_open_session_get_transport,
Florin Coras1ee78302019-02-05 15:51:15 -08001375 .connect = tcp_session_open,
Dave Barach68b0fb02017-02-28 15:15:56 -05001376 .close = tcp_session_close,
1377 .cleanup = tcp_session_cleanup,
Florin Corasdfb3b872019-08-16 17:48:44 -07001378 .reset = tcp_session_reset,
Dave Barach68b0fb02017-02-28 15:15:56 -05001379 .send_mss = tcp_session_send_mss,
1380 .send_space = tcp_session_send_space,
Florin Coras561af9b2017-12-09 10:19:43 -08001381 .update_time = tcp_update_time,
Florin Corasd79b41e2017-03-04 05:37:52 -08001382 .tx_fifo_offset = tcp_session_tx_fifo_offset,
Florin Coras42ceddb2018-12-12 10:56:01 -08001383 .flush_data = tcp_session_flush_data,
Florin Coras26dd6de2019-07-23 23:54:47 -07001384 .custom_tx = tcp_session_custom_tx,
Florin Corase69f4952017-03-07 10:06:24 -08001385 .format_connection = format_tcp_session,
1386 .format_listener = format_tcp_listener_session,
1387 .format_half_open = format_tcp_half_open_session,
Nathan Skrzypczake971bc92019-06-19 13:42:37 +02001388 .transport_options = {
1389 .tx_type = TRANSPORT_TX_PEEK,
1390 .service_type = TRANSPORT_SERVICE_VC,
1391 },
Dave Barach68b0fb02017-02-28 15:15:56 -05001392};
1393/* *INDENT-ON* */
1394
Florin Corasd67f1122018-05-21 17:47:40 -07001395void
Florin Corasc44a5582018-11-01 16:30:54 -07001396tcp_connection_tx_pacer_update (tcp_connection_t * tc)
Florin Corasd67f1122018-05-21 17:47:40 -07001397{
Florin Corasd67f1122018-05-21 17:47:40 -07001398 if (!transport_connection_is_tx_paced (&tc->connection))
1399 return;
1400
Florin Coras11e9e352019-11-13 19:09:47 -08001401 f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
1402
Florin Corasd2067242019-08-16 10:33:49 -07001403 transport_connection_tx_pacer_update (&tc->connection,
Florin Coras11e9e352019-11-13 19:09:47 -08001404 tcp_cc_get_pacing_rate (tc),
1405 srtt * CLIB_US_TIME_FREQ);
Florin Corasd67f1122018-05-21 17:47:40 -07001406}
1407
Florin Corasc44a5582018-11-01 16:30:54 -07001408void
1409tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
1410 u32 start_bucket)
1411{
Florin Coras36ebcff2019-09-12 18:36:44 -07001412 f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
Florin Coras599db9e2019-11-25 09:41:37 -08001413 transport_connection_tx_pacer_reset (&tc->connection,
1414 tcp_cc_get_pacing_rate (tc),
1415 start_bucket,
Florin Coras11e9e352019-11-13 19:09:47 -08001416 srtt * CLIB_US_TIME_FREQ);
Florin Corasc44a5582018-11-01 16:30:54 -07001417}
1418
Florin Coras0dbd5172018-06-25 16:19:34 -07001419static void
Florin Corasb72a0ff2019-11-22 17:38:25 -08001420tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
Dave Barach68b0fb02017-02-28 15:15:56 -05001421{
Dave Barach68b0fb02017-02-28 15:15:56 -05001422 tcp_connection_t *tc;
1423
Damjan Marion586afd72017-04-05 19:18:20 +02001424 tc = tcp_connection_get (conn_index, thread_index);
Florin Coras68810622017-07-24 17:40:28 -07001425 if (!tc)
1426 return;
Florin Coras75c48c12019-08-02 15:17:21 -07001427
Florin Coras54ddf432018-12-21 13:54:09 -08001428 switch (tc->state)
Florin Corasd79b41e2017-03-04 05:37:52 -08001429 {
Florin Coras54ddf432018-12-21 13:54:09 -08001430 case TCP_STATE_CLOSE_WAIT:
Florin Coras5f5d50e2018-10-01 08:32:04 -07001431 tcp_connection_timers_reset (tc);
Florin Coras5a2ec8f2018-12-27 11:53:11 -08001432 session_transport_closed_notify (&tc->connection);
Florin Coras54ddf432018-12-21 13:54:09 -08001433
1434 if (!(tc->flags & TCP_CONN_FINPNDG))
1435 {
1436 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras9094b5c2019-08-12 14:17:47 -07001437 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
Florin Coras54ddf432018-12-21 13:54:09 -08001438 break;
1439 }
1440
1441 /* Session didn't come back with a close. Send FIN either way
1442 * and switch to LAST_ACK. */
Florin Coras5f5d50e2018-10-01 08:32:04 -07001443 tcp_cong_recovery_off (tc);
Florin Coras54ddf432018-12-21 13:54:09 -08001444 /* Make sure we don't try to send unsent data */
Florin Coras47596832019-03-12 18:58:54 -07001445 tc->snd_nxt = tc->snd_una;
Florin Corasd79b41e2017-03-04 05:37:52 -08001446 tcp_send_fin (tc);
Florin Coras54ddf432018-12-21 13:54:09 -08001447 tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
Florin Corasd79b41e2017-03-04 05:37:52 -08001448
1449 /* Make sure we don't wait in LAST ACK forever */
Florin Coras9094b5c2019-08-12 14:17:47 -07001450 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
Florin Corasd79b41e2017-03-04 05:37:52 -08001451
1452 /* Don't delete the connection yet */
Florin Coras54ddf432018-12-21 13:54:09 -08001453 break;
1454 case TCP_STATE_FIN_WAIT_1:
Florin Coras5c0f1662018-12-19 01:38:57 -08001455 tcp_connection_timers_reset (tc);
Florin Corasa0904f02019-07-22 20:55:11 -07001456 session_transport_closed_notify (&tc->connection);
Florin Coras78cc4b02018-12-20 18:24:49 -08001457 if (tc->flags & TCP_CONN_FINPNDG)
Florin Coras54ddf432018-12-21 13:54:09 -08001458 {
Florin Coras6416e622019-04-03 17:52:43 -07001459 /* If FIN pending, we haven't sent everything, but we did try.
1460 * Notify session layer that transport is closed. */
1461 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras6416e622019-04-03 17:52:43 -07001462 tcp_send_reset (tc);
Florin Coras9094b5c2019-08-12 14:17:47 -07001463 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
Florin Coras54ddf432018-12-21 13:54:09 -08001464 }
1465 else
1466 {
1467 /* We've sent the fin but no progress. Close the connection and
1468 * to make sure everything is flushed, setup a cleanup timer */
1469 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras9094b5c2019-08-12 14:17:47 -07001470 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
Florin Coras54ddf432018-12-21 13:54:09 -08001471 }
1472 break;
1473 case TCP_STATE_LAST_ACK:
1474 case TCP_STATE_CLOSING:
1475 tcp_connection_timers_reset (tc);
1476 tcp_connection_set_state (tc, TCP_STATE_CLOSED);
Florin Coras9094b5c2019-08-12 14:17:47 -07001477 tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
Florin Coras5a2ec8f2018-12-27 11:53:11 -08001478 session_transport_closed_notify (&tc->connection);
Florin Coras54ddf432018-12-21 13:54:09 -08001479 break;
1480 default:
1481 tcp_connection_del (tc);
1482 break;
Florin Corase96bf632018-12-18 22:44:27 -08001483 }
Dave Barach68b0fb02017-02-28 15:15:56 -05001484}
1485
1486/* *INDENT-OFF* */
1487static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
1488{
1489 tcp_timer_retransmit_handler,
1490 tcp_timer_delack_handler,
Florin Coras3e350af2017-03-30 02:54:28 -07001491 tcp_timer_persist_handler,
Florin Corasd79b41e2017-03-04 05:37:52 -08001492 tcp_timer_waitclose_handler,
Dave Barach68b0fb02017-02-28 15:15:56 -05001493 tcp_timer_retransmit_syn_handler,
Dave Barach68b0fb02017-02-28 15:15:56 -05001494};
1495/* *INDENT-ON* */
1496
1497static void
1498tcp_expired_timers_dispatch (u32 * expired_timers)
1499{
Florin Corasb72a0ff2019-11-22 17:38:25 -08001500 u32 thread_index = vlib_get_thread_index ();
Dave Barach68b0fb02017-02-28 15:15:56 -05001501 u32 connection_index, timer_id;
Florin Corasb72a0ff2019-11-22 17:38:25 -08001502 tcp_connection_t *tc;
1503 int i;
Dave Barach68b0fb02017-02-28 15:15:56 -05001504
Florin Corasb72a0ff2019-11-22 17:38:25 -08001505 /*
1506 * Invalidate all timer handles before dispatching. This avoids dangling
1507 * index references to timer wheel pool entries that have been freed.
1508 */
Dave Barach68b0fb02017-02-28 15:15:56 -05001509 for (i = 0; i < vec_len (expired_timers); i++)
1510 {
Dave Barach68b0fb02017-02-28 15:15:56 -05001511 connection_index = expired_timers[i] & 0x0FFFFFFF;
1512 timer_id = expired_timers[i] >> 28;
1513
Florin Corasb72a0ff2019-11-22 17:38:25 -08001514 if (timer_id != TCP_TIMER_RETRANSMIT_SYN)
1515 tc = tcp_connection_get (connection_index, thread_index);
1516 else
1517 tc = tcp_half_open_connection_get (connection_index);
1518
Florin Corasa436a422019-08-20 07:09:31 -07001519 TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id);
Florin Corase69f4952017-03-07 10:06:24 -08001520
Florin Corasb72a0ff2019-11-22 17:38:25 -08001521 tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
1522 }
1523
1524 /*
1525 * Dispatch expired timers
1526 */
1527 for (i = 0; i < vec_len (expired_timers); i++)
1528 {
1529 connection_index = expired_timers[i] & 0x0FFFFFFF;
1530 timer_id = expired_timers[i] >> 28;
1531 (*timer_expiration_handlers[timer_id]) (connection_index, thread_index);
Dave Barach68b0fb02017-02-28 15:15:56 -05001532 }
1533}
1534
Florin Coras0dbd5172018-06-25 16:19:34 -07001535static void
Dave Barach68b0fb02017-02-28 15:15:56 -05001536tcp_initialize_timer_wheels (tcp_main_t * tm)
1537{
1538 tw_timer_wheel_16t_2w_512sl_t *tw;
Florin Corasa5464812017-04-19 13:00:05 -07001539 /* *INDENT-OFF* */
1540 foreach_vlib_main (({
Florin Coras2c414432018-06-19 09:58:04 -07001541 tw = &tm->wrk_ctx[ii].timer_wheel;
Dave Barach68b0fb02017-02-28 15:15:56 -05001542 tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
Florin Coras9094b5c2019-08-12 14:17:47 -07001543 TCP_TIMER_TICK, ~0);
Florin Corasa5464812017-04-19 13:00:05 -07001544 tw->last_run_time = vlib_time_now (this_vlib_main);
1545 }));
1546 /* *INDENT-ON* */
Dave Barach68b0fb02017-02-28 15:15:56 -05001547}
1548
Florin Coras18e0d4f2019-01-02 12:22:02 -08001549static void
1550tcp_initialize_iss_seed (tcp_main_t * tm)
1551{
1552 u32 default_seed = random_default_seed ();
1553 u64 time_now = clib_cpu_time_now ();
1554
1555 tm->iss_seed.first = (u64) random_u32 (&default_seed) << 32;
1556 tm->iss_seed.second = random_u64 (&time_now);
1557}
1558
Florin Coras0dbd5172018-06-25 16:19:34 -07001559static clib_error_t *
Florin Corasa0b34a72017-03-07 01:20:52 -08001560tcp_main_enable (vlib_main_t * vm)
Dave Barach68b0fb02017-02-28 15:15:56 -05001561{
Dave Barach68b0fb02017-02-28 15:15:56 -05001562 vlib_thread_main_t *vtm = vlib_get_thread_main ();
Florin Corasbe72ae62018-11-01 11:23:03 -07001563 u32 num_threads, n_workers, prealloc_conn_per_wrk;
Dave Barach2c25a622017-06-26 11:35:07 -04001564 tcp_connection_t *tc __attribute__ ((unused));
Florin Corasbe72ae62018-11-01 11:23:03 -07001565 tcp_main_t *tm = vnet_get_tcp_main ();
1566 clib_error_t *error = 0;
1567 int thread;
Dave Barach68b0fb02017-02-28 15:15:56 -05001568
Dave Barach68b0fb02017-02-28 15:15:56 -05001569 if ((error = vlib_call_init_function (vm, ip_main_init)))
1570 return error;
1571 if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
1572 return error;
1573 if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
1574 return error;
1575
1576 /*
1577 * Registrations
1578 */
1579
Dave Barach68b0fb02017-02-28 15:15:56 -05001580 ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index);
rootc9d1c5b2017-08-15 12:58:31 -04001581 ip6_register_protocol (IP_PROTOCOL_TCP, tcp6_input_node.index);
Dave Barach68b0fb02017-02-28 15:15:56 -05001582
Dave Barach68b0fb02017-02-28 15:15:56 -05001583 /*
1584 * Initialize data structures
1585 */
1586
1587 num_threads = 1 /* main thread */ + vtm->n_threads;
1588 vec_validate (tm->connections, num_threads - 1);
Florin Corase55a6d72018-10-31 23:09:22 -07001589 vec_validate (tm->wrk_ctx, num_threads - 1);
Florin Corasbe72ae62018-11-01 11:23:03 -07001590 n_workers = num_threads == 1 ? 1 : vtm->n_threads;
Florin Coras9094b5c2019-08-12 14:17:47 -07001591 prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
Dave Barach68b0fb02017-02-28 15:15:56 -05001592
Florin Corasbe72ae62018-11-01 11:23:03 -07001593 for (thread = 0; thread < num_threads; thread++)
Dave Barach2c25a622017-06-26 11:35:07 -04001594 {
Florin Coras9ece3c02018-11-05 11:06:53 -08001595 vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
Florin Corasb11175d2018-11-09 14:34:08 -08001596 vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
Florin Coras9ece3c02018-11-05 11:06:53 -08001597 vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
Florin Corasb11175d2018-11-09 14:34:08 -08001598 vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
Florin Corase55a6d72018-10-31 23:09:22 -07001599 tm->wrk_ctx[thread].vm = vlib_mains[thread];
Florin Corasbe72ae62018-11-01 11:23:03 -07001600
1601 /*
1602 * Preallocate connections. Assume that thread 0 won't
1603 * use preallocated threads when running multi-core
1604 */
1605 if ((thread > 0 || num_threads == 1) && prealloc_conn_per_wrk)
1606 pool_init_fixed (tm->connections[thread], prealloc_conn_per_wrk);
Dave Barach2c25a622017-06-26 11:35:07 -04001607 }
1608
1609 /*
Dave Barachb7f1faa2017-08-29 11:43:37 -04001610 * Use a preallocated half-open connection pool?
Dave Barach2c25a622017-06-26 11:35:07 -04001611 */
Florin Coras9094b5c2019-08-12 14:17:47 -07001612 if (tcp_cfg.preallocated_half_open_connections)
Dave Barachb7f1faa2017-08-29 11:43:37 -04001613 pool_init_fixed (tm->half_open_connections,
Florin Coras9094b5c2019-08-12 14:17:47 -07001614 tcp_cfg.preallocated_half_open_connections);
Dave Barach2c25a622017-06-26 11:35:07 -04001615
Dave Barach68b0fb02017-02-28 15:15:56 -05001616 /* Initialize clocks per tick for TCP timestamp. Used to compute
1617 * monotonically increasing timestamps. */
1618 tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
1619 / TCP_TSTAMP_RESOLUTION;
1620
Florin Coras04e53442017-07-16 17:12:15 -07001621 if (num_threads > 1)
Florin Coras68810622017-07-24 17:40:28 -07001622 {
1623 clib_spinlock_init (&tm->half_open_lock);
Florin Coras68810622017-07-24 17:40:28 -07001624 }
Florin Coras66b11312017-07-31 17:18:03 -07001625
Florin Coras2c414432018-06-19 09:58:04 -07001626 tcp_initialize_timer_wheels (tm);
Florin Coras18e0d4f2019-01-02 12:22:02 -08001627 tcp_initialize_iss_seed (tm);
Florin Coras66b11312017-07-31 17:18:03 -07001628
Damjan Marion8934a042019-02-09 23:29:26 +01001629 tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
Florin Coras4e116fb2019-06-10 08:33:50 -07001630 tm->cc_last_type = TCP_CC_LAST;
Dave Barach68b0fb02017-02-28 15:15:56 -05001631 return error;
1632}
1633
Florin Corasa0b34a72017-03-07 01:20:52 -08001634clib_error_t *
1635vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en)
1636{
1637 if (is_en)
1638 {
1639 if (tcp_main.is_enabled)
1640 return 0;
1641
1642 return tcp_main_enable (vm);
1643 }
1644 else
1645 {
1646 tcp_main.is_enabled = 0;
1647 }
1648
1649 return 0;
1650}
1651
Pierre Pfister7fe51f32017-09-20 08:48:36 +02001652void
1653tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
1654{
1655 tcp_main_t *tm = &tcp_main;
1656 if (is_ip4)
1657 tm->punt_unknown4 = is_add;
1658 else
1659 tm->punt_unknown6 = is_add;
1660}
1661
Florin Coras9094b5c2019-08-12 14:17:47 -07001662/**
1663 * Initialize default values for tcp parameters
1664 */
1665static void
1666tcp_configuration_init (void)
1667{
1668 /* Initial wnd for SYN. Fifos are not allocated at that point so use some
1669 * predefined value. For SYN-ACK we still want the scale to be computed in
1670 * the same way */
1671 tcp_cfg.max_rx_fifo = 32 << 20;
1672 tcp_cfg.min_rx_fifo = 4 << 10;
1673
Florin Corascedcf602019-08-27 12:15:43 -07001674 tcp_cfg.default_mtu = 1500;
Florin Coras9094b5c2019-08-12 14:17:47 -07001675 tcp_cfg.initial_cwnd_multiplier = 0;
1676 tcp_cfg.enable_tx_pacing = 1;
Florin Corasbbcfaac2019-10-10 13:52:04 -07001677 tcp_cfg.allow_tso = 0;
Florin Corasf4ce6ba2019-11-20 18:34:58 -08001678 tcp_cfg.csum_offload = 1;
Florin Coras9094b5c2019-08-12 14:17:47 -07001679 tcp_cfg.cc_algo = TCP_CC_NEWRENO;
Florin Coras017dc452019-08-30 11:06:35 -07001680 tcp_cfg.rwnd_min_update_ack = 1;
Florin Coras9094b5c2019-08-12 14:17:47 -07001681
1682 /* Time constants defined as timer tick (100ms) multiples */
1683 tcp_cfg.delack_time = 1; /* 0.1s */
1684 tcp_cfg.closewait_time = 20; /* 2s */
1685 tcp_cfg.timewait_time = 100; /* 10s */
1686 tcp_cfg.finwait1_time = 600; /* 60s */
1687 tcp_cfg.lastack_time = 300; /* 30s */
1688 tcp_cfg.finwait2_time = 300; /* 30s */
1689 tcp_cfg.closing_time = 300; /* 30s */
1690 tcp_cfg.cleanup_time = 1; /* 0.1s */
1691}
1692
Florin Coras0dbd5172018-06-25 16:19:34 -07001693static clib_error_t *
Florin Corasa0b34a72017-03-07 01:20:52 -08001694tcp_init (vlib_main_t * vm)
1695{
1696 tcp_main_t *tm = vnet_get_tcp_main ();
Florin Coras40364272017-11-16 09:57:50 -08001697 ip_main_t *im = &ip_main;
1698 ip_protocol_info_t *pi;
1699
1700 /* Session layer, and by implication tcp, are disabled by default */
Florin Corasa0b34a72017-03-07 01:20:52 -08001701 tm->is_enabled = 0;
Florin Coras40364272017-11-16 09:57:50 -08001702
1703 /* Register with IP for header parsing */
1704 pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP);
1705 if (pi == 0)
1706 return clib_error_return (0, "TCP protocol info AWOL");
1707 pi->format_header = format_tcp_header;
1708 pi->unformat_pg_edit = unformat_pg_tcp_header;
1709
Florin Coras561af9b2017-12-09 10:19:43 -08001710 /* Register as transport with session layer */
1711 transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto,
1712 FIB_PROTOCOL_IP4, tcp4_output_node.index);
1713 transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto,
1714 FIB_PROTOCOL_IP6, tcp6_output_node.index);
1715
Dave Barach3bbcfab2017-08-15 19:03:44 -04001716 tcp_api_reference ();
Florin Coras9094b5c2019-08-12 14:17:47 -07001717 tcp_configuration_init ();
1718
Florin Corasfbf278a2019-03-26 14:05:38 -07001719 tm->cc_algo_by_name = hash_create_string (0, sizeof (uword));
Florin Coras9094b5c2019-08-12 14:17:47 -07001720
Florin Corasa0b34a72017-03-07 01:20:52 -08001721 return 0;
1722}
1723
Dave Barach68b0fb02017-02-28 15:15:56 -05001724VLIB_INIT_FUNCTION (tcp_init);
1725
Florin Coras2e31cc32018-09-25 14:00:34 -07001726uword
1727unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
1728{
Florin Corasd25d3642019-08-15 19:55:03 -07001729 tcp_cc_algorithm_type_e *result = va_arg (*va, tcp_cc_algorithm_type_e *);
Florin Corasfbf278a2019-03-26 14:05:38 -07001730 tcp_main_t *tm = &tcp_main;
1731 char *cc_algo_name;
1732 u8 found = 0;
1733 uword *p;
Florin Coras2e31cc32018-09-25 14:00:34 -07001734
Florin Corasfbf278a2019-03-26 14:05:38 -07001735 if (unformat (input, "%s", &cc_algo_name)
1736 && ((p = hash_get_mem (tm->cc_algo_by_name, cc_algo_name))))
1737 {
1738 *result = *p;
1739 found = 1;
1740 }
Florin Coras2e31cc32018-09-25 14:00:34 -07001741
Florin Corasfbf278a2019-03-26 14:05:38 -07001742 vec_free (cc_algo_name);
1743 return found;
Florin Coras2e31cc32018-09-25 14:00:34 -07001744}
1745
Florin Corasaa01abb2018-11-12 09:13:10 -08001746uword
1747unformat_tcp_cc_algo_cfg (unformat_input_t * input, va_list * va)
1748{
1749 tcp_main_t *tm = vnet_get_tcp_main ();
1750 tcp_cc_algorithm_t *cc_alg;
1751 unformat_input_t sub_input;
1752 int found = 0;
1753
1754 vec_foreach (cc_alg, tm->cc_algos)
1755 {
1756 if (!unformat (input, cc_alg->name))
1757 continue;
1758
1759 if (cc_alg->unformat_cfg
1760 && unformat (input, "%U", unformat_vlib_cli_sub_input, &sub_input))
1761 {
1762 if (cc_alg->unformat_cfg (&sub_input))
1763 found = 1;
1764 }
1765 }
1766 return found;
1767}
1768
Dave Barach2c25a622017-06-26 11:35:07 -04001769static clib_error_t *
1770tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
1771{
Florin Coras1df833e2019-09-22 19:05:50 -07001772 u32 cwnd_multiplier, tmp_time;
Florin Coras4e1fcf42019-08-31 09:46:24 -07001773 uword memory_size;
Florin Coras1df833e2019-09-22 19:05:50 -07001774
Dave Barach2c25a622017-06-26 11:35:07 -04001775 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1776 {
Florin Corasca031862018-09-24 13:58:05 -07001777 if (unformat (input, "preallocated-connections %d",
Florin Coras9094b5c2019-08-12 14:17:47 -07001778 &tcp_cfg.preallocated_connections))
Dave Barach2c25a622017-06-26 11:35:07 -04001779 ;
1780 else if (unformat (input, "preallocated-half-open-connections %d",
Florin Coras9094b5c2019-08-12 14:17:47 -07001781 &tcp_cfg.preallocated_half_open_connections))
Dave Barach2c25a622017-06-26 11:35:07 -04001782 ;
Florin Corasf988e692017-11-27 04:34:14 -05001783 else if (unformat (input, "buffer-fail-fraction %f",
Florin Coras9094b5c2019-08-12 14:17:47 -07001784 &tcp_cfg.buffer_fail_fraction))
Florin Corasf988e692017-11-27 04:34:14 -05001785 ;
Florin Corasca031862018-09-24 13:58:05 -07001786 else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
Florin Coras4e1fcf42019-08-31 09:46:24 -07001787 &memory_size))
Dave Wallacedb396562019-10-30 17:25:24 +00001788 {
1789 if (memory_size >= 0x100000000)
1790 {
1791 return clib_error_return
1792 (0, "max-rx-fifo %llu (0x%llx) too large", memory_size,
1793 memory_size);
1794 }
1795 tcp_cfg.max_rx_fifo = memory_size;
1796 }
Florin Coras9094b5c2019-08-12 14:17:47 -07001797 else if (unformat (input, "min-rx-fifo %U", unformat_memory_size,
Florin Coras4e1fcf42019-08-31 09:46:24 -07001798 &memory_size))
Dave Wallacedb396562019-10-30 17:25:24 +00001799 {
1800 if (memory_size >= 0x100000000)
1801 {
1802 return clib_error_return
1803 (0, "min-rx-fifo %llu (0x%llx) too large", memory_size,
1804 memory_size);
1805 }
1806 tcp_cfg.min_rx_fifo = memory_size;
1807 }
Florin Coras4e1fcf42019-08-31 09:46:24 -07001808 else if (unformat (input, "mtu %u", &tcp_cfg.default_mtu))
Florin Coras9094b5c2019-08-12 14:17:47 -07001809 ;
Florin Coras017dc452019-08-30 11:06:35 -07001810 else if (unformat (input, "rwnd-min-update-ack %d",
1811 &tcp_cfg.rwnd_min_update_ack))
1812 ;
Florin Coras4e1fcf42019-08-31 09:46:24 -07001813 else if (unformat (input, "initial-cwnd-multiplier %u",
Florin Coras1df833e2019-09-22 19:05:50 -07001814 &cwnd_multiplier))
1815 tcp_cfg.initial_cwnd_multiplier = cwnd_multiplier;
Florin Coras7ac053b2018-11-05 15:57:21 -08001816 else if (unformat (input, "no-tx-pacing"))
Florin Coras9094b5c2019-08-12 14:17:47 -07001817 tcp_cfg.enable_tx_pacing = 0;
Florin Corasbbcfaac2019-10-10 13:52:04 -07001818 else if (unformat (input, "tso"))
1819 tcp_cfg.allow_tso = 1;
Florin Corasf4ce6ba2019-11-20 18:34:58 -08001820 else if (unformat (input, "no-csum-offload"))
1821 tcp_cfg.csum_offload = 0;
Florin Coras2e31cc32018-09-25 14:00:34 -07001822 else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo,
Florin Coras9094b5c2019-08-12 14:17:47 -07001823 &tcp_cfg.cc_algo))
Florin Coras2e31cc32018-09-25 14:00:34 -07001824 ;
Florin Corasaa01abb2018-11-12 09:13:10 -08001825 else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg))
1826 ;
Florin Coras1df833e2019-09-22 19:05:50 -07001827 else if (unformat (input, "closewait-time %u", &tmp_time))
1828 tcp_cfg.closewait_time = tmp_time / TCP_TIMER_TICK;
1829 else if (unformat (input, "timewait-time %u", &tmp_time))
1830 tcp_cfg.timewait_time = tmp_time / TCP_TIMER_TICK;
1831 else if (unformat (input, "finwait1-time %u", &tmp_time))
1832 tcp_cfg.finwait1_time = tmp_time / TCP_TIMER_TICK;
1833 else if (unformat (input, "finwait2-time %u", &tmp_time))
1834 tcp_cfg.finwait2_time = tmp_time / TCP_TIMER_TICK;
1835 else if (unformat (input, "lastack-time %u", &tmp_time))
1836 tcp_cfg.lastack_time = tmp_time / TCP_TIMER_TICK;
1837 else if (unformat (input, "closing-time %u", &tmp_time))
1838 tcp_cfg.closing_time = tmp_time / TCP_TIMER_TICK;
1839 else if (unformat (input, "cleanup-time %u", &tmp_time))
1840 tcp_cfg.cleanup_time = tmp_time / TCP_TIMER_TICK;
Dave Barach2c25a622017-06-26 11:35:07 -04001841 else
1842 return clib_error_return (0, "unknown input `%U'",
1843 format_unformat_error, input);
1844 }
1845 return 0;
1846}
1847
1848VLIB_CONFIG_FUNCTION (tcp_config_fn, "tcp");
1849
Dave Barach3bbcfab2017-08-15 19:03:44 -04001850
1851/**
1852 * \brief Configure an ipv4 source address range
1853 * @param vm vlib_main_t pointer
1854 * @param start first ipv4 address in the source address range
1855 * @param end last ipv4 address in the source address range
1856 * @param table_id VRF / table ID, 0 for the default FIB
1857 * @return 0 if all OK, else an error indication from api_errno.h
1858 */
1859
1860int
1861tcp_configure_v4_source_address_range (vlib_main_t * vm,
1862 ip4_address_t * start,
1863 ip4_address_t * end, u32 table_id)
1864{
Dave Barach3bbcfab2017-08-15 19:03:44 -04001865 u32 start_host_byte_order, end_host_byte_order;
1866 fib_prefix_t prefix;
Dave Barach3bbcfab2017-08-15 19:03:44 -04001867 fib_node_index_t fei;
1868 u32 fib_index = 0;
1869 u32 sw_if_index;
1870 int rv;
Dave Barach3bbcfab2017-08-15 19:03:44 -04001871
Dave Barachb7b92992018-10-17 10:38:51 -04001872 clib_memset (&prefix, 0, sizeof (prefix));
Dave Barach3bbcfab2017-08-15 19:03:44 -04001873
1874 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
1875
1876 if (fib_index == ~0)
1877 return VNET_API_ERROR_NO_SUCH_FIB;
1878
1879 start_host_byte_order = clib_net_to_host_u32 (start->as_u32);
1880 end_host_byte_order = clib_net_to_host_u32 (end->as_u32);
1881
1882 /* sanity check for reversed args or some such */
1883 if ((end_host_byte_order - start_host_byte_order) > (10 << 10))
1884 return VNET_API_ERROR_INVALID_ARGUMENT;
1885
1886 /* Lookup the last address, to identify the interface involved */
1887 prefix.fp_len = 32;
1888 prefix.fp_proto = FIB_PROTOCOL_IP4;
1889 memcpy (&prefix.fp_addr.ip4, end, sizeof (ip4_address_t));
1890
1891 fei = fib_table_lookup (fib_index, &prefix);
1892
1893 /* Couldn't find route to destination. Bail out. */
1894 if (fei == FIB_NODE_INDEX_INVALID)
1895 return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
1896
1897 sw_if_index = fib_entry_get_resolving_interface (fei);
1898
Dave Barach3bbcfab2017-08-15 19:03:44 -04001899 /* Configure proxy arp across the range */
Neale Rannscbe25aa2019-09-30 10:53:31 +00001900 rv = ip4_neighbor_proxy_add (fib_index, start, end);
Dave Barach3bbcfab2017-08-15 19:03:44 -04001901
1902 if (rv)
1903 return rv;
1904
Neale Rannscbe25aa2019-09-30 10:53:31 +00001905 rv = ip4_neighbor_proxy_enable (sw_if_index);
Neale Ranns57e53bb2019-05-29 13:58:43 +00001906
1907 if (rv)
1908 return rv;
1909
Dave Barach3bbcfab2017-08-15 19:03:44 -04001910 do
1911 {
1912 dpo_id_t dpo = DPO_INVALID;
1913
Florin Coras9094b5c2019-08-12 14:17:47 -07001914 vec_add1 (tcp_cfg.ip4_src_addrs, start[0]);
Dave Barach3bbcfab2017-08-15 19:03:44 -04001915
1916 /* Add local adjacencies for the range */
1917
1918 receive_dpo_add_or_lock (DPO_PROTO_IP4, ~0 /* sw_if_index */ ,
1919 NULL, &dpo);
1920 prefix.fp_len = 32;
1921 prefix.fp_proto = FIB_PROTOCOL_IP4;
1922 prefix.fp_addr.ip4.as_u32 = start->as_u32;
1923
1924 fib_table_entry_special_dpo_update (fib_index,
1925 &prefix,
1926 FIB_SOURCE_API,
1927 FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
1928 dpo_reset (&dpo);
1929
1930 start_host_byte_order++;
1931 start->as_u32 = clib_host_to_net_u32 (start_host_byte_order);
1932 }
1933 while (start_host_byte_order <= end_host_byte_order);
1934
1935 return 0;
1936}
1937
1938/**
1939 * \brief Configure an ipv6 source address range
1940 * @param vm vlib_main_t pointer
1941 * @param start first ipv6 address in the source address range
1942 * @param end last ipv6 address in the source address range
1943 * @param table_id VRF / table ID, 0 for the default FIB
1944 * @return 0 if all OK, else an error indication from api_errno.h
1945 */
1946
1947int
1948tcp_configure_v6_source_address_range (vlib_main_t * vm,
1949 ip6_address_t * start,
1950 ip6_address_t * end, u32 table_id)
1951{
Dave Barach3bbcfab2017-08-15 19:03:44 -04001952 fib_prefix_t prefix;
1953 u32 fib_index = 0;
1954 fib_node_index_t fei;
1955 u32 sw_if_index;
1956
Dave Barachb7b92992018-10-17 10:38:51 -04001957 clib_memset (&prefix, 0, sizeof (prefix));
Dave Barach3bbcfab2017-08-15 19:03:44 -04001958
1959 fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
1960
1961 if (fib_index == ~0)
1962 return VNET_API_ERROR_NO_SUCH_FIB;
1963
1964 while (1)
1965 {
1966 int i;
1967 ip6_address_t tmp;
1968 dpo_id_t dpo = DPO_INVALID;
1969
1970 /* Remember this address */
Florin Coras9094b5c2019-08-12 14:17:47 -07001971 vec_add1 (tcp_cfg.ip6_src_addrs, start[0]);
Dave Barach3bbcfab2017-08-15 19:03:44 -04001972
1973 /* Lookup the prefix, to identify the interface involved */
1974 prefix.fp_len = 128;
1975 prefix.fp_proto = FIB_PROTOCOL_IP6;
1976 memcpy (&prefix.fp_addr.ip6, start, sizeof (ip6_address_t));
1977
1978 fei = fib_table_lookup (fib_index, &prefix);
1979
1980 /* Couldn't find route to destination. Bail out. */
1981 if (fei == FIB_NODE_INDEX_INVALID)
1982 return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
1983
1984 sw_if_index = fib_entry_get_resolving_interface (fei);
1985
1986 if (sw_if_index == (u32) ~ 0)
1987 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
1988
1989 /* Add a proxy neighbor discovery entry for this address */
Neale Rannscbe25aa2019-09-30 10:53:31 +00001990 ip6_neighbor_proxy_add (sw_if_index, start);
Dave Barach3bbcfab2017-08-15 19:03:44 -04001991
1992 /* Add a receive adjacency for this address */
1993 receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ ,
1994 NULL, &dpo);
1995
1996 fib_table_entry_special_dpo_update (fib_index,
1997 &prefix,
1998 FIB_SOURCE_API,
1999 FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
2000 dpo_reset (&dpo);
2001
2002 /* Done with the entire range? */
2003 if (!memcmp (start, end, sizeof (start[0])))
2004 break;
2005
2006 /* Increment the address. DGMS. */
2007 tmp = start[0];
2008 for (i = 15; i >= 0; i--)
2009 {
2010 tmp.as_u8[i] += 1;
2011 if (tmp.as_u8[i] != 0)
2012 break;
2013 }
2014 start[0] = tmp;
2015 }
2016 return 0;
2017}
2018
Dave Barach2c25a622017-06-26 11:35:07 -04002019static clib_error_t *
2020tcp_src_address (vlib_main_t * vm,
2021 unformat_input_t * input, vlib_cli_command_t * cmd_arg)
2022{
Dave Barach2c25a622017-06-26 11:35:07 -04002023 ip4_address_t v4start, v4end;
2024 ip6_address_t v6start, v6end;
Dave Barach3bbcfab2017-08-15 19:03:44 -04002025 u32 table_id = 0;
Dave Barach2c25a622017-06-26 11:35:07 -04002026 int v4set = 0;
2027 int v6set = 0;
Dave Barach3bbcfab2017-08-15 19:03:44 -04002028 int rv;
Dave Barach2c25a622017-06-26 11:35:07 -04002029
2030 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2031 {
2032 if (unformat (input, "%U - %U", unformat_ip4_address, &v4start,
2033 unformat_ip4_address, &v4end))
2034 v4set = 1;
2035 else if (unformat (input, "%U", unformat_ip4_address, &v4start))
2036 {
2037 memcpy (&v4end, &v4start, sizeof (v4start));
2038 v4set = 1;
2039 }
2040 else if (unformat (input, "%U - %U", unformat_ip6_address, &v6start,
Dave Barach3bbcfab2017-08-15 19:03:44 -04002041 unformat_ip6_address, &v6end))
Dave Barach2c25a622017-06-26 11:35:07 -04002042 v6set = 1;
2043 else if (unformat (input, "%U", unformat_ip6_address, &v6start))
2044 {
Yoann Desmouceaux6b297aa2017-09-20 10:34:22 +02002045 memcpy (&v6end, &v6start, sizeof (v6start));
Dave Barach2c25a622017-06-26 11:35:07 -04002046 v6set = 1;
2047 }
Dave Barach3bbcfab2017-08-15 19:03:44 -04002048 else if (unformat (input, "fib-table %d", &table_id))
2049 ;
Dave Barach2c25a622017-06-26 11:35:07 -04002050 else
2051 break;
2052 }
2053
2054 if (!v4set && !v6set)
2055 return clib_error_return (0, "at least one v4 or v6 address required");
2056
2057 if (v4set)
2058 {
Dave Barach3bbcfab2017-08-15 19:03:44 -04002059 rv = tcp_configure_v4_source_address_range (vm, &v4start, &v4end,
2060 table_id);
2061 switch (rv)
Dave Barach2c25a622017-06-26 11:35:07 -04002062 {
Dave Barach3bbcfab2017-08-15 19:03:44 -04002063 case 0:
2064 break;
2065
2066 case VNET_API_ERROR_NO_SUCH_FIB:
2067 return clib_error_return (0, "Invalid table-id %d", table_id);
2068
2069 case VNET_API_ERROR_INVALID_ARGUMENT:
2070 return clib_error_return (0, "Invalid address range %U - %U",
2071 format_ip4_address, &v4start,
2072 format_ip4_address, &v4end);
2073 default:
2074 return clib_error_return (0, "error %d", rv);
2075 break;
Dave Barach2c25a622017-06-26 11:35:07 -04002076 }
Dave Barach2c25a622017-06-26 11:35:07 -04002077 }
2078 if (v6set)
2079 {
Dave Barach3bbcfab2017-08-15 19:03:44 -04002080 rv = tcp_configure_v6_source_address_range (vm, &v6start, &v6end,
2081 table_id);
2082 switch (rv)
2083 {
2084 case 0:
2085 break;
2086
2087 case VNET_API_ERROR_NO_SUCH_FIB:
2088 return clib_error_return (0, "Invalid table-id %d", table_id);
2089
2090 default:
2091 return clib_error_return (0, "error %d", rv);
2092 break;
2093 }
Dave Barach2c25a622017-06-26 11:35:07 -04002094 }
2095 return 0;
2096}
2097
2098/* *INDENT-OFF* */
2099VLIB_CLI_COMMAND (tcp_src_address_command, static) =
2100{
2101 .path = "tcp src-address",
2102 .short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range",
2103 .function = tcp_src_address,
2104};
2105/* *INDENT-ON* */
2106
Florin Coras3eb50622017-07-13 01:24:57 -04002107static u8 *
2108tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb)
2109{
2110#if TCP_SCOREBOARD_TRACE
Dave Barach2c25a622017-06-26 11:35:07 -04002111
Florin Coras3eb50622017-07-13 01:24:57 -04002112 scoreboard_trace_elt_t *block;
2113 int i = 0;
2114
2115 if (!sb->trace)
2116 return s;
2117
2118 s = format (s, "scoreboard trace:");
2119 vec_foreach (block, sb->trace)
2120 {
2121 s = format (s, "{%u, %u, %u, %u, %u}, ", block->start, block->end,
2122 block->ack, block->snd_una_max, block->group);
2123 if ((++i % 3) == 0)
2124 s = format (s, "\n");
2125 }
2126 return s;
2127#else
2128 return 0;
2129#endif
2130}
2131
2132static clib_error_t *
2133tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
2134 vlib_cli_command_t * cmd_arg)
2135{
2136 transport_connection_t *tconn = 0;
2137 tcp_connection_t *tc;
2138 u8 *s = 0;
2139 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2140 {
2141 if (unformat (input, "%U", unformat_transport_connection, &tconn,
2142 TRANSPORT_PROTO_TCP))
2143 ;
2144 else
2145 return clib_error_return (0, "unknown input `%U'",
2146 format_unformat_error, input);
2147 }
2148
2149 if (!TCP_SCOREBOARD_TRACE)
2150 {
2151 vlib_cli_output (vm, "scoreboard tracing not enabled");
2152 return 0;
2153 }
2154
2155 tc = tcp_get_connection_from_transport (tconn);
2156 s = tcp_scoreboard_dump_trace (s, &tc->sack_sb);
2157 vlib_cli_output (vm, "%v", s);
2158 return 0;
2159}
2160
2161/* *INDENT-OFF* */
2162VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) =
2163{
2164 .path = "show tcp scoreboard trace",
2165 .short_help = "show tcp scoreboard trace <connection>",
2166 .function = tcp_show_scoreboard_trace_fn,
2167};
2168/* *INDENT-ON* */
2169
2170u8 *
2171tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose)
2172{
2173 int i, trace_len;
2174 scoreboard_trace_elt_t *trace;
2175 u32 next_ack, left, group, has_new_ack = 0;
2176 tcp_connection_t _dummy_tc, *dummy_tc = &_dummy_tc;
2177 sack_block_t *block;
2178
Florin Coras776f3d82018-11-02 08:23:58 -07002179 if (!TCP_SCOREBOARD_TRACE)
2180 {
2181 s = format (s, "scoreboard tracing not enabled");
2182 return s;
2183 }
2184
Florin Coras3eb50622017-07-13 01:24:57 -04002185 if (!tc)
2186 return s;
2187
Dave Barachb7b92992018-10-17 10:38:51 -04002188 clib_memset (dummy_tc, 0, sizeof (*dummy_tc));
Florin Coras3eb50622017-07-13 01:24:57 -04002189 tcp_connection_timers_init (dummy_tc);
2190 scoreboard_init (&dummy_tc->sack_sb);
2191 dummy_tc->rcv_opts.flags |= TCP_OPTS_FLAG_SACK;
2192
Florin Coras776f3d82018-11-02 08:23:58 -07002193#if TCP_SCOREBOARD_TRACE
Florin Coras3eb50622017-07-13 01:24:57 -04002194 trace = tc->sack_sb.trace;
2195 trace_len = vec_len (tc->sack_sb.trace);
Florin Coras3eb50622017-07-13 01:24:57 -04002196#endif
2197
2198 for (i = 0; i < trace_len; i++)
2199 {
2200 if (trace[i].ack != 0)
2201 {
2202 dummy_tc->snd_una = trace[i].ack - 1448;
2203 dummy_tc->snd_una_max = trace[i].ack;
2204 }
2205 }
2206
2207 left = 0;
2208 while (left < trace_len)
2209 {
2210 group = trace[left].group;
2211 vec_reset_length (dummy_tc->rcv_opts.sacks);
2212 has_new_ack = 0;
2213 while (trace[left].group == group)
2214 {
2215 if (trace[left].ack != 0)
2216 {
2217 if (verbose)
2218 s = format (s, "Adding ack %u, snd_una_max %u, segs: ",
2219 trace[left].ack, trace[left].snd_una_max);
2220 dummy_tc->snd_una_max = trace[left].snd_una_max;
2221 next_ack = trace[left].ack;
2222 has_new_ack = 1;
2223 }
2224 else
2225 {
2226 if (verbose)
2227 s = format (s, "[%u, %u], ", trace[left].start,
2228 trace[left].end);
2229 vec_add2 (dummy_tc->rcv_opts.sacks, block, 1);
2230 block->start = trace[left].start;
2231 block->end = trace[left].end;
2232 }
2233 left++;
2234 }
2235
2236 /* Push segments */
2237 tcp_rcv_sacks (dummy_tc, next_ack);
2238 if (has_new_ack)
Florin Coras558e3e02019-09-06 12:56:58 -07002239 dummy_tc->snd_una = next_ack;
Florin Coras3eb50622017-07-13 01:24:57 -04002240
2241 if (verbose)
2242 s = format (s, "result: %U", format_tcp_scoreboard,
2243 &dummy_tc->sack_sb);
2244
2245 }
2246 s = format (s, "result: %U", format_tcp_scoreboard, &dummy_tc->sack_sb);
2247
2248 return s;
2249}
2250
2251static clib_error_t *
2252tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
2253 vlib_cli_command_t * cmd_arg)
2254{
2255 transport_connection_t *tconn = 0;
2256 tcp_connection_t *tc = 0;
2257 u8 *str = 0;
2258 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2259 {
2260 if (unformat (input, "%U", unformat_transport_connection, &tconn,
2261 TRANSPORT_PROTO_TCP))
2262 ;
2263 else
2264 return clib_error_return (0, "unknown input `%U'",
2265 format_unformat_error, input);
2266 }
2267
2268 if (!TCP_SCOREBOARD_TRACE)
2269 {
2270 vlib_cli_output (vm, "scoreboard tracing not enabled");
2271 return 0;
2272 }
2273
2274 tc = tcp_get_connection_from_transport (tconn);
2275 if (!tc)
2276 {
2277 vlib_cli_output (vm, "connection not found");
2278 return 0;
2279 }
2280 str = tcp_scoreboard_replay (str, tc, 1);
2281 vlib_cli_output (vm, "%v", str);
2282 return 0;
2283}
2284
2285/* *INDENT-OFF* */
2286VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) =
2287{
2288 .path = "tcp replay scoreboard",
2289 .short_help = "tcp replay scoreboard <connection>",
2290 .function = tcp_scoreboard_trace_fn,
2291};
2292/* *INDENT-ON* */
Dave Barach2c25a622017-06-26 11:35:07 -04002293
Pierre Pfister7fe51f32017-09-20 08:48:36 +02002294static clib_error_t *
2295show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
2296 vlib_cli_command_t * cmd_arg)
2297{
2298 tcp_main_t *tm = vnet_get_tcp_main ();
2299 if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2300 return clib_error_return (0, "unknown input `%U'", format_unformat_error,
2301 input);
2302 vlib_cli_output (vm, "IPv4 TCP punt: %s",
2303 tm->punt_unknown4 ? "enabled" : "disabled");
2304 vlib_cli_output (vm, "IPv6 TCP punt: %s",
2305 tm->punt_unknown6 ? "enabled" : "disabled");
2306 return 0;
2307}
2308/* *INDENT-OFF* */
2309VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
2310{
2311 .path = "show tcp punt",
2312 .short_help = "show tcp punt",
2313 .function = show_tcp_punt_fn,
2314};
2315/* *INDENT-ON* */
2316
Dave Barach68b0fb02017-02-28 15:15:56 -05002317/*
2318 * fd.io coding-style-patch-verification: ON
2319 *
2320 * Local Variables:
2321 * eval: (c-set-style "gnu")
2322 * End:
2323 */