/*
 * Copyright (c) 2017 SUSE LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef included_vnet_sctp_h
16#define included_vnet_sctp_h
17
18#include <vnet/vnet.h>
19#include <vnet/ip/ip.h>
20#include <vnet/sctp/sctp_timer.h>
21#include <vnet/sctp/sctp_packet.h>
22#include <vnet/session/transport.h>
23#include <vnet/session/session.h>
24
/*
 * SCTP timers.
 *
 * X-macro list of (symbol, display string) pairs. Define `_(sym, str)`
 * before expanding it and #undef it afterwards.
 */
#define foreach_sctp_timer                      \
  _(T1_INIT, "T1_INIT")                         \
  _(T1_COOKIE, "T1_COOKIE")                     \
  _(T2_SHUTDOWN, "T2_SHUTDOWN")                 \
  _(T3_RXTX, "T3_RXTX")                         \
  _(T4_HEARTBEAT, "T4_HB")                      \
  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")
33
34typedef enum _sctp_timers
35{
36#define _(sym, str) SCTP_TIMER_##sym,
37 foreach_sctp_timer
38#undef _
39 SCTP_N_TIMERS
40} sctp_timers_e;
41
42#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
43
Marco Varlesedf5a99c2018-02-06 13:48:30 +010044always_inline char *
45sctp_timer_to_string (u8 timer_id)
46{
47 switch (timer_id)
48 {
49 case SCTP_TIMER_T1_INIT:
50 return "SCTP_TIMER_T1_INIT";
51 case SCTP_TIMER_T1_COOKIE:
52 return "SCTP_TIMER_T1_COOKIE";
53 case SCTP_TIMER_T2_SHUTDOWN:
54 return "SCTP_TIMER_T2_SHUTDOWN";
55 case SCTP_TIMER_T3_RXTX:
56 return "SCTP_TIMER_T3_RXTX";
57 case SCTP_TIMER_T4_HEARTBEAT:
58 return "SCTP_TIMER_T4_HEARTBEAT";
59 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
60 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
61 }
62 return NULL;
63}
64
Marco Varlese191a5942017-10-30 18:17:21 +010065typedef enum _sctp_error
66{
67#define sctp_error(n,s) SCTP_ERROR_##n,
68#include <vnet/sctp/sctp_error.def>
69#undef sctp_error
70 SCTP_N_ERROR,
71} sctp_error_t;
72
/** Chunk flags value meaning "no flag set". */
#define NO_FLAG 0

/*
 * Chunk flag tests. Each yields the masked bit (non-zero when set).
 * T and E both test bit 0; B tests bit 1 and U tests bit 2.
 */
#define IS_T_BIT_SET(var) ((var) & (1))
#define IS_E_BIT_SET(var) ((var) & (1))
#define IS_B_BIT_SET(var) ((var) & (1<<1))
#define IS_U_BIT_SET(var) ((var) & (1<<2))

/** Number of sub-connection slots held by one SCTP connection. */
#define MAX_SCTP_CONNECTIONS 8
/** Index of the main sub-connection inside sub_conn[]. */
#define MAIN_SCTP_SUB_CONN_IDX 0
82
/*
 * Buffer trajectory tracing hook.
 *
 * With VLIB_BUFFER_TRACE_TRAJECTORY enabled this invokes the
 * vlib_buffer_trace_trajectory_cb callback on (b, start); otherwise it
 * expands to nothing and its arguments are not evaluated.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define sctp_trajectory_add_start(b, start)			\
{								\
  (*vlib_buffer_trace_trajectory_cb) (b, start);		\
}
#else
#define sctp_trajectory_add_start(b, start)
#endif
91
/** States of a sub-connection (stored in sctp_sub_connection_t.state). */
enum _sctp_subconn_state
{
  SCTP_SUBCONN_STATE_DOWN = 0,	/* DOWN is the zero value */
  SCTP_SUBCONN_STATE_UP,
  SCTP_SUBCONN_STATE_ALLOW_HB
};
98
Marco Varlesef3ab4892018-02-19 15:23:13 +010099#define SCTP_INITIAL_SSHTRESH 65535
Marco Varlese191a5942017-10-30 18:17:21 +0100100typedef struct _sctp_sub_connection
101{
102 transport_connection_t connection; /**< Common transport data. First! */
103 void *parent; /**< Link to the parent-super connection */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100104
105 u32 error_count; /**< The current error count for this destination. */
106 u32 error_threshold; /**< Current error threshold for this destination,
107 i.e. what value marks the destination down if error count reaches this value. */
Marco Varlesef3ab4892018-02-19 15:23:13 +0100108 u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
109 the sender based on observed network conditions. */
110 u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
111 sender to distinguish slow-start and congestion avoidance phases. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100112
Marco Varlese21c8baf2018-02-02 17:17:51 +0100113 u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
114
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100115 u32 RTO; /**< The current retransmission timeout value. */
116 u32 SRTT; /**< The current smoothed round-trip time. */
Marco Varlese21c8baf2018-02-02 17:17:51 +0100117 f32 RTTVAR; /**< The current RTT variation. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100118
119 u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
120 congestion avoidance mode (see Section 7.2.2).*/
121
122 u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
123
124 u16 PMTU; /**< The current known path MTU. */
125
126 u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
127
128 u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
129 this address is currently being used to compute an RTT.
130 If this flag is 0, the next DATA chunk sent to this destination
131 should be used to compute an RTT and this flag should be set.
132 Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
133 clear this flag. */
134
Marco Varlese54432f82018-02-15 17:01:56 +0100135 u32 last_seen; /**< The time to which this destination was last sent a packet to.
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100136 This can be used to determine if a HEARTBEAT is needed. */
Marco Varlese191a5942017-10-30 18:17:21 +0100137
Marco Varlesef3ab4892018-02-19 15:23:13 +0100138 u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
139
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100140 u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
Marco Varlese54432f82018-02-15 17:01:56 +0100141 If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100142
Marco Varlesea38783e2018-02-13 12:38:52 +0100143 u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
144
Marco Varlesef3ab4892018-02-19 15:23:13 +0100145 u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
Marco Varlese54432f82018-02-15 17:01:56 +0100146
Marco Varlese191a5942017-10-30 18:17:21 +0100147} sctp_sub_connection_t;
148
149typedef struct
150{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100151 u32 a_rwnd; /**< Maximum segment size advertised */
Marco Varlese191a5942017-10-30 18:17:21 +0100152
153} sctp_options_t;
154
/*
 * Useful macros to deal with the out_of_order_map (array of bits).
 *
 * A is an array of 32-bit words and k a bit index. Both arguments are
 * fully parenthesized so that expressions such as `base + i` index the
 * intended bit, and the mask is built from an unsigned constant so that
 * bit 31 does not shift into the sign bit of an int.
 * k is evaluated twice: do not pass expressions with side effects.
 */
#define SET_BIT(A,k)     ( (A)[((k) / 32)] |=  (1u << ((k) % 32)) )
#define CLEAR_BIT(A,k)   ( (A)[((k) / 32)] &= ~(1u << ((k) % 32)) )
#define TEST_BIT(A,k)    ( (A)[((k) / 32)] &   (1u << ((k) % 32)) )
159
/**
 * Reverse the order of the n bytes starting at pv, in place.
 *
 * @param pv - start of the byte range
 * @param n  - number of bytes; 0 and 1 leave the memory untouched
 */
always_inline void
_bytes_swap (void *pv, size_t n)
{
  char *p = pv;
  size_t lo, hi;

  /* Nothing to swap; also keeps `n - 1` from wrapping around when n == 0,
   * which would otherwise walk far outside the buffer. */
  if (n < 2)
    return;

  for (lo = 0, hi = n - 1; lo < hi; lo++, hi--)
    {
      char tmp = p[lo];
      p[lo] = p[hi];
      p[hi] = tmp;
    }
}
172
/* Reverse the bytes of the object x in place (x must be an lvalue).
 * The expansion already ends with a semicolon. */
#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x));

/* Number of u32 words in sctp_connection_t.out_of_order_map */
#define MAX_INFLIGHT_PACKETS	128
#define MAX_ENQUEABLE_SACKS	2

/* This parameter indicates to the receiver how much increment in
 * milliseconds the sender wishes the receiver to add to its default
 * cookie life-span.
 */
#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
183
Marco Varlese191a5942017-10-30 18:17:21 +0100184typedef struct _sctp_connection
185{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100186 sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
Marco Varlese191a5942017-10-30 18:17:21 +0100187
188 u8 state; /**< SCTP state as per sctp_state_t */
189 u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100190
Marco Varlese191a5942017-10-30 18:17:21 +0100191 u32 local_tag; /**< INIT_TAG generated locally */
192 u32 remote_tag; /**< INIT_TAG generated by the remote peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100193
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100194 u32 local_initial_tsn; /**< Initial TSN generated locally */
195 u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100196
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100197 u32 peer_cookie_life_span_increment;
Marco Varlese191a5942017-10-30 18:17:21 +0100198
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100199 u32 overall_err_count; /**< The overall association error count. */
200 u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
201 reaches will cause this association to be torn down. */
Marco Varlese191a5942017-10-30 18:17:21 +0100202
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100203 u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
204
205 u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
206 This is sent in the INIT or INIT ACK chunk to the peer
207 and incremented each time a DATA chunk is assigned a
208 TSN (normally just prior to transmit or during
209 fragmentation). */
210
Marco Varlesef3ab4892018-02-19 15:23:13 +0100211 u32 last_unacked_tsn; /** < Last TSN number still unacked */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100212 u32 next_tsn_expected; /**< The next TSN number expected to be received. */
213
214 u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
215 is set initially by taking the peer's initial TSN,
216 received in the INIT or INIT ACK chunk, and
217 subtracting one from it. */
218
219 u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
220 TSNs have been received (relative to the Last Rcvd TSN).
221 If no gaps exist, i.e., no out-of-order packets have been received,
222 this array will be set to all zero. */
223
224 u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
225 This is initialized to 0. When a packet is received it is incremented.
226 If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
227 Note: This is used only when no DATA chunks are received out-of-order.
228 When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
229
Marco Varlesef3ab4892018-02-19 15:23:13 +0100230 u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100231
Marco Varlese91389ac2018-01-31 11:00:01 +0100232 u8 overall_sending_status; /**< 0 indicates first fragment of a user message
233 1 indicates normal stream
234 2 indicates last fragment of a user message */
235
Marco Varlese191a5942017-10-30 18:17:21 +0100236 sctp_options_t snd_opts;
Marco Varlese191a5942017-10-30 18:17:21 +0100237
238 u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100239
Marco Varlese191a5942017-10-30 18:17:21 +0100240} sctp_connection_t;
241
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100242typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
Marco Varlese191a5942017-10-30 18:17:21 +0100243
244sctp_connection_t *sctp_connection_new (u8 thread_index);
245void sctp_sub_connection_add_ip4 (u8 thread_index,
246 sctp_ipv4_addr_param_t * ipv4_addr);
247void sctp_sub_connection_add_ip6 (u8 thread_index,
248 sctp_ipv6_addr_param_t * ipv6_addr);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100249void sctp_connection_close (sctp_connection_t * sctp_conn);
250void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
251void sctp_connection_del (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100252
253u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100254void sctp_send_init (sctp_connection_t * sctp_conn);
255void sctp_send_shutdown (sctp_connection_t * sctp_conn);
Marco Varlese54432f82018-02-15 17:01:56 +0100256void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesebe2251b2018-02-07 12:22:41 +0100257 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100258void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesefae40392018-02-14 15:38:35 +0100259 vlib_buffer_t * b0);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100260void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100261void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
262 u8 is_ip4);
263void sctp_flush_frames_to_output (u8 thread_index);
264void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
265
266format_function_t format_sctp_state;
267
268u8 *format_sctp_connection_id (u8 * s, va_list * args);
269u8 *format_sctp_connection (u8 * s, va_list * args);
270u8 *format_sctp_scoreboard (u8 * s, va_list * args);
271u8 *format_sctp_header (u8 * s, va_list * args);
272u8 *format_sctp_tx_trace (u8 * s, va_list * args);
273
274clib_error_t *sctp_init (vlib_main_t * vm);
Marco Varlese54432f82018-02-15 17:01:56 +0100275void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
276void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
277void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
278void sctp_init_mss (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100279
Marco Varlese54432f82018-02-15 17:01:56 +0100280void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
281 vlib_buffer_t * b, ip4_address_t * ip4_addr,
Marco Varlese191a5942017-10-30 18:17:21 +0100282 ip6_address_t * ip6_addr);
Marco Varlese54432f82018-02-15 17:01:56 +0100283void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100284 vlib_buffer_t * b,
285 sctp_state_cookie_param_t * sc);
Marco Varlese54432f82018-02-15 17:01:56 +0100286void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100287 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100288void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
289 vlib_buffer_t * b);
290void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100291 vlib_buffer_t * b);
Marco Varlese191a5942017-10-30 18:17:21 +0100292
Marco Varlese54432f82018-02-15 17:01:56 +0100293u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100294
/** IP protocol number assigned to SCTP. */
#define IP_PROTOCOL_SCTP	132
296
/**
 * SCTP FSM state definitions as per RFC4960.
 *
 * X-macro list of (symbol, display string) pairs; define `_(sym, str)`
 * before expanding it.
 */
#define foreach_sctp_fsm_state                  \
  _(CLOSED, "CLOSED")                           \
  _(COOKIE_WAIT, "COOKIE_WAIT")                 \
  _(COOKIE_ECHOED, "COOKIE_ECHOED")             \
  _(ESTABLISHED, "ESTABLISHED")                 \
  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")       \
  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")             \
  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")     \
  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
307
308typedef enum _sctp_state
309{
310#define _(sym, str) SCTP_STATE_##sym,
311 foreach_sctp_fsm_state
312#undef _
313 SCTP_N_STATES
314} sctp_state_t;
315
316always_inline char *
317sctp_state_to_string (u8 state)
318{
319 switch (state)
320 {
321 case SCTP_STATE_CLOSED:
322 return "SCTP_STATE_CLOSED";
323 case SCTP_STATE_COOKIE_WAIT:
324 return "SCTP_STATE_COOKIE_WAIT";
325 case SCTP_STATE_COOKIE_ECHOED:
326 return "SCTP_STATE_COOKIE_ECHOED";
327 case SCTP_STATE_ESTABLISHED:
328 return "SCTP_STATE_ESTABLISHED";
329 case SCTP_STATE_SHUTDOWN_PENDING:
330 return "SCTP_STATE_SHUTDOWN_PENDING";
331 case SCTP_STATE_SHUTDOWN_SENT:
332 return "SCTP_STATE_SHUTDOWN_SENT";
333 case SCTP_STATE_SHUTDOWN_RECEIVED:
334 return "SCTP_STATE_SHUTDOWN_RECEIVED";
335 case SCTP_STATE_SHUTDOWN_ACK_SENT:
336 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
337 }
338 return NULL;
339}
340
341always_inline char *
342sctp_chunk_to_string (u8 type)
343{
344 switch (type)
345 {
346 case DATA:
347 return "DATA";
348 case INIT:
349 return "INIT";
350 case INIT_ACK:
351 return "INIT_ACK";
352 case SACK:
353 return "SACK";
354 case HEARTBEAT:
355 return "HEARTBEAT";
356 case HEARTBEAT_ACK:
357 return "HEARTBEAT_ACK";
358 case ABORT:
359 return "ABORT";
360 case SHUTDOWN:
361 return "SHUTDOWN";
362 case SHUTDOWN_ACK:
363 return "SHUTDOWN_ACK";
364 case OPERATION_ERROR:
365 return "OPERATION_ERROR";
366 case COOKIE_ECHO:
367 return "COOKIE_ECHO";
368 case COOKIE_ACK:
369 return "COOKIE_ACK";
370 case ECNE:
371 return "ECNE";
372 case CWR:
373 return "CWR";
374 case SHUTDOWN_COMPLETE:
375 return "SHUTDOWN_COMPLETE";
376 }
377 return NULL;
378}
379
380always_inline char *
381sctp_optparam_type_to_string (u8 type)
382{
383 switch (type)
384 {
385 case SCTP_IPV4_ADDRESS_TYPE:
386 return "SCTP_IPV4_ADDRESS_TYPE";
387 case SCTP_IPV6_ADDRESS_TYPE:
388 return "SCTP_IPV6_ADDRESS_TYPE";
389 case SCTP_STATE_COOKIE_TYPE:
390 return "SCTP_STATE_COOKIE_TYPE";
391 case SCTP_UNRECOGNIZED_TYPE:
392 return "SCTP_UNRECOGNIZED_TYPE";
393 case SCTP_COOKIE_PRESERVATIVE_TYPE:
394 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
395 case SCTP_HOSTNAME_ADDRESS_TYPE:
396 return "SCTP_HOSTNAME_ADDRESS_TYPE";
397 case SCTP_SUPPORTED_ADDRESS_TYPES:
398 return "SCTP_SUPPORTED_ADDRESS_TYPES";
399 }
400 return NULL;
401}
402
/*
 * Time base and protocol parameters.
 *
 * Every expansion is parenthesized so the constants behave as single
 * values inside larger expressions (e.g. `x / SCTP_RTO_MIN`).
 */
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1 / SCTP_TICK))	/**< SCTP tick frequency */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/* As per RFC4960, page 83 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST 4
/* Smoothing weights. Written as floating-point ratios: the integer forms
 * 1/8 and 1/4 evaluate to 0 whenever they are the left operand of a
 * multiplication or stand alone. */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	// the overall connection
#define SCTP_PATH_MAX_RETRANS 5	// number of attempts per destination address
#define SCTP_MAX_INIT_RETRANS 8	// number of attempts
#define SCTP_HB_INTERVAL (30 * SHZ)	/* 30 seconds */
#define SCTP_HB_MAX_BURST 1

#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)	/* 15 seconds; the time-interval after which the connection is considered IDLE */

#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */
424
425typedef struct _sctp_lookup_dispatch
426{
427 u8 next, error;
428} sctp_lookup_dispatch_t;
429
430typedef struct _sctp_main
431{
432 /* Per-worker thread SCTP connection pools */
433 sctp_connection_t **connections;
434
435 /* Pool of listeners. */
436 sctp_connection_t *listener_pool;
437
438 /** Dispatch table by state and flags */
439 sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
440
441 u8 log2_tstamp_clocks_per_tick;
442 f64 tstamp_ticks_per_clock;
443 u32 *time_now;
444
445 /** per-worker tx buffer free lists */
446 u32 **tx_buffers;
447 /** per-worker tx frames to SCTP 4/6 output nodes */
448 vlib_frame_t **tx_frames[2];
449 /** per-worker tx frames to ip 4/6 lookup nodes */
450 vlib_frame_t **ip_lookup_tx_frames[2];
451
452 /* Per worker-thread timer wheel for connections timers */
453 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
454
455 /* Pool of half-open connections on which we've sent a SYN */
456 sctp_connection_t *half_open_connections;
457 clib_spinlock_t half_open_lock;
458
459 /* TODO: Congestion control algorithms registered */
460 /* sctp_cc_algorithm_t *cc_algos; */
461
462 /* Flag that indicates if stack is on or off */
463 u8 is_enabled;
464
465 /** Number of preallocated connections */
466 u32 preallocated_connections;
467
468 /** Transport table (preallocation) size parameters */
469 u32 local_endpoints_table_memory;
470 u32 local_endpoints_table_buckets;
471
472 /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
473 ip4_address_t *ip4_src_addresses;
474 u32 last_v4_address_rotor;
475 u32 last_v6_address_rotor;
476 ip6_address_t *ip6_src_addresses;
477
478 /** vlib buffer size */
479 u32 bytes_per_buffer;
480
481 u8 punt_unknown4;
482 u8 punt_unknown6;
483
484} sctp_main_t;
485
486extern sctp_main_t sctp_main;
487extern vlib_node_registration_t sctp4_input_node;
488extern vlib_node_registration_t sctp6_input_node;
489extern vlib_node_registration_t sctp4_output_node;
490extern vlib_node_registration_t sctp6_output_node;
491
492always_inline sctp_main_t *
493vnet_get_sctp_main ()
494{
495 return &sctp_main;
496}
497
498always_inline sctp_header_t *
499sctp_buffer_hdr (vlib_buffer_t * b)
500{
501 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
502 return (sctp_header_t *) (b->data + b->current_data
503 + vnet_buffer (b)->sctp.hdr_offset);
504}
505
506clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
507
508always_inline sctp_connection_t *
509sctp_half_open_connection_get (u32 conn_index)
510{
511 sctp_connection_t *tc = 0;
512 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
513 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
514 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
515 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
516 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
517 return tc;
518}
519
520/**
521 * Cleanup half-open connection
522 *
523 */
524always_inline void
525sctp_half_open_connection_del (sctp_connection_t * tc)
526{
527 sctp_main_t *tm = vnet_get_sctp_main ();
528 clib_spinlock_lock_if_init (&tm->half_open_lock);
529 pool_put_index (tm->half_open_connections,
530 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
531 if (CLIB_DEBUG)
532 memset (tc, 0xFA, sizeof (*tc));
533 clib_spinlock_unlock_if_init (&tm->half_open_lock);
534}
535
536always_inline u32
537sctp_set_time_now (u32 thread_index)
538{
539 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
540 * sctp_main.tstamp_ticks_per_clock;
541 return sctp_main.time_now[thread_index];
542}
543
544always_inline void
545sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
546 u32 interval)
547{
548 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
549 vlib_get_thread_index ());
550 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
551 SCTP_TIMER_HANDLE_INVALID);
552
553 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
Marco Varlese21c8baf2018-02-02 17:17:51 +0100554 sub->timers[timer_id] =
Marco Varlese191a5942017-10-30 18:17:21 +0100555 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
556 sub->c_c_index, timer_id, interval);
557}
558
559always_inline void
560sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
561{
562 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
563 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
564 return;
565
566 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
567
568 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
569 sub->timers[timer_id]);
570 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
571}
572
Marco Varlese191a5942017-10-30 18:17:21 +0100573/**
574 * Try to cleanup half-open connection
575 *
576 * If called from a thread that doesn't own tc, the call won't have any
577 * effect.
578 *
579 * @param tc - connection to be cleaned up
580 * @return non-zero if cleanup failed.
581 */
582always_inline int
583sctp_half_open_connection_cleanup (sctp_connection_t * tc)
584{
585 /* Make sure this is the owning thread */
586 if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
587 vlib_get_thread_index ())
588 return 1;
589 sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
590 sctp_half_open_connection_del (tc);
591 return 0;
592}
593
594always_inline u32
595sctp_header_bytes ()
596{
597 return sizeof (sctp_header_t);
598}
599
600always_inline sctp_connection_t *
601sctp_get_connection_from_transport (transport_connection_t * tconn)
602{
603 ASSERT (tconn != NULL);
604
605 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
606#if SCTP_ADV_DEBUG
607 if (sub == NULL)
608 SCTP_ADV_DBG ("sub == NULL");
609 if (sub->parent == NULL)
610 SCTP_ADV_DBG ("sub->parent == NULL");
611#endif
612 return (sctp_connection_t *) sub->parent;
613}
614
615always_inline u32
616sctp_time_now (void)
617{
618 return sctp_main.time_now[vlib_get_thread_index ()];
619}
620
/* Absolute value of x. The expansion is fully parenthesized and carries
 * no trailing semicolon, so it can be used inside larger expressions.
 * x is evaluated twice: do not pass expressions with side effects. */
#define ABS(x) (((x) > 0) ? (x) : -(x))
622
623always_inline void
624sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
625{
626 /* See RFC4960, 6.3.1. RTO Calculation */
627 u32 RTO = 0;
628 f32 RTTVAR = 0;
629 u32 now = sctp_time_now ();
630 u32 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
631 u32 R = prev_ts - now;
632
633 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
634 {
635 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
636 return;
637 }
638
639 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
640 {
641 sctp_conn->sub_conn[conn_idx].SRTT = R;
642 RTTVAR = R / 2;
643
644 if (RTTVAR == 0)
645 RTTVAR = 100e-3; /* 100 ms */
646
647 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
648 }
649 else // C3: RTT already exists; let's recalculate
650 {
651 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
652 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
653
654 if (RTTVAR == 0)
655 RTTVAR = 100e-3; /* 100 ms */
656
657 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
658
659 sctp_conn->sub_conn[conn_idx].SRTT =
660 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
661 SCTP_RTO_ALPHA * R;
662 }
663
664 RTO =
665 sctp_conn->sub_conn[conn_idx].SRTT +
666 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
667 if (RTO < SCTP_RTO_MIN) // C6
668 RTO = SCTP_RTO_MIN;
669
670 if (RTO > SCTP_RTO_MAX) // C7
671 RTO = SCTP_RTO_MAX;
672
673 sctp_conn->sub_conn[conn_idx].RTO = RTO;
674}
675
Marco Varlese191a5942017-10-30 18:17:21 +0100676always_inline void
677sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
678 u32 interval)
679{
680 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
681 vlib_get_thread_index ());
682 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
683
684 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
685 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
686 sub->timers[timer_id]);
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100687
Marco Varlese191a5942017-10-30 18:17:21 +0100688 tc->sub_conn[conn_idx].timers[timer_id] =
689 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
690 sub->c_c_index, timer_id, interval);
691}
692
693always_inline sctp_connection_t *
694sctp_listener_get (u32 tli)
695{
696 return pool_elt_at_index (sctp_main.listener_pool, tli);
697}
698
699#endif
700
701always_inline sctp_connection_t *
702sctp_connection_get (u32 conn_index, u32 thread_index)
703{
704 if (PREDICT_FALSE
705 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
706 return 0;
707 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
708}
709
Marco Varlese54432f82018-02-15 17:01:56 +0100710#define SELECT_MAX_RETRIES 8
Marco Varlese191a5942017-10-30 18:17:21 +0100711
Marco Varlese54432f82018-02-15 17:01:56 +0100712always_inline u8
713sctp_data_subconn_select (sctp_connection_t * sctp_conn)
714{
Marco Varlese54432f82018-02-15 17:01:56 +0100715 u32 sub = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlesef3ab4892018-02-19 15:23:13 +0100716 u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd;
717 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100718 {
Marco Varlesef3ab4892018-02-19 15:23:13 +0100719 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
720 continue;
721
722 if (sctp_conn->sub_conn[i].cwnd > cwnd)
723 {
724 sub = i;
725 cwnd = sctp_conn->sub_conn[i].cwnd;
726 }
Marco Varlese191a5942017-10-30 18:17:21 +0100727 }
Marco Varlese54432f82018-02-15 17:01:56 +0100728 return sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100729}
730
731always_inline u8
Marco Varlese54432f82018-02-15 17:01:56 +0100732sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
Marco Varlese191a5942017-10-30 18:17:21 +0100733{
Marco Varlese54432f82018-02-15 17:01:56 +0100734 u8 i;
Marco Varlese191a5942017-10-30 18:17:21 +0100735
Marco Varlese54432f82018-02-15 17:01:56 +0100736 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100737 {
Marco Varlese54432f82018-02-15 17:01:56 +0100738 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
739 ip6h->dst_address.as_u64[0] &&
740 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
741 ip6h->dst_address.as_u64[1] &&
742 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
743 ip6h->src_address.as_u64[0] &&
744 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
745 ip6h->src_address.as_u64[1])
746 return i;
Marco Varlese191a5942017-10-30 18:17:21 +0100747 }
Marco Varlese54432f82018-02-15 17:01:56 +0100748 clib_warning ("Did not find a sub-connection; defaulting to %u",
749 MAIN_SCTP_SUB_CONN_IDX);
750 return MAIN_SCTP_SUB_CONN_IDX;
751}
752
753always_inline u8
754sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
755{
756 u8 i;
757
758 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
759 {
760 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
761 ip4h->dst_address.as_u32
762 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
763 ip4h->src_address.as_u32)
764 return i;
765 }
766 clib_warning ("Did not find a sub-connection; defaulting to %u",
767 MAIN_SCTP_SUB_CONN_IDX);
768 return MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100769}
770
771/**
772 * Push SCTP header to buffer
773 *
774 * @param vm - vlib_main
775 * @param b - buffer to write the header to
776 * @param sp_net - source port net order
777 * @param dp_net - destination port net order
778 * @param sctp_hdr_opts_len - header and options length in bytes
779 *
780 * @return - pointer to start of SCTP header
781 */
782always_inline void *
783vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
784 u8 sctp_hdr_opts_len)
785{
786 sctp_full_hdr_t *full_hdr;
787
788 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
789
790 full_hdr->hdr.src_port = sp;
791 full_hdr->hdr.dst_port = dp;
792 full_hdr->hdr.checksum = 0;
793 return full_hdr;
794}
795
796/**
797 * Push SCTP header to buffer
798 *
799 * @param b - buffer to write the header to
800 * @param sp_net - source port net order
801 * @param dp_net - destination port net order
802 * @param sctp_hdr_opts_len - header and options length in bytes
803 *
804 * @return - pointer to start of SCTP header
805 */
806always_inline void *
807vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
808 u8 sctp_hdr_opts_len)
809{
810 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
811 sctp_hdr_opts_len);
812}
813
Marco Varlesef3ab4892018-02-19 15:23:13 +0100814always_inline void
815update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
816{
817 u8 i;
818 u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX;
819
820 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
821 {
822 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
823 {
824 if (sctp_conn->sub_conn[i].PMTU <
825 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
826 smallest_pmtu_index = i;
827 }
828 }
829
830 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
831}
832
833/* As per RFC4960; section 7.2.1: Slow-Start */
834always_inline void
835sctp_init_cwnd (sctp_connection_t * sctp_conn)
836{
837 u8 i;
838 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
839 {
840 /* Section 7.2.1; point (1) */
841 sctp_conn->sub_conn[i].cwnd =
842 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
843 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
844
845 /* Section 7.2.1; point (3) */
846 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
847
848 /* Section 7.2.2; point (1) */
849 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
850 }
851}
852
853always_inline u8
854sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
855{
856 return 0;
857}
858
859always_inline u8
860cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
861{
862 return 0;
863}
864
865/* As per RFC4960; section 7.2.1: Slow-Start */
866always_inline void
867update_cwnd (sctp_connection_t * sctp_conn)
868{
869 u8 i;
870
871 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
872 {
873 /* Section 7.2.1; point (2) */
874 if (sctp_conn->sub_conn[i].is_retransmitting)
875 {
876 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
877 continue;
878 }
879
880 /* Section 7.2.2; point (4) */
881 if (sctp_conn->sub_conn[i].last_data_ts >
882 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
883 {
884 sctp_conn->sub_conn[i].cwnd =
885 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
886 4 * sctp_conn->sub_conn[i].PMTU);
887 continue;
888 }
889
890 /* Section 7.2.1; point (5) */
891 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
892 {
893 if (!cwnd_fully_utilized (sctp_conn, i))
894 continue;
895
896 if (sctp_in_cong_recovery (sctp_conn, i))
897 continue;
898
899 sctp_conn->sub_conn[i].cwnd =
900 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
901 }
902 }
903}
904
Marco Varlese191a5942017-10-30 18:17:21 +0100905/*
906 * fd.io coding-style-patch-verification: ON
907 *
908 * Local Variables:
909 * eval: (c-set-style "gnu")
910 * End:
911 */