/*
 * Copyright (c) 2017 SUSE LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef included_vnet_sctp_h
16#define included_vnet_sctp_h
17
18#include <vnet/vnet.h>
19#include <vnet/ip/ip.h>
20#include <vnet/sctp/sctp_timer.h>
21#include <vnet/sctp/sctp_packet.h>
22#include <vnet/session/transport.h>
23#include <vnet/session/session.h>
24
/*
 * SCTP timers.
 *
 * X-macro list of (symbol, display string) pairs; each entry becomes one
 * SCTP_TIMER_<symbol> enumerator below.
 */
#define foreach_sctp_timer			\
  _(T1_INIT, "T1_INIT")				\
  _(T1_COOKIE, "T1_COOKIE")			\
  _(T2_SHUTDOWN, "T2_SHUTDOWN")			\
  _(T3_RXTX, "T3_RXTX")				\
  _(T4_HEARTBEAT, "T4_HB")			\
  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")

/** Timer identifiers; SCTP_N_TIMERS is the number of timers. */
typedef enum _sctp_timers
{
#define _(sym, str) SCTP_TIMER_##sym,
  foreach_sctp_timer
#undef _
  SCTP_N_TIMERS
} sctp_timers_e;

/** Handle value stored in a timer slot that has no running timer. */
#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
43
Marco Varlesedf5a99c2018-02-06 13:48:30 +010044always_inline char *
45sctp_timer_to_string (u8 timer_id)
46{
47 switch (timer_id)
48 {
49 case SCTP_TIMER_T1_INIT:
50 return "SCTP_TIMER_T1_INIT";
51 case SCTP_TIMER_T1_COOKIE:
52 return "SCTP_TIMER_T1_COOKIE";
53 case SCTP_TIMER_T2_SHUTDOWN:
54 return "SCTP_TIMER_T2_SHUTDOWN";
55 case SCTP_TIMER_T3_RXTX:
56 return "SCTP_TIMER_T3_RXTX";
57 case SCTP_TIMER_T4_HEARTBEAT:
58 return "SCTP_TIMER_T4_HEARTBEAT";
59 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
60 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
61 }
62 return NULL;
63}
64
Marco Varlese191a5942017-10-30 18:17:21 +010065typedef enum _sctp_error
66{
67#define sctp_error(n,s) SCTP_ERROR_##n,
68#include <vnet/sctp/sctp_error.def>
69#undef sctp_error
70 SCTP_N_ERROR,
71} sctp_error_t;
72
/** Value used when no chunk flag is set. */
#define NO_FLAG 0

/*
 * Chunk flag tests. T and E both test bit 0, B tests bit 1 and U tests
 * bit 2. Each macro yields the masked value (non-zero when the bit is set),
 * not a normalised 0/1.
 */
#define IS_T_BIT_SET(var) ((var) & (1))
#define IS_E_BIT_SET(var) ((var) & (1))
#define IS_B_BIT_SET(var) ((var) & (1<<1))
#define IS_U_BIT_SET(var) ((var) & (1<<2))
79
/** Number of sub-connection slots held by each sctp_connection_t. */
#define MAX_SCTP_CONNECTIONS 8
/** Index of the main sub-connection within sctp_connection_t.sub_conn. */
#define MAIN_SCTP_SUB_CONN_IDX 0
82
/*
 * Buffer trajectory tracing hook. Calls the trajectory callback when
 * VLIB_BUFFER_TRACE_TRAJECTORY is enabled and expands to nothing otherwise.
 *
 * The enabled form is wrapped in do/while (0) so that
 * "sctp_trajectory_add_start (b, n);" is a single statement and can be used
 * as the body of an if/else without braces.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define sctp_trajectory_add_start(b, start)			\
do								\
  {								\
    (*vlib_buffer_trace_trajectory_cb) (b, start);		\
  }								\
while (0)
#else
#define sctp_trajectory_add_start(b, start)
#endif
91
/** Values stored in sctp_sub_connection_t.state. */
enum _sctp_subconn_state
{
  SCTP_SUBCONN_STATE_DOWN = 0,
  SCTP_SUBCONN_STATE_UP,
  SCTP_SUBCONN_STATE_ALLOW_HB
};
98
Marco Varlesef3ab4892018-02-19 15:23:13 +010099#define SCTP_INITIAL_SSHTRESH 65535
Marco Varlese191a5942017-10-30 18:17:21 +0100100typedef struct _sctp_sub_connection
101{
102 transport_connection_t connection; /**< Common transport data. First! */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100103
Marco Varlese04e5d642018-02-23 17:43:06 +0100104 u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100105 u32 error_count; /**< The current error count for this destination. */
106 u32 error_threshold; /**< Current error threshold for this destination,
107 i.e. what value marks the destination down if error count reaches this value. */
Marco Varlesef3ab4892018-02-19 15:23:13 +0100108 u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
109 the sender based on observed network conditions. */
110 u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
111 sender to distinguish slow-start and congestion avoidance phases. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100112
Marco Varlese21c8baf2018-02-02 17:17:51 +0100113 u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
114
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100115 u32 RTO; /**< The current retransmission timeout value. */
116 u32 SRTT; /**< The current smoothed round-trip time. */
Marco Varlese21c8baf2018-02-02 17:17:51 +0100117 f32 RTTVAR; /**< The current RTT variation. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100118
119 u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
120 congestion avoidance mode (see Section 7.2.2).*/
121
122 u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
123
124 u16 PMTU; /**< The current known path MTU. */
125
126 u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
127
128 u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
129 this address is currently being used to compute an RTT.
130 If this flag is 0, the next DATA chunk sent to this destination
131 should be used to compute an RTT and this flag should be set.
132 Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
133 clear this flag. */
134
Marco Varlese54432f82018-02-15 17:01:56 +0100135 u32 last_seen; /**< The time to which this destination was last sent a packet to.
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100136 This can be used to determine if a HEARTBEAT is needed. */
Marco Varlese191a5942017-10-30 18:17:21 +0100137
Marco Varlesef3ab4892018-02-19 15:23:13 +0100138 u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
139
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100140 u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
Marco Varlese54432f82018-02-15 17:01:56 +0100141 If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100142
Marco Varlesea38783e2018-02-13 12:38:52 +0100143 u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
144
Marco Varlesef3ab4892018-02-19 15:23:13 +0100145 u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
Marco Varlese54432f82018-02-15 17:01:56 +0100146
Marco Varlese191a5942017-10-30 18:17:21 +0100147} sctp_sub_connection_t;
148
149typedef struct
150{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100151 u32 a_rwnd; /**< Maximum segment size advertised */
Marco Varlese191a5942017-10-30 18:17:21 +0100152
153} sctp_options_t;
154
/*
 * Helpers for the out_of_order_map, an array of 32-bit words used as a bit
 * array. A is the array, k the (non-negative) bit index.
 *
 * Arguments are fully parenthesised so that expressions such as "i + 1" can
 * be passed, and the mask is built from an unsigned 1 because "1 << 31" on a
 * signed int is undefined behaviour.
 */
#define SET_BIT(A,k)   ( (A)[((k) / 32)] |= (1U << ((k) % 32)) )
#define CLEAR_BIT(A,k) ( (A)[((k) / 32)] &= ~(1U << ((k) % 32)) )
#define TEST_BIT(A,k)  ( (A)[((k) / 32)] & (1U << ((k) % 32)) )
159
160always_inline void
161_bytes_swap (void *pv, size_t n)
162{
163 char *p = pv;
164 size_t lo, hi;
165 for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
166 {
167 char tmp = p[lo];
168 p[lo] = p[hi];
169 p[hi] = tmp;
170 }
171}
172
173#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x));
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100174
/** Number of 32-bit words in sctp_connection_t.out_of_order_map. */
#define MAX_INFLIGHT_PACKETS 128
#define MAX_ENQUEABLE_SACKS 2

/* This parameter indicates to the receiver how much increment in
 * milliseconds the sender wishes the receiver to add to its default
 * cookie life-span.
 */
#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
183
Marco Varlese191a5942017-10-30 18:17:21 +0100184typedef struct _sctp_connection
185{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100186 sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
Marco Varlese191a5942017-10-30 18:17:21 +0100187
188 u8 state; /**< SCTP state as per sctp_state_t */
189 u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100190
Marco Varlese191a5942017-10-30 18:17:21 +0100191 u32 local_tag; /**< INIT_TAG generated locally */
192 u32 remote_tag; /**< INIT_TAG generated by the remote peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100193
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100194 u32 local_initial_tsn; /**< Initial TSN generated locally */
195 u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100196
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100197 u32 peer_cookie_life_span_increment;
Marco Varlese191a5942017-10-30 18:17:21 +0100198
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100199 u32 overall_err_count; /**< The overall association error count. */
200 u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
201 reaches will cause this association to be torn down. */
Marco Varlese191a5942017-10-30 18:17:21 +0100202
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100203 u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
204
205 u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
206 This is sent in the INIT or INIT ACK chunk to the peer
207 and incremented each time a DATA chunk is assigned a
208 TSN (normally just prior to transmit or during
209 fragmentation). */
210
Marco Varlesef3ab4892018-02-19 15:23:13 +0100211 u32 last_unacked_tsn; /** < Last TSN number still unacked */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100212 u32 next_tsn_expected; /**< The next TSN number expected to be received. */
213
214 u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
215 is set initially by taking the peer's initial TSN,
216 received in the INIT or INIT ACK chunk, and
217 subtracting one from it. */
218
219 u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
220 TSNs have been received (relative to the Last Rcvd TSN).
221 If no gaps exist, i.e., no out-of-order packets have been received,
222 this array will be set to all zero. */
223
224 u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
225 This is initialized to 0. When a packet is received it is incremented.
226 If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
227 Note: This is used only when no DATA chunks are received out-of-order.
228 When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
229
Marco Varlesef3ab4892018-02-19 15:23:13 +0100230 u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100231
Marco Varlese91389ac2018-01-31 11:00:01 +0100232 u8 overall_sending_status; /**< 0 indicates first fragment of a user message
233 1 indicates normal stream
234 2 indicates last fragment of a user message */
235
Marco Varlese191a5942017-10-30 18:17:21 +0100236 sctp_options_t snd_opts;
Marco Varlese191a5942017-10-30 18:17:21 +0100237
238 u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100239
Marco Varlese191a5942017-10-30 18:17:21 +0100240} sctp_connection_t;
241
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100242typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
Marco Varlese191a5942017-10-30 18:17:21 +0100243
244sctp_connection_t *sctp_connection_new (u8 thread_index);
245void sctp_sub_connection_add_ip4 (u8 thread_index,
246 sctp_ipv4_addr_param_t * ipv4_addr);
247void sctp_sub_connection_add_ip6 (u8 thread_index,
248 sctp_ipv6_addr_param_t * ipv6_addr);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100249void sctp_connection_close (sctp_connection_t * sctp_conn);
250void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
251void sctp_connection_del (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100252
253u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100254void sctp_send_init (sctp_connection_t * sctp_conn);
255void sctp_send_shutdown (sctp_connection_t * sctp_conn);
Marco Varlese54432f82018-02-15 17:01:56 +0100256void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesebe2251b2018-02-07 12:22:41 +0100257 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100258void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesefae40392018-02-14 15:38:35 +0100259 vlib_buffer_t * b0);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100260void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100261void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
262 u8 is_ip4);
263void sctp_flush_frames_to_output (u8 thread_index);
264void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
265
266format_function_t format_sctp_state;
267
268u8 *format_sctp_connection_id (u8 * s, va_list * args);
269u8 *format_sctp_connection (u8 * s, va_list * args);
270u8 *format_sctp_scoreboard (u8 * s, va_list * args);
271u8 *format_sctp_header (u8 * s, va_list * args);
272u8 *format_sctp_tx_trace (u8 * s, va_list * args);
273
274clib_error_t *sctp_init (vlib_main_t * vm);
Marco Varlese54432f82018-02-15 17:01:56 +0100275void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
276void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
277void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
278void sctp_init_mss (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100279
Marco Varlese54432f82018-02-15 17:01:56 +0100280void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
281 vlib_buffer_t * b, ip4_address_t * ip4_addr,
Marco Varlese191a5942017-10-30 18:17:21 +0100282 ip6_address_t * ip6_addr);
Marco Varlese54432f82018-02-15 17:01:56 +0100283void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100284 vlib_buffer_t * b,
285 sctp_state_cookie_param_t * sc);
Marco Varlese54432f82018-02-15 17:01:56 +0100286void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100287 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100288void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
289 vlib_buffer_t * b);
290void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100291 vlib_buffer_t * b);
Marco Varlese191a5942017-10-30 18:17:21 +0100292
Marco Varlese54432f82018-02-15 17:01:56 +0100293u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100294
/** IP protocol number of SCTP. */
#define IP_PROTOCOL_SCTP	132

/** SCTP FSM state definitions as per RFC4960. */
#define foreach_sctp_fsm_state			\
  _(CLOSED, "CLOSED")				\
  _(COOKIE_WAIT, "COOKIE_WAIT")			\
  _(COOKIE_ECHOED, "COOKIE_ECHOED")		\
  _(ESTABLISHED, "ESTABLISHED")			\
  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")	\
  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")		\
  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")	\
  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")

/** Association states; SCTP_N_STATES is the number of states. */
typedef enum _sctp_state
{
#define _(sym, str) SCTP_STATE_##sym,
  foreach_sctp_fsm_state
#undef _
  SCTP_N_STATES
} sctp_state_t;
315
316always_inline char *
317sctp_state_to_string (u8 state)
318{
319 switch (state)
320 {
321 case SCTP_STATE_CLOSED:
322 return "SCTP_STATE_CLOSED";
323 case SCTP_STATE_COOKIE_WAIT:
324 return "SCTP_STATE_COOKIE_WAIT";
325 case SCTP_STATE_COOKIE_ECHOED:
326 return "SCTP_STATE_COOKIE_ECHOED";
327 case SCTP_STATE_ESTABLISHED:
328 return "SCTP_STATE_ESTABLISHED";
329 case SCTP_STATE_SHUTDOWN_PENDING:
330 return "SCTP_STATE_SHUTDOWN_PENDING";
331 case SCTP_STATE_SHUTDOWN_SENT:
332 return "SCTP_STATE_SHUTDOWN_SENT";
333 case SCTP_STATE_SHUTDOWN_RECEIVED:
334 return "SCTP_STATE_SHUTDOWN_RECEIVED";
335 case SCTP_STATE_SHUTDOWN_ACK_SENT:
336 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
337 }
338 return NULL;
339}
340
341always_inline char *
342sctp_chunk_to_string (u8 type)
343{
344 switch (type)
345 {
346 case DATA:
347 return "DATA";
348 case INIT:
349 return "INIT";
350 case INIT_ACK:
351 return "INIT_ACK";
352 case SACK:
353 return "SACK";
354 case HEARTBEAT:
355 return "HEARTBEAT";
356 case HEARTBEAT_ACK:
357 return "HEARTBEAT_ACK";
358 case ABORT:
359 return "ABORT";
360 case SHUTDOWN:
361 return "SHUTDOWN";
362 case SHUTDOWN_ACK:
363 return "SHUTDOWN_ACK";
364 case OPERATION_ERROR:
365 return "OPERATION_ERROR";
366 case COOKIE_ECHO:
367 return "COOKIE_ECHO";
368 case COOKIE_ACK:
369 return "COOKIE_ACK";
370 case ECNE:
371 return "ECNE";
372 case CWR:
373 return "CWR";
374 case SHUTDOWN_COMPLETE:
375 return "SHUTDOWN_COMPLETE";
376 }
377 return NULL;
378}
379
380always_inline char *
381sctp_optparam_type_to_string (u8 type)
382{
383 switch (type)
384 {
385 case SCTP_IPV4_ADDRESS_TYPE:
386 return "SCTP_IPV4_ADDRESS_TYPE";
387 case SCTP_IPV6_ADDRESS_TYPE:
388 return "SCTP_IPV6_ADDRESS_TYPE";
389 case SCTP_STATE_COOKIE_TYPE:
390 return "SCTP_STATE_COOKIE_TYPE";
391 case SCTP_UNRECOGNIZED_TYPE:
392 return "SCTP_UNRECOGNIZED_TYPE";
393 case SCTP_COOKIE_PRESERVATIVE_TYPE:
394 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
395 case SCTP_HOSTNAME_ADDRESS_TYPE:
396 return "SCTP_HOSTNAME_ADDRESS_TYPE";
397 case SCTP_SUPPORTED_ADDRESS_TYPES:
398 return "SCTP_SUPPORTED_ADDRESS_TYPES";
399 }
400 return NULL;
401}
402
/*
 * Timing constants. Every expression macro is parenthesised so that it keeps
 * its value when used inside a larger expression (e.g. "x / SCTP_RTO_MAX").
 */
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1/SCTP_TICK))	/**< SCTP tick frequency */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/* As per RFC4960, page 83 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST	4
/* Alpha and beta are fractions: written as 1/8 and 1/4 they are integer
 * divisions and evaluate to 0, so they are spelled as floating constants. */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	// the overall connection
#define SCTP_PATH_MAX_RETRANS 5	// number of attempts per destination address
#define SCTP_MAX_INIT_RETRANS 8	// number of attempts
#define SCTP_HB_INTERVAL (30 * SHZ)
#define SCTP_HB_MAX_BURST 1

#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)	/* 15 seconds; the time-interval after which the connection is considered IDLE */

#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */
424
425typedef struct _sctp_lookup_dispatch
426{
427 u8 next, error;
428} sctp_lookup_dispatch_t;
429
430typedef struct _sctp_main
431{
432 /* Per-worker thread SCTP connection pools */
433 sctp_connection_t **connections;
434
435 /* Pool of listeners. */
436 sctp_connection_t *listener_pool;
437
438 /** Dispatch table by state and flags */
439 sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
440
441 u8 log2_tstamp_clocks_per_tick;
442 f64 tstamp_ticks_per_clock;
443 u32 *time_now;
444
445 /** per-worker tx buffer free lists */
446 u32 **tx_buffers;
447 /** per-worker tx frames to SCTP 4/6 output nodes */
448 vlib_frame_t **tx_frames[2];
449 /** per-worker tx frames to ip 4/6 lookup nodes */
450 vlib_frame_t **ip_lookup_tx_frames[2];
451
452 /* Per worker-thread timer wheel for connections timers */
453 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
454
455 /* Pool of half-open connections on which we've sent a SYN */
456 sctp_connection_t *half_open_connections;
457 clib_spinlock_t half_open_lock;
458
459 /* TODO: Congestion control algorithms registered */
460 /* sctp_cc_algorithm_t *cc_algos; */
461
462 /* Flag that indicates if stack is on or off */
463 u8 is_enabled;
464
465 /** Number of preallocated connections */
466 u32 preallocated_connections;
467
468 /** Transport table (preallocation) size parameters */
469 u32 local_endpoints_table_memory;
470 u32 local_endpoints_table_buckets;
471
472 /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
473 ip4_address_t *ip4_src_addresses;
474 u32 last_v4_address_rotor;
475 u32 last_v6_address_rotor;
476 ip6_address_t *ip6_src_addresses;
477
478 /** vlib buffer size */
479 u32 bytes_per_buffer;
480
481 u8 punt_unknown4;
482 u8 punt_unknown6;
483
484} sctp_main_t;
485
486extern sctp_main_t sctp_main;
487extern vlib_node_registration_t sctp4_input_node;
488extern vlib_node_registration_t sctp6_input_node;
489extern vlib_node_registration_t sctp4_output_node;
490extern vlib_node_registration_t sctp6_output_node;
491
492always_inline sctp_main_t *
493vnet_get_sctp_main ()
494{
495 return &sctp_main;
496}
497
498always_inline sctp_header_t *
499sctp_buffer_hdr (vlib_buffer_t * b)
500{
501 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
502 return (sctp_header_t *) (b->data + b->current_data
503 + vnet_buffer (b)->sctp.hdr_offset);
504}
505
506clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
507
508always_inline sctp_connection_t *
509sctp_half_open_connection_get (u32 conn_index)
510{
511 sctp_connection_t *tc = 0;
512 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
513 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
514 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
Marco Varlese04e5d642018-02-23 17:43:06 +0100515 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].subconn_idx = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100516 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
517 return tc;
518}
519
520/**
521 * Cleanup half-open connection
522 *
523 */
524always_inline void
525sctp_half_open_connection_del (sctp_connection_t * tc)
526{
Marco Varlese15cc6a82018-02-21 12:39:52 +0100527 sctp_main_t *sctp_main = vnet_get_sctp_main ();
528 clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
529 pool_put_index (sctp_main->half_open_connections,
Marco Varlese191a5942017-10-30 18:17:21 +0100530 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
531 if (CLIB_DEBUG)
532 memset (tc, 0xFA, sizeof (*tc));
Marco Varlese15cc6a82018-02-21 12:39:52 +0100533 clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
Marco Varlese191a5942017-10-30 18:17:21 +0100534}
535
536always_inline u32
537sctp_set_time_now (u32 thread_index)
538{
539 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
540 * sctp_main.tstamp_ticks_per_clock;
541 return sctp_main.time_now[thread_index];
542}
543
544always_inline void
545sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
546 u32 interval)
547{
548 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
549 vlib_get_thread_index ());
550 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
551 SCTP_TIMER_HANDLE_INVALID);
552
553 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
Marco Varlese21c8baf2018-02-02 17:17:51 +0100554 sub->timers[timer_id] =
Marco Varlese191a5942017-10-30 18:17:21 +0100555 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
556 sub->c_c_index, timer_id, interval);
557}
558
559always_inline void
560sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
561{
562 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
563 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
564 return;
565
566 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
567
568 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
569 sub->timers[timer_id]);
570 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
571}
572
Marco Varlese191a5942017-10-30 18:17:21 +0100573/**
574 * Try to cleanup half-open connection
575 *
576 * If called from a thread that doesn't own tc, the call won't have any
577 * effect.
578 *
579 * @param tc - connection to be cleaned up
580 * @return non-zero if cleanup failed.
581 */
582always_inline int
583sctp_half_open_connection_cleanup (sctp_connection_t * tc)
584{
585 /* Make sure this is the owning thread */
586 if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
587 vlib_get_thread_index ())
588 return 1;
589 sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
590 sctp_half_open_connection_del (tc);
591 return 0;
592}
593
594always_inline u32
595sctp_header_bytes ()
596{
597 return sizeof (sctp_header_t);
598}
599
600always_inline sctp_connection_t *
601sctp_get_connection_from_transport (transport_connection_t * tconn)
602{
603 ASSERT (tconn != NULL);
604
605 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
606#if SCTP_ADV_DEBUG
607 if (sub == NULL)
608 SCTP_ADV_DBG ("sub == NULL");
609 if (sub->parent == NULL)
610 SCTP_ADV_DBG ("sub->parent == NULL");
611#endif
Marco Varlese04e5d642018-02-23 17:43:06 +0100612 if (sub->subconn_idx > 0)
613 return (sctp_connection_t *) sub -
614 (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
615
616 return (sctp_connection_t *) sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100617}
618
619always_inline u32
620sctp_time_now (void)
621{
622 return sctp_main.time_now[vlib_get_thread_index ()];
623}
624
/*
 * Absolute value of x. x is evaluated more than once, so it must not have
 * side effects.
 * The whole conditional is parenthesised and there is no trailing ';', so
 * the macro can be used inside a larger expression (e.g. "a * ABS (b)").
 */
#define ABS(x) (((x) > 0) ? (x) : -(x))
626
627always_inline void
628sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
629{
630 /* See RFC4960, 6.3.1. RTO Calculation */
631 u32 RTO = 0;
632 f32 RTTVAR = 0;
633 u32 now = sctp_time_now ();
634 u32 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
635 u32 R = prev_ts - now;
636
637 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
638 {
639 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
640 return;
641 }
642
643 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
644 {
645 sctp_conn->sub_conn[conn_idx].SRTT = R;
646 RTTVAR = R / 2;
647
648 if (RTTVAR == 0)
649 RTTVAR = 100e-3; /* 100 ms */
650
651 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
652 }
653 else // C3: RTT already exists; let's recalculate
654 {
655 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
656 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
657
658 if (RTTVAR == 0)
659 RTTVAR = 100e-3; /* 100 ms */
660
661 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
662
663 sctp_conn->sub_conn[conn_idx].SRTT =
664 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
665 SCTP_RTO_ALPHA * R;
666 }
667
668 RTO =
669 sctp_conn->sub_conn[conn_idx].SRTT +
670 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
671 if (RTO < SCTP_RTO_MIN) // C6
672 RTO = SCTP_RTO_MIN;
673
674 if (RTO > SCTP_RTO_MAX) // C7
675 RTO = SCTP_RTO_MAX;
676
677 sctp_conn->sub_conn[conn_idx].RTO = RTO;
678}
679
Marco Varlese191a5942017-10-30 18:17:21 +0100680always_inline void
681sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
682 u32 interval)
683{
684 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
685 vlib_get_thread_index ());
686 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
687
688 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
689 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
690 sub->timers[timer_id]);
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100691
Marco Varlese191a5942017-10-30 18:17:21 +0100692 tc->sub_conn[conn_idx].timers[timer_id] =
693 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
694 sub->c_c_index, timer_id, interval);
695}
696
697always_inline sctp_connection_t *
698sctp_listener_get (u32 tli)
699{
700 return pool_elt_at_index (sctp_main.listener_pool, tli);
701}
702
703#endif
704
705always_inline sctp_connection_t *
706sctp_connection_get (u32 conn_index, u32 thread_index)
707{
708 if (PREDICT_FALSE
709 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
710 return 0;
711 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
712}
713
Marco Varlese54432f82018-02-15 17:01:56 +0100714#define SELECT_MAX_RETRIES 8
Marco Varlese191a5942017-10-30 18:17:21 +0100715
Marco Varlese54432f82018-02-15 17:01:56 +0100716always_inline u8
717sctp_data_subconn_select (sctp_connection_t * sctp_conn)
718{
Marco Varlese54432f82018-02-15 17:01:56 +0100719 u32 sub = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlesef3ab4892018-02-19 15:23:13 +0100720 u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd;
721 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100722 {
Marco Varlesef3ab4892018-02-19 15:23:13 +0100723 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
724 continue;
725
726 if (sctp_conn->sub_conn[i].cwnd > cwnd)
727 {
728 sub = i;
729 cwnd = sctp_conn->sub_conn[i].cwnd;
730 }
Marco Varlese191a5942017-10-30 18:17:21 +0100731 }
Marco Varlese54432f82018-02-15 17:01:56 +0100732 return sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100733}
734
735always_inline u8
Marco Varlese54432f82018-02-15 17:01:56 +0100736sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
Marco Varlese191a5942017-10-30 18:17:21 +0100737{
Marco Varlese54432f82018-02-15 17:01:56 +0100738 u8 i;
Marco Varlese191a5942017-10-30 18:17:21 +0100739
Marco Varlese54432f82018-02-15 17:01:56 +0100740 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100741 {
Marco Varlese54432f82018-02-15 17:01:56 +0100742 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
743 ip6h->dst_address.as_u64[0] &&
744 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
745 ip6h->dst_address.as_u64[1] &&
746 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
747 ip6h->src_address.as_u64[0] &&
748 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
749 ip6h->src_address.as_u64[1])
750 return i;
Marco Varlese191a5942017-10-30 18:17:21 +0100751 }
Marco Varlese54432f82018-02-15 17:01:56 +0100752 clib_warning ("Did not find a sub-connection; defaulting to %u",
753 MAIN_SCTP_SUB_CONN_IDX);
754 return MAIN_SCTP_SUB_CONN_IDX;
755}
756
757always_inline u8
758sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
759{
760 u8 i;
761
762 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
763 {
764 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
765 ip4h->dst_address.as_u32
766 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
767 ip4h->src_address.as_u32)
768 return i;
769 }
770 clib_warning ("Did not find a sub-connection; defaulting to %u",
771 MAIN_SCTP_SUB_CONN_IDX);
772 return MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100773}
774
775/**
776 * Push SCTP header to buffer
777 *
778 * @param vm - vlib_main
779 * @param b - buffer to write the header to
780 * @param sp_net - source port net order
781 * @param dp_net - destination port net order
782 * @param sctp_hdr_opts_len - header and options length in bytes
783 *
784 * @return - pointer to start of SCTP header
785 */
786always_inline void *
787vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
788 u8 sctp_hdr_opts_len)
789{
790 sctp_full_hdr_t *full_hdr;
791
792 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
793
794 full_hdr->hdr.src_port = sp;
795 full_hdr->hdr.dst_port = dp;
796 full_hdr->hdr.checksum = 0;
797 return full_hdr;
798}
799
800/**
801 * Push SCTP header to buffer
802 *
803 * @param b - buffer to write the header to
804 * @param sp_net - source port net order
805 * @param dp_net - destination port net order
806 * @param sctp_hdr_opts_len - header and options length in bytes
807 *
808 * @return - pointer to start of SCTP header
809 */
810always_inline void *
811vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
812 u8 sctp_hdr_opts_len)
813{
814 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
815 sctp_hdr_opts_len);
816}
817
Marco Varlesef3ab4892018-02-19 15:23:13 +0100818always_inline void
819update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
820{
821 u8 i;
822 u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX;
823
824 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
825 {
826 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
827 {
828 if (sctp_conn->sub_conn[i].PMTU <
829 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
830 smallest_pmtu_index = i;
831 }
832 }
833
834 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
835}
836
837/* As per RFC4960; section 7.2.1: Slow-Start */
838always_inline void
839sctp_init_cwnd (sctp_connection_t * sctp_conn)
840{
841 u8 i;
842 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
843 {
844 /* Section 7.2.1; point (1) */
845 sctp_conn->sub_conn[i].cwnd =
846 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
847 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
848
849 /* Section 7.2.1; point (3) */
850 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
851
852 /* Section 7.2.2; point (1) */
853 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
854 }
855}
856
857always_inline u8
858sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
859{
860 return 0;
861}
862
863always_inline u8
864cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
865{
866 return 0;
867}
868
869/* As per RFC4960; section 7.2.1: Slow-Start */
870always_inline void
871update_cwnd (sctp_connection_t * sctp_conn)
872{
873 u8 i;
874
875 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
876 {
877 /* Section 7.2.1; point (2) */
878 if (sctp_conn->sub_conn[i].is_retransmitting)
879 {
880 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
881 continue;
882 }
883
884 /* Section 7.2.2; point (4) */
885 if (sctp_conn->sub_conn[i].last_data_ts >
886 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
887 {
888 sctp_conn->sub_conn[i].cwnd =
889 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
890 4 * sctp_conn->sub_conn[i].PMTU);
891 continue;
892 }
893
894 /* Section 7.2.1; point (5) */
895 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
896 {
897 if (!cwnd_fully_utilized (sctp_conn, i))
898 continue;
899
900 if (sctp_in_cong_recovery (sctp_conn, i))
901 continue;
902
903 sctp_conn->sub_conn[i].cwnd =
904 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
905 }
906 }
907}
908
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */