blob: 487ff9e48247a0e7d3f779d4d17447d6fbb23e2c [file] [log] [blame]
/*
 * Copyright (c) 2017 SUSE LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef included_vnet_sctp_h
16#define included_vnet_sctp_h
17
18#include <vnet/vnet.h>
19#include <vnet/ip/ip.h>
20#include <vnet/sctp/sctp_timer.h>
21#include <vnet/sctp/sctp_packet.h>
22#include <vnet/session/transport.h>
23#include <vnet/session/session.h>
24
/*
 * SCTP timers.
 *
 * X-macro list: every entry is _(symbol, display string).  Define `_`
 * before expanding the list and undefine it again afterwards.
 */
#define foreach_sctp_timer			\
  _(T1_INIT, "T1_INIT")				\
  _(T1_COOKIE, "T1_COOKIE")			\
  _(T2_SHUTDOWN, "T2_SHUTDOWN")			\
  _(T3_RXTX, "T3_RXTX")				\
  _(T4_HEARTBEAT, "T4_HB")			\
  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")

/** Timer identifiers (SCTP_TIMER_<symbol>); SCTP_N_TIMERS is their count. */
typedef enum _sctp_timers
{
#define _(name, label) SCTP_TIMER_##name,
  foreach_sctp_timer
#undef _
    SCTP_N_TIMERS
} sctp_timers_e;

/** Timer handle value meaning "no timer" (all bits set). */
#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
43
Marco Varlesedf5a99c2018-02-06 13:48:30 +010044always_inline char *
45sctp_timer_to_string (u8 timer_id)
46{
47 switch (timer_id)
48 {
49 case SCTP_TIMER_T1_INIT:
50 return "SCTP_TIMER_T1_INIT";
51 case SCTP_TIMER_T1_COOKIE:
52 return "SCTP_TIMER_T1_COOKIE";
53 case SCTP_TIMER_T2_SHUTDOWN:
54 return "SCTP_TIMER_T2_SHUTDOWN";
55 case SCTP_TIMER_T3_RXTX:
56 return "SCTP_TIMER_T3_RXTX";
57 case SCTP_TIMER_T4_HEARTBEAT:
58 return "SCTP_TIMER_T4_HEARTBEAT";
59 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
60 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
61 }
62 return NULL;
63}
64
Marco Varlese191a5942017-10-30 18:17:21 +010065typedef enum _sctp_error
66{
67#define sctp_error(n,s) SCTP_ERROR_##n,
68#include <vnet/sctp/sctp_error.def>
69#undef sctp_error
70 SCTP_N_ERROR,
71} sctp_error_t;
72
/** Empty flag value. */
#define NO_FLAG 0

/*
 * Flag-bit tests.  Each macro yields the masked bit, i.e. a non-zero
 * value when the bit is set.  T and E both test bit 0, B tests bit 1 and
 * U tests bit 2.
 */
#define IS_T_BIT_SET(var) ((var) & 0x01)
#define IS_E_BIT_SET(var) ((var) & 0x01)
#define IS_B_BIT_SET(var) ((var) & 0x02)
#define IS_U_BIT_SET(var) ((var) & 0x04)

/** Number of sub-connection slots held by one SCTP connection. */
#define MAX_SCTP_CONNECTIONS 8
/** Slot index of the main sub-connection. */
#define MAIN_SCTP_SUB_CONN_IDX 0
82
/*
 * Record a buffer-trajectory start point when VLIB_BUFFER_TRACE_TRAJECTORY
 * is enabled; expands to nothing otherwise.
 *
 * The enabled form is wrapped in do { } while (0) so that the usual
 * "sctp_trajectory_add_start (b, n);" spelling is exactly one statement
 * and stays valid in front of an `else`.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define sctp_trajectory_add_start(b, start)			\
do {								\
  (*vlib_buffer_trace_trajectory_cb) (b, start);		\
} while (0)
#else
#define sctp_trajectory_add_start(b, start)
#endif
91
/** States a sub-connection (one destination) can be in. */
enum _sctp_subconn_state
{
  SCTP_SUBCONN_STATE_DOWN = 0,
  SCTP_SUBCONN_STATE_UP = 1,
  SCTP_SUBCONN_STATE_ALLOW_HB = 2
};
98
Marco Varlesef3ab4892018-02-19 15:23:13 +010099#define SCTP_INITIAL_SSHTRESH 65535
Marco Varlese191a5942017-10-30 18:17:21 +0100100typedef struct _sctp_sub_connection
101{
102 transport_connection_t connection; /**< Common transport data. First! */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100103
Marco Varlese04e5d642018-02-23 17:43:06 +0100104 u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100105 u32 error_count; /**< The current error count for this destination. */
106 u32 error_threshold; /**< Current error threshold for this destination,
107 i.e. what value marks the destination down if error count reaches this value. */
Marco Varlesef3ab4892018-02-19 15:23:13 +0100108 u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
109 the sender based on observed network conditions. */
110 u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
111 sender to distinguish slow-start and congestion avoidance phases. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100112
Marco Varlese21c8baf2018-02-02 17:17:51 +0100113 u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
114
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100115 u32 RTO; /**< The current retransmission timeout value. */
116 u32 SRTT; /**< The current smoothed round-trip time. */
Marco Varlese21c8baf2018-02-02 17:17:51 +0100117 f32 RTTVAR; /**< The current RTT variation. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100118
119 u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
120 congestion avoidance mode (see Section 7.2.2).*/
121
122 u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
123
124 u16 PMTU; /**< The current known path MTU. */
125
126 u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
127
128 u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
129 this address is currently being used to compute an RTT.
130 If this flag is 0, the next DATA chunk sent to this destination
131 should be used to compute an RTT and this flag should be set.
132 Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
133 clear this flag. */
134
Marco Varlese54432f82018-02-15 17:01:56 +0100135 u32 last_seen; /**< The time to which this destination was last sent a packet to.
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100136 This can be used to determine if a HEARTBEAT is needed. */
Marco Varlese191a5942017-10-30 18:17:21 +0100137
Marco Varlesef3ab4892018-02-19 15:23:13 +0100138 u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
139
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100140 u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
Marco Varlese54432f82018-02-15 17:01:56 +0100141 If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100142
Marco Varlesea38783e2018-02-13 12:38:52 +0100143 u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
144
Marco Varlesef3ab4892018-02-19 15:23:13 +0100145 u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
Marco Varlese54432f82018-02-15 17:01:56 +0100146
Marco Varlese191a5942017-10-30 18:17:21 +0100147} sctp_sub_connection_t;
148
149typedef struct
150{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100151 u32 a_rwnd; /**< Maximum segment size advertised */
Marco Varlese191a5942017-10-30 18:17:21 +0100152
153} sctp_options_t;
154
/*
 * Helpers for the out_of_order_map, an array of 32-bit words used as a
 * bit array: bit k lives in word k / 32 at position k % 32.
 *
 * Arguments are fully parenthesised so that expressions such as `k + 1`
 * index the right word, and the mask is built from an unsigned 1 so that
 * bit 31 can be addressed without shifting into the sign bit.
 * TEST_BIT yields a non-zero value when the bit is set.
 */
#define SET_BIT(A,k)   ( (A)[(k) / 32] |= (1u << ((k) % 32)) )
#define CLEAR_BIT(A,k) ( (A)[(k) / 32] &= ~(1u << ((k) % 32)) )
#define TEST_BIT(A,k)  ( (A)[(k) / 32] & (1u << ((k) % 32)) )
159
/**
 * Reverse the order of the n bytes starting at pv, in place.
 *
 * @param pv - start of the byte range
 * @param n  - number of bytes; 0 and 1 leave the memory untouched
 */
always_inline void
_bytes_swap (void *pv, size_t n)
{
  char *p = pv;
  size_t lo, hi;

  /* Nothing to reverse; this also keeps `n - 1` from wrapping when n is 0. */
  if (n < 2)
    return;

  for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
    {
      char tmp = p[lo];
      p[lo] = p[hi];
      p[hi] = tmp;
    }
}

/*
 * Reverse the byte order of the object x in place.
 * The expansion keeps its historical trailing `;` so existing call sites
 * are unaffected; do not use it as the body of an un-braced if/else.
 */
#define ENDIANESS_SWAP(x) _bytes_swap(&(x), sizeof(x));
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100174
/** Dimension of the per-connection out_of_order_map array. */
#define MAX_INFLIGHT_PACKETS 128
#define MAX_ENQUEABLE_SACKS 2

/*
 * Increment, in milliseconds, that the sender asks the receiver to add
 * to its default cookie life-span.
 */
#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
183
Marco Varlese191a5942017-10-30 18:17:21 +0100184typedef struct _sctp_connection
185{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100186 sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
Marco Varlese191a5942017-10-30 18:17:21 +0100187
188 u8 state; /**< SCTP state as per sctp_state_t */
189 u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100190
Marco Varlese191a5942017-10-30 18:17:21 +0100191 u32 local_tag; /**< INIT_TAG generated locally */
192 u32 remote_tag; /**< INIT_TAG generated by the remote peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100193
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100194 u32 local_initial_tsn; /**< Initial TSN generated locally */
195 u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100196
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100197 u32 peer_cookie_life_span_increment;
Marco Varlese191a5942017-10-30 18:17:21 +0100198
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100199 u32 overall_err_count; /**< The overall association error count. */
200 u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
201 reaches will cause this association to be torn down. */
Marco Varlese191a5942017-10-30 18:17:21 +0100202
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100203 u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
204
205 u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
206 This is sent in the INIT or INIT ACK chunk to the peer
207 and incremented each time a DATA chunk is assigned a
208 TSN (normally just prior to transmit or during
209 fragmentation). */
210
Marco Varlesef3ab4892018-02-19 15:23:13 +0100211 u32 last_unacked_tsn; /** < Last TSN number still unacked */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100212 u32 next_tsn_expected; /**< The next TSN number expected to be received. */
213
214 u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
215 is set initially by taking the peer's initial TSN,
216 received in the INIT or INIT ACK chunk, and
217 subtracting one from it. */
218
219 u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
220 TSNs have been received (relative to the Last Rcvd TSN).
221 If no gaps exist, i.e., no out-of-order packets have been received,
222 this array will be set to all zero. */
223
224 u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
225 This is initialized to 0. When a packet is received it is incremented.
226 If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
227 Note: This is used only when no DATA chunks are received out-of-order.
228 When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
229
Marco Varlesef3ab4892018-02-19 15:23:13 +0100230 u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100231
Marco Varlese91389ac2018-01-31 11:00:01 +0100232 u8 overall_sending_status; /**< 0 indicates first fragment of a user message
233 1 indicates normal stream
234 2 indicates last fragment of a user message */
235
Marco Varlese191a5942017-10-30 18:17:21 +0100236 sctp_options_t snd_opts;
Marco Varlese191a5942017-10-30 18:17:21 +0100237
Marco Varleseeacf3cf2018-02-26 14:52:25 +0100238 u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during
239 the life-span of the association itself. For instance, a new sub-connection might have been added. */
240
Marco Varlese191a5942017-10-30 18:17:21 +0100241} sctp_connection_t;
242
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100243typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
Marco Varlese191a5942017-10-30 18:17:21 +0100244
245sctp_connection_t *sctp_connection_new (u8 thread_index);
Marco Varlese3c6a9762018-03-01 11:19:59 +0100246
247u8
248sctp_sub_connection_add_ip4 (vlib_main_t * vm,
249 ip4_address_t * lcl_addr,
250 ip4_address_t * rmt_addr);
251
252u8
253sctp_sub_connection_add_ip6 (vlib_main_t * vm,
254 ip6_address_t * lcl_addr,
255 ip6_address_t * rmt_addr);
256
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100257void sctp_connection_close (sctp_connection_t * sctp_conn);
258void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
259void sctp_connection_del (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100260
261u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100262void sctp_send_init (sctp_connection_t * sctp_conn);
263void sctp_send_shutdown (sctp_connection_t * sctp_conn);
Marco Varlese54432f82018-02-15 17:01:56 +0100264void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesebe2251b2018-02-07 12:22:41 +0100265 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100266void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesefae40392018-02-14 15:38:35 +0100267 vlib_buffer_t * b0);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100268void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100269void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
270 u8 is_ip4);
271void sctp_flush_frames_to_output (u8 thread_index);
272void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
273
274format_function_t format_sctp_state;
275
276u8 *format_sctp_connection_id (u8 * s, va_list * args);
277u8 *format_sctp_connection (u8 * s, va_list * args);
278u8 *format_sctp_scoreboard (u8 * s, va_list * args);
279u8 *format_sctp_header (u8 * s, va_list * args);
280u8 *format_sctp_tx_trace (u8 * s, va_list * args);
281
282clib_error_t *sctp_init (vlib_main_t * vm);
Marco Varlese54432f82018-02-15 17:01:56 +0100283void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
284void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
285void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
286void sctp_init_mss (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100287
Marco Varlese54432f82018-02-15 17:01:56 +0100288void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
289 vlib_buffer_t * b, ip4_address_t * ip4_addr,
Marco Varlese191a5942017-10-30 18:17:21 +0100290 ip6_address_t * ip6_addr);
Marco Varleseeacf3cf2018-02-26 14:52:25 +0100291void
292sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
293 u8 idx, vlib_buffer_t * b,
294 ip4_address_t * ip4_addr,
295 ip6_address_t * ip6_addr);
296void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
297 vlib_buffer_t * b,
298 ip4_address_t * ip4_addr,
299 ip6_address_t * ip6_addr);
Marco Varlese8c5f67f2018-02-27 09:38:31 +0100300void
301sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
302 vlib_buffer_t * b, u8 err_cause);
Marco Varlese54432f82018-02-15 17:01:56 +0100303void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100304 vlib_buffer_t * b,
305 sctp_state_cookie_param_t * sc);
Marco Varlese54432f82018-02-15 17:01:56 +0100306void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100307 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100308void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
309 vlib_buffer_t * b);
310void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100311 vlib_buffer_t * b);
Marco Varlese191a5942017-10-30 18:17:21 +0100312
Marco Varlese54432f82018-02-15 17:01:56 +0100313u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100314
Marco Varlese3c6a9762018-03-01 11:19:59 +0100315void sctp_api_reference (void);
316
/** IP protocol number used for SCTP. */
#define IP_PROTOCOL_SCTP 132

/**
 * SCTP FSM state definitions as per RFC4960.
 * X-macro list: every entry is _(symbol, display string).
 */
#define foreach_sctp_fsm_state			\
  _(CLOSED, "CLOSED")				\
  _(COOKIE_WAIT, "COOKIE_WAIT")			\
  _(COOKIE_ECHOED, "COOKIE_ECHOED")		\
  _(ESTABLISHED, "ESTABLISHED")			\
  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")	\
  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")		\
  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")	\
  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")

/** FSM states (SCTP_STATE_<symbol>); SCTP_N_STATES is their count. */
typedef enum _sctp_state
{
#define _(name, label) SCTP_STATE_##name,
  foreach_sctp_fsm_state
#undef _
    SCTP_N_STATES
} sctp_state_t;
337
338always_inline char *
339sctp_state_to_string (u8 state)
340{
341 switch (state)
342 {
343 case SCTP_STATE_CLOSED:
344 return "SCTP_STATE_CLOSED";
345 case SCTP_STATE_COOKIE_WAIT:
346 return "SCTP_STATE_COOKIE_WAIT";
347 case SCTP_STATE_COOKIE_ECHOED:
348 return "SCTP_STATE_COOKIE_ECHOED";
349 case SCTP_STATE_ESTABLISHED:
350 return "SCTP_STATE_ESTABLISHED";
351 case SCTP_STATE_SHUTDOWN_PENDING:
352 return "SCTP_STATE_SHUTDOWN_PENDING";
353 case SCTP_STATE_SHUTDOWN_SENT:
354 return "SCTP_STATE_SHUTDOWN_SENT";
355 case SCTP_STATE_SHUTDOWN_RECEIVED:
356 return "SCTP_STATE_SHUTDOWN_RECEIVED";
357 case SCTP_STATE_SHUTDOWN_ACK_SENT:
358 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
359 }
360 return NULL;
361}
362
363always_inline char *
364sctp_chunk_to_string (u8 type)
365{
366 switch (type)
367 {
368 case DATA:
369 return "DATA";
370 case INIT:
371 return "INIT";
372 case INIT_ACK:
373 return "INIT_ACK";
374 case SACK:
375 return "SACK";
376 case HEARTBEAT:
377 return "HEARTBEAT";
378 case HEARTBEAT_ACK:
379 return "HEARTBEAT_ACK";
380 case ABORT:
381 return "ABORT";
382 case SHUTDOWN:
383 return "SHUTDOWN";
384 case SHUTDOWN_ACK:
385 return "SHUTDOWN_ACK";
386 case OPERATION_ERROR:
387 return "OPERATION_ERROR";
388 case COOKIE_ECHO:
389 return "COOKIE_ECHO";
390 case COOKIE_ACK:
391 return "COOKIE_ACK";
392 case ECNE:
393 return "ECNE";
394 case CWR:
395 return "CWR";
396 case SHUTDOWN_COMPLETE:
397 return "SHUTDOWN_COMPLETE";
398 }
399 return NULL;
400}
401
402always_inline char *
403sctp_optparam_type_to_string (u8 type)
404{
405 switch (type)
406 {
407 case SCTP_IPV4_ADDRESS_TYPE:
408 return "SCTP_IPV4_ADDRESS_TYPE";
409 case SCTP_IPV6_ADDRESS_TYPE:
410 return "SCTP_IPV6_ADDRESS_TYPE";
411 case SCTP_STATE_COOKIE_TYPE:
412 return "SCTP_STATE_COOKIE_TYPE";
413 case SCTP_UNRECOGNIZED_TYPE:
414 return "SCTP_UNRECOGNIZED_TYPE";
415 case SCTP_COOKIE_PRESERVATIVE_TYPE:
416 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
417 case SCTP_HOSTNAME_ADDRESS_TYPE:
418 return "SCTP_HOSTNAME_ADDRESS_TYPE";
419 case SCTP_SUPPORTED_ADDRESS_TYPES:
420 return "SCTP_SUPPORTED_ADDRESS_TYPES";
421 }
422 return NULL;
423}
424
/*
 * Time base.  Every expansion below is parenthesised so that the macros
 * behave as single values inside larger expressions (e.g. as a divisor).
 */
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1 / SCTP_TICK))	/**< SCTP tick frequency */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/* As per RFC4960, page 83 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST 4
/* Floating-point gains: an integer 1/8 or 1/4 would evaluate to 0. */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	/* the overall connection */
#define SCTP_PATH_MAX_RETRANS 5	/* number of attempts per destination address */
#define SCTP_MAX_INIT_RETRANS 8	/* number of attempts */
#define SCTP_HB_INTERVAL (30 * SHZ)
#define SCTP_HB_MAX_BURST 1

/* 15 seconds; the time-interval after which the connection is considered IDLE */
#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)

#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */
446
447typedef struct _sctp_lookup_dispatch
448{
449 u8 next, error;
450} sctp_lookup_dispatch_t;
451
452typedef struct _sctp_main
453{
454 /* Per-worker thread SCTP connection pools */
455 sctp_connection_t **connections;
456
457 /* Pool of listeners. */
458 sctp_connection_t *listener_pool;
459
460 /** Dispatch table by state and flags */
461 sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
462
463 u8 log2_tstamp_clocks_per_tick;
464 f64 tstamp_ticks_per_clock;
465 u32 *time_now;
466
467 /** per-worker tx buffer free lists */
468 u32 **tx_buffers;
469 /** per-worker tx frames to SCTP 4/6 output nodes */
470 vlib_frame_t **tx_frames[2];
471 /** per-worker tx frames to ip 4/6 lookup nodes */
472 vlib_frame_t **ip_lookup_tx_frames[2];
473
474 /* Per worker-thread timer wheel for connections timers */
475 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
476
477 /* Pool of half-open connections on which we've sent a SYN */
478 sctp_connection_t *half_open_connections;
479 clib_spinlock_t half_open_lock;
480
481 /* TODO: Congestion control algorithms registered */
482 /* sctp_cc_algorithm_t *cc_algos; */
483
484 /* Flag that indicates if stack is on or off */
485 u8 is_enabled;
486
487 /** Number of preallocated connections */
488 u32 preallocated_connections;
489
490 /** Transport table (preallocation) size parameters */
491 u32 local_endpoints_table_memory;
492 u32 local_endpoints_table_buckets;
493
494 /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
495 ip4_address_t *ip4_src_addresses;
496 u32 last_v4_address_rotor;
497 u32 last_v6_address_rotor;
498 ip6_address_t *ip6_src_addresses;
499
500 /** vlib buffer size */
501 u32 bytes_per_buffer;
502
503 u8 punt_unknown4;
504 u8 punt_unknown6;
505
506} sctp_main_t;
507
508extern sctp_main_t sctp_main;
509extern vlib_node_registration_t sctp4_input_node;
510extern vlib_node_registration_t sctp6_input_node;
511extern vlib_node_registration_t sctp4_output_node;
512extern vlib_node_registration_t sctp6_output_node;
513
514always_inline sctp_main_t *
515vnet_get_sctp_main ()
516{
517 return &sctp_main;
518}
519
520always_inline sctp_header_t *
521sctp_buffer_hdr (vlib_buffer_t * b)
522{
523 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
524 return (sctp_header_t *) (b->data + b->current_data
525 + vnet_buffer (b)->sctp.hdr_offset);
526}
527
528clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
529
530always_inline sctp_connection_t *
531sctp_half_open_connection_get (u32 conn_index)
532{
533 sctp_connection_t *tc = 0;
534 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
535 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
536 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
Marco Varlese04e5d642018-02-23 17:43:06 +0100537 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].subconn_idx = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100538 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
539 return tc;
540}
541
542/**
543 * Cleanup half-open connection
544 *
545 */
546always_inline void
547sctp_half_open_connection_del (sctp_connection_t * tc)
548{
Marco Varlese15cc6a82018-02-21 12:39:52 +0100549 sctp_main_t *sctp_main = vnet_get_sctp_main ();
550 clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
551 pool_put_index (sctp_main->half_open_connections,
Marco Varlese191a5942017-10-30 18:17:21 +0100552 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
553 if (CLIB_DEBUG)
554 memset (tc, 0xFA, sizeof (*tc));
Marco Varlese15cc6a82018-02-21 12:39:52 +0100555 clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
Marco Varlese191a5942017-10-30 18:17:21 +0100556}
557
558always_inline u32
559sctp_set_time_now (u32 thread_index)
560{
561 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
562 * sctp_main.tstamp_ticks_per_clock;
563 return sctp_main.time_now[thread_index];
564}
565
566always_inline void
567sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
568 u32 interval)
569{
570 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
571 vlib_get_thread_index ());
572 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
573 SCTP_TIMER_HANDLE_INVALID);
574
575 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
Marco Varlese21c8baf2018-02-02 17:17:51 +0100576 sub->timers[timer_id] =
Marco Varlese191a5942017-10-30 18:17:21 +0100577 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
578 sub->c_c_index, timer_id, interval);
579}
580
581always_inline void
582sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
583{
584 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
585 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
586 return;
587
588 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
589
590 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
591 sub->timers[timer_id]);
592 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
593}
594
Marco Varlese191a5942017-10-30 18:17:21 +0100595/**
596 * Try to cleanup half-open connection
597 *
598 * If called from a thread that doesn't own tc, the call won't have any
599 * effect.
600 *
601 * @param tc - connection to be cleaned up
602 * @return non-zero if cleanup failed.
603 */
604always_inline int
605sctp_half_open_connection_cleanup (sctp_connection_t * tc)
606{
607 /* Make sure this is the owning thread */
608 if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
609 vlib_get_thread_index ())
610 return 1;
611 sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
612 sctp_half_open_connection_del (tc);
613 return 0;
614}
615
616always_inline u32
617sctp_header_bytes ()
618{
619 return sizeof (sctp_header_t);
620}
621
622always_inline sctp_connection_t *
623sctp_get_connection_from_transport (transport_connection_t * tconn)
624{
625 ASSERT (tconn != NULL);
626
627 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
628#if SCTP_ADV_DEBUG
629 if (sub == NULL)
630 SCTP_ADV_DBG ("sub == NULL");
631 if (sub->parent == NULL)
632 SCTP_ADV_DBG ("sub->parent == NULL");
633#endif
Marco Varlese04e5d642018-02-23 17:43:06 +0100634 if (sub->subconn_idx > 0)
635 return (sctp_connection_t *) sub -
636 (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
637
638 return (sctp_connection_t *) sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100639}
640
641always_inline u32
642sctp_time_now (void)
643{
644 return sctp_main.time_now[vlib_get_thread_index ()];
645}
646
/* Absolute value.  Fully parenthesised and without a trailing `;` so it
   can be used inside larger expressions.  The argument is evaluated
   twice and must be of a signed or floating type to be meaningful. */
#define ABS(x) (((x) > 0) ? (x) : -(x))
648
649always_inline void
650sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
651{
652 /* See RFC4960, 6.3.1. RTO Calculation */
653 u32 RTO = 0;
654 f32 RTTVAR = 0;
655 u32 now = sctp_time_now ();
656 u32 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
657 u32 R = prev_ts - now;
658
659 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
660 {
661 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
662 return;
663 }
664
665 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
666 {
667 sctp_conn->sub_conn[conn_idx].SRTT = R;
668 RTTVAR = R / 2;
669
670 if (RTTVAR == 0)
671 RTTVAR = 100e-3; /* 100 ms */
672
673 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
674 }
675 else // C3: RTT already exists; let's recalculate
676 {
677 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
678 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
679
680 if (RTTVAR == 0)
681 RTTVAR = 100e-3; /* 100 ms */
682
683 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
684
685 sctp_conn->sub_conn[conn_idx].SRTT =
686 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
687 SCTP_RTO_ALPHA * R;
688 }
689
690 RTO =
691 sctp_conn->sub_conn[conn_idx].SRTT +
692 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
693 if (RTO < SCTP_RTO_MIN) // C6
694 RTO = SCTP_RTO_MIN;
695
696 if (RTO > SCTP_RTO_MAX) // C7
697 RTO = SCTP_RTO_MAX;
698
699 sctp_conn->sub_conn[conn_idx].RTO = RTO;
700}
701
Marco Varlese191a5942017-10-30 18:17:21 +0100702always_inline void
703sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
704 u32 interval)
705{
706 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
707 vlib_get_thread_index ());
708 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
709
710 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
711 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
712 sub->timers[timer_id]);
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100713
Marco Varlese191a5942017-10-30 18:17:21 +0100714 tc->sub_conn[conn_idx].timers[timer_id] =
715 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
716 sub->c_c_index, timer_id, interval);
717}
718
719always_inline sctp_connection_t *
720sctp_listener_get (u32 tli)
721{
722 return pool_elt_at_index (sctp_main.listener_pool, tli);
723}
724
725#endif
726
727always_inline sctp_connection_t *
728sctp_connection_get (u32 conn_index, u32 thread_index)
729{
730 if (PREDICT_FALSE
731 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
732 return 0;
733 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
734}
735
Marco Varlese54432f82018-02-15 17:01:56 +0100736#define SELECT_MAX_RETRIES 8
Marco Varlese191a5942017-10-30 18:17:21 +0100737
Marco Varlese54432f82018-02-15 17:01:56 +0100738always_inline u8
739sctp_data_subconn_select (sctp_connection_t * sctp_conn)
740{
Marco Varlese54432f82018-02-15 17:01:56 +0100741 u32 sub = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlesef3ab4892018-02-19 15:23:13 +0100742 u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd;
743 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100744 {
Marco Varlesef3ab4892018-02-19 15:23:13 +0100745 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
746 continue;
747
748 if (sctp_conn->sub_conn[i].cwnd > cwnd)
749 {
750 sub = i;
751 cwnd = sctp_conn->sub_conn[i].cwnd;
752 }
Marco Varlese191a5942017-10-30 18:17:21 +0100753 }
Marco Varlese54432f82018-02-15 17:01:56 +0100754 return sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100755}
756
757always_inline u8
Marco Varlese54432f82018-02-15 17:01:56 +0100758sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
Marco Varlese191a5942017-10-30 18:17:21 +0100759{
Marco Varlese54432f82018-02-15 17:01:56 +0100760 u8 i;
Marco Varlese191a5942017-10-30 18:17:21 +0100761
Marco Varlese54432f82018-02-15 17:01:56 +0100762 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100763 {
Marco Varlese54432f82018-02-15 17:01:56 +0100764 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
765 ip6h->dst_address.as_u64[0] &&
766 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
767 ip6h->dst_address.as_u64[1] &&
768 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
769 ip6h->src_address.as_u64[0] &&
770 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
771 ip6h->src_address.as_u64[1])
772 return i;
Marco Varlese191a5942017-10-30 18:17:21 +0100773 }
Marco Varlese54432f82018-02-15 17:01:56 +0100774 clib_warning ("Did not find a sub-connection; defaulting to %u",
775 MAIN_SCTP_SUB_CONN_IDX);
776 return MAIN_SCTP_SUB_CONN_IDX;
777}
778
779always_inline u8
780sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
781{
782 u8 i;
783
784 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
785 {
786 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
787 ip4h->dst_address.as_u32
788 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
789 ip4h->src_address.as_u32)
790 return i;
791 }
792 clib_warning ("Did not find a sub-connection; defaulting to %u",
793 MAIN_SCTP_SUB_CONN_IDX);
794 return MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100795}
796
797/**
798 * Push SCTP header to buffer
799 *
800 * @param vm - vlib_main
801 * @param b - buffer to write the header to
802 * @param sp_net - source port net order
803 * @param dp_net - destination port net order
804 * @param sctp_hdr_opts_len - header and options length in bytes
805 *
806 * @return - pointer to start of SCTP header
807 */
808always_inline void *
809vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
810 u8 sctp_hdr_opts_len)
811{
812 sctp_full_hdr_t *full_hdr;
813
814 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
815
816 full_hdr->hdr.src_port = sp;
817 full_hdr->hdr.dst_port = dp;
818 full_hdr->hdr.checksum = 0;
819 return full_hdr;
820}
821
822/**
823 * Push SCTP header to buffer
824 *
825 * @param b - buffer to write the header to
826 * @param sp_net - source port net order
827 * @param dp_net - destination port net order
828 * @param sctp_hdr_opts_len - header and options length in bytes
829 *
830 * @return - pointer to start of SCTP header
831 */
832always_inline void *
833vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
834 u8 sctp_hdr_opts_len)
835{
836 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
837 sctp_hdr_opts_len);
838}
839
Marco Varlese3c6a9762018-03-01 11:19:59 +0100840always_inline u8
841sctp_next_avail_subconn (sctp_connection_t * sctp_conn)
842{
843 u8 i;
844
845 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
846 {
847 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
848 return i;
849 }
850 return MAX_SCTP_CONNECTIONS;
851}
852
Marco Varlesef3ab4892018-02-19 15:23:13 +0100853always_inline void
854update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
855{
856 u8 i;
857 u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX;
858
859 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
860 {
861 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
862 {
863 if (sctp_conn->sub_conn[i].PMTU <
864 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
865 smallest_pmtu_index = i;
866 }
867 }
868
869 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
870}
871
872/* As per RFC4960; section 7.2.1: Slow-Start */
873always_inline void
874sctp_init_cwnd (sctp_connection_t * sctp_conn)
875{
876 u8 i;
877 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
878 {
879 /* Section 7.2.1; point (1) */
880 sctp_conn->sub_conn[i].cwnd =
881 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
882 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
883
884 /* Section 7.2.1; point (3) */
885 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
886
887 /* Section 7.2.2; point (1) */
888 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
889 }
890}
891
892always_inline u8
893sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
894{
895 return 0;
896}
897
898always_inline u8
899cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
900{
901 return 0;
902}
903
904/* As per RFC4960; section 7.2.1: Slow-Start */
905always_inline void
906update_cwnd (sctp_connection_t * sctp_conn)
907{
908 u8 i;
909
910 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
911 {
912 /* Section 7.2.1; point (2) */
913 if (sctp_conn->sub_conn[i].is_retransmitting)
914 {
915 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
916 continue;
917 }
918
919 /* Section 7.2.2; point (4) */
920 if (sctp_conn->sub_conn[i].last_data_ts >
921 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
922 {
923 sctp_conn->sub_conn[i].cwnd =
924 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
925 4 * sctp_conn->sub_conn[i].PMTU);
926 continue;
927 }
928
929 /* Section 7.2.1; point (5) */
930 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
931 {
932 if (!cwnd_fully_utilized (sctp_conn, i))
933 continue;
934
935 if (sctp_in_cong_recovery (sctp_conn, i))
936 continue;
937
938 sctp_conn->sub_conn[i].cwnd =
939 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
940 }
941 }
942}
943
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */