/*
 * Copyright (c) 2017 SUSE LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef included_vnet_sctp_h
16#define included_vnet_sctp_h
17
18#include <vnet/vnet.h>
19#include <vnet/ip/ip.h>
20#include <vnet/sctp/sctp_timer.h>
21#include <vnet/sctp/sctp_packet.h>
22#include <vnet/session/transport.h>
23#include <vnet/session/session.h>
24
/*
 * SCTP timers.
 *
 * X-macro list: every _(sym, str) entry describes one timer.  The list is
 * expanded right below to generate the SCTP_TIMER_<sym> enumerators; each
 * sub-connection keeps one timer handle per entry (see the timers[] array
 * of sctp_sub_connection_t).
 */
#define foreach_sctp_timer                      \
  _(T1_INIT, "T1_INIT")                         \
  _(T1_COOKIE, "T1_COOKIE")                     \
  _(T2_SHUTDOWN, "T2_SHUTDOWN")                 \
  _(T3_RXTX, "T3_RXTX")                         \
  _(T4_HEARTBEAT, "T4_HB")                      \
  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")
33
/**
 * Timer identifiers, generated from foreach_sctp_timer.
 * SCTP_N_TIMERS is the number of timers and sizes the per-sub-connection
 * timers[] array.
 */
typedef enum _sctp_timers
{
#define _(sym, str) SCTP_TIMER_##sym,
  foreach_sctp_timer
#undef _
  SCTP_N_TIMERS
} sctp_timers_e;

/** Handle value stored in a timers[] slot that has no running timer. */
#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
43
Marco Varlesedf5a99c2018-02-06 13:48:30 +010044always_inline char *
45sctp_timer_to_string (u8 timer_id)
46{
47 switch (timer_id)
48 {
49 case SCTP_TIMER_T1_INIT:
50 return "SCTP_TIMER_T1_INIT";
51 case SCTP_TIMER_T1_COOKIE:
52 return "SCTP_TIMER_T1_COOKIE";
53 case SCTP_TIMER_T2_SHUTDOWN:
54 return "SCTP_TIMER_T2_SHUTDOWN";
55 case SCTP_TIMER_T3_RXTX:
56 return "SCTP_TIMER_T3_RXTX";
57 case SCTP_TIMER_T4_HEARTBEAT:
58 return "SCTP_TIMER_T4_HEARTBEAT";
59 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
60 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
61 }
62 return NULL;
63}
64
/**
 * SCTP error codes.
 *
 * One SCTP_ERROR_<n> enumerator is generated for each sctp_error (n, s)
 * entry of vnet/sctp/sctp_error.def; SCTP_N_ERROR is the number of entries.
 */
typedef enum _sctp_error
{
#define sctp_error(n,s) SCTP_ERROR_##n,
#include <vnet/sctp/sctp_error.def>
#undef sctp_error
  SCTP_N_ERROR,
} sctp_error_t;
72
/* Zero; presumably passed where no chunk flag applies - confirm at call sites. */
#define NO_FLAG 0

/*
 * Single-bit tests: each evaluates to non-zero when the corresponding bit of
 * var is set.  Note that the T and E tests both look at bit 0, B at bit 1
 * and U at bit 2.
 */
#define IS_T_BIT_SET(var) ((var) & (1))
#define IS_E_BIT_SET(var) ((var) & (1))
#define IS_B_BIT_SET(var) ((var) & (1<<1))
#define IS_U_BIT_SET(var) ((var) & (1<<2))

/* Number of sub-connection slots held by every sctp_connection_t. */
#define MAX_SCTP_CONNECTIONS 8
/* Slot of the main sub-connection inside sub_conn[]. */
#define MAIN_SCTP_SUB_CONN_IDX 0
82
/*
 * sctp_trajectory_add_start (b, start)
 *
 * When VLIB_BUFFER_TRACE_TRAJECTORY is enabled this invokes the
 * vlib_buffer_trace_trajectory_cb callback with (b, start); otherwise it
 * expands to nothing.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define sctp_trajectory_add_start(b, start)			\
{								\
    (*vlib_buffer_trace_trajectory_cb) (b, start);		\
}
#else
#define sctp_trajectory_add_start(b, start)
#endif
91
/**
 * States of a single sub-connection, stored in the state field of
 * sctp_sub_connection_t.  DOWN is zero, so a zero-initialised
 * sub-connection is DOWN.
 */
enum _sctp_subconn_state
{
  SCTP_SUBCONN_STATE_DOWN = 0,
  SCTP_SUBCONN_STATE_UP,
  SCTP_SUBCONN_STATE_ALLOW_HB
};
98
/** Value assigned to ssthresh by sctp_init_cwnd(), in bytes. */
#define SCTP_INITIAL_SSHTRESH 65535

/**
 * Per-destination state of an association: one entry of the sub_conn[]
 * array of sctp_connection_t.
 */
typedef struct _sctp_sub_connection
{
  transport_connection_t connection;	      /**< Common transport data. Must stay the first member:
						   a transport_connection_t pointer is cast back to
						   sctp_sub_connection_t. */

  u8 subconn_idx; /**< Position of this sub-connection inside the parent's sub_conn[] array. */
  u32 error_count; /**< The current error count for this destination. */
  u32 error_threshold; /**< Error count at which this destination is marked down. */
  u32 cwnd; /**< Congestion window (in bytes), adjusted by the sender based on
		 observed network conditions. */
  u32 ssthresh; /**< Slow-start threshold (in bytes), used by the sender to tell
		     slow-start from congestion avoidance. */

  u32 rtt_ts;	/**< Timestamp taken when the packet used for RTT measurement was sent. */

  u32 RTO; /**< The current retransmission timeout value. */
  u32 SRTT; /**< The current smoothed round-trip time. */
  f32 RTTVAR; /**< The current RTT variation. */

  u32 partially_acked_bytes; /**< Tracks the increase of cwnd while in
				  congestion avoidance mode (see RFC4960, Section 7.2.2). */

  u8 state; /**< Current state of this destination; one of the
		 SCTP_SUBCONN_STATE_* values (DOWN, UP, ALLOW_HB). */

  u16 PMTU; /**< The current known path MTU. */

  u32 timers[SCTP_N_TIMERS]; /**< Timer handles of this destination, indexed by sctp_timers_e;
				  SCTP_TIMER_HANDLE_INVALID when the timer is not running. */

  u8 RTO_pending; /**< Flag tracking whether one of the DATA chunks sent to
		       this address is currently being used to compute an RTT.
		       If this flag is 0, the next DATA chunk sent to this destination
		       should be used to compute an RTT and this flag should be set.
		       Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
		       the flag is cleared. */

  u32 last_seen; /**< Time at which a packet was last sent to this destination.
		      Can be used to determine if a HEARTBEAT is needed. */

  u32 last_data_ts; /**< Timestamp of the last time a DATA chunk was sent. */

  u8 unacknowledged_hb;	/**< Number of heartbeats still unacknowledged; if more than
			     SCTP_PATH_MAX_RETRANS the connection is considered unreachable. */

  u8 is_retransmitting;	/**< Flag (0 = no, 1 = yes): the connection is retransmitting a previous packet. */

  u8 enqueue_state; /**< If set to 1, DATA is still being handled, hence this connection cannot be shut down yet. */

} sctp_sub_connection_t;
148
/** Option block embedded in sctp_connection_t as snd_opts. */
typedef struct
{
  u32 a_rwnd; /**< Documented in the original source as "maximum segment size advertised".
		   NOTE(review): the field name suggests an advertised receiver
		   window rather than a segment size - confirm against users. */

} sctp_options_t;
154
/*
 * Helpers for the out_of_order_map, an array of 32-bit words used as a bit
 * array: bit k lives in word k / 32 at position k % 32.
 *
 * A and k are fully parenthesised so that expression arguments (e.g.
 * "base + off") address the intended bit, and the mask is built from an
 * unsigned one so that bit 31 can be touched without shifting into the sign
 * bit.  k is evaluated twice: do not pass expressions with side effects.
 */
#define SET_BIT(A,k)   ((A)[(k) / 32] |= (1U << ((k) % 32)))
#define CLEAR_BIT(A,k) ((A)[(k) / 32] &= ~(1U << ((k) % 32)))
#define TEST_BIT(A,k)  ((A)[(k) / 32] & (1U << ((k) % 32)))
159
160always_inline void
161_bytes_swap (void *pv, size_t n)
162{
163 char *p = pv;
164 size_t lo, hi;
165 for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
166 {
167 char tmp = p[lo];
168 p[lo] = p[hi];
169 p[hi] = tmp;
170 }
171}
172
173#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x));
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100174
/* Number of u32 words in the out_of_order_map of sctp_connection_t. */
#define MAX_INFLIGHT_PACKETS 128
/* NOTE(review): presumably the ack_state count at which a SACK is sent - confirm against users. */
#define MAX_ENQUEABLE_SACKS 2

/* This parameter indicates to the receiver how much increment in
 * milliseconds the sender wishes the receiver to add to its default
 * cookie life-span.
 */
#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
183
/**
 * An SCTP association: the set of sub-connections (one per destination)
 * plus the state shared by all of them.
 */
typedef struct _sctp_connection
{
  sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS];	/**< Per-destination state; each entry starts with the
							     common transport data. Must stay the first member:
							     sub-connection pointers are converted back to the
							     enclosing sctp_connection_t. */

  u8 state; /**< SCTP state as per sctp_state_t */
  u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */

  u32 local_tag; /**< INIT_TAG generated locally */
  u32 remote_tag; /**< INIT_TAG generated by the remote peer */

  u32 local_initial_tsn; /**< Initial TSN generated locally */
  u32 remote_initial_tsn; /**< Initial TSN generated by the remote peer */

  u32 peer_cookie_life_span_increment; /**< NOTE(review): undocumented in the original; presumably the
					    cookie life-span increment requested by the peer - confirm. */

  u32 overall_err_count; /**< The overall association error count. */
  u32 overall_err_treshold; /**< Threshold for this association: if the overall error count
				 reaches it, the association is torn down. */

  u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */

  u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
		     This is sent in the INIT or INIT ACK chunk to the peer
		     and incremented each time a DATA chunk is assigned a
		     TSN (normally just prior to transmit or during
		     fragmentation). */

  u32 last_unacked_tsn;	/**< Last TSN number still unacked */
  u32 next_tsn_expected; /**< The next TSN number expected to be received. */

  u32 last_rcvd_tsn; /**< The last TSN received in sequence. This value
			  is set initially by taking the peer's initial TSN,
			  received in the INIT or INIT ACK chunk, and
			  subtracting one from it. */

  u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< Bit array (see SET_BIT / CLEAR_BIT / TEST_BIT) indicating
						   which out-of-order TSNs have been received (relative to
						   the last received TSN). If no gaps exist, i.e. no
						   out-of-order packets have been received, the array is
						   all zero. */

  u8 ack_state;	/**< Indicates whether the next received packet is to be responded to with a SACK.
		     Initialized to 0 and incremented when a packet is received.
		     If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
		     Note: this is used only when no DATA chunks are received out-of-order.
		     When DATA chunks are out-of-order, SACKs are not delayed (see RFC4960, Section 6). */

  u8 smallest_PMTU_idx;	/**< Index of the sub-connection with the smallest PMTU discovered across
			     all of the peer's transport addresses. */

  u8 overall_sending_status; /**< 0 indicates first fragment of a user message
				  1 indicates normal stream
				  2 indicates last fragment of a user message */

  sctp_options_t snd_opts; /**< Options block (see sctp_options_t). */

  u8 next_avail_sub_conn; /**< Index of the next free slot in sub_conn */

  u8 forming_association_changed; /**< Flag indicating whether the original association has been modified
				       during the life-span of the association itself. For instance,
				       a new sub-connection might have been added. */

} sctp_connection_t;
244
/** Signature of a timer expiration callback: (connection index, timer id). */
typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);

/*
 * All functions declared below are defined outside this header; the
 * grouping comments are derived from the names only.
 */

/* Connection / sub-connection management */
sctp_connection_t *sctp_connection_new (u8 thread_index);
void sctp_sub_connection_add_ip4 (u8 thread_index,
				  sctp_ipv4_addr_param_t * ipv4_addr);
void sctp_sub_connection_add_ip6 (u8 thread_index,
				  sctp_ipv6_addr_param_t * ipv6_addr);
void sctp_connection_close (sctp_connection_t * sctp_conn);
void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
void sctp_connection_del (sctp_connection_t * sctp_conn);

/* Output path */
u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
void sctp_send_init (sctp_connection_t * sctp_conn);
void sctp_send_shutdown (sctp_connection_t * sctp_conn);
void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
			     vlib_buffer_t * b);
void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
				  vlib_buffer_t * b0);
void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
				 u8 is_ip4);
void sctp_flush_frames_to_output (u8 thread_index);
void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);

/* Formatters */
format_function_t format_sctp_state;

u8 *format_sctp_connection_id (u8 * s, va_list * args);
u8 *format_sctp_connection (u8 * s, va_list * args);
u8 *format_sctp_scoreboard (u8 * s, va_list * args);
u8 *format_sctp_header (u8 * s, va_list * args);
u8 *format_sctp_tx_trace (u8 * s, va_list * args);

/* Initialisation */
clib_error_t *sctp_init (vlib_main_t * vm);
void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
void sctp_init_mss (sctp_connection_t * sctp_conn);

/* Chunk preparation; idx selects the sub-connection inside sctp_conn */
void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
				 vlib_buffer_t * b, ip4_address_t * ip4_addr,
				 ip6_address_t * ip6_addr);
void
sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
					  u8 idx, vlib_buffer_t * b,
					  ip4_address_t * ip4_addr,
					  ip6_address_t * ip6_addr);
void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
				       vlib_buffer_t * b,
				       ip4_address_t * ip4_addr,
				       ip6_address_t * ip6_addr);

void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
				     vlib_buffer_t * b,
				     sctp_state_cookie_param_t * sc);
void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
				    vlib_buffer_t * b);
void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
			      vlib_buffer_t * b);
void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
				       vlib_buffer_t * b);

u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100307
/** IP protocol number used for SCTP. */
#define IP_PROTOCOL_SCTP 132

/**
 * SCTP FSM state definitions as per RFC4960.
 * X-macro list expanded below into the SCTP_STATE_<sym> enumerators.
 */
#define foreach_sctp_fsm_state                  \
  _(CLOSED, "CLOSED")                           \
  _(COOKIE_WAIT, "COOKIE_WAIT")                 \
  _(COOKIE_ECHOED, "COOKIE_ECHOED")             \
  _(ESTABLISHED, "ESTABLISHED")                 \
  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")       \
  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")             \
  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")     \
  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")

/**
 * Association states generated from foreach_sctp_fsm_state.
 * SCTP_N_STATES is the number of states and sizes the first dimension of
 * the dispatch table in sctp_main_t.
 */
typedef enum _sctp_state
{
#define _(sym, str) SCTP_STATE_##sym,
  foreach_sctp_fsm_state
#undef _
  SCTP_N_STATES
} sctp_state_t;
328
329always_inline char *
330sctp_state_to_string (u8 state)
331{
332 switch (state)
333 {
334 case SCTP_STATE_CLOSED:
335 return "SCTP_STATE_CLOSED";
336 case SCTP_STATE_COOKIE_WAIT:
337 return "SCTP_STATE_COOKIE_WAIT";
338 case SCTP_STATE_COOKIE_ECHOED:
339 return "SCTP_STATE_COOKIE_ECHOED";
340 case SCTP_STATE_ESTABLISHED:
341 return "SCTP_STATE_ESTABLISHED";
342 case SCTP_STATE_SHUTDOWN_PENDING:
343 return "SCTP_STATE_SHUTDOWN_PENDING";
344 case SCTP_STATE_SHUTDOWN_SENT:
345 return "SCTP_STATE_SHUTDOWN_SENT";
346 case SCTP_STATE_SHUTDOWN_RECEIVED:
347 return "SCTP_STATE_SHUTDOWN_RECEIVED";
348 case SCTP_STATE_SHUTDOWN_ACK_SENT:
349 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
350 }
351 return NULL;
352}
353
354always_inline char *
355sctp_chunk_to_string (u8 type)
356{
357 switch (type)
358 {
359 case DATA:
360 return "DATA";
361 case INIT:
362 return "INIT";
363 case INIT_ACK:
364 return "INIT_ACK";
365 case SACK:
366 return "SACK";
367 case HEARTBEAT:
368 return "HEARTBEAT";
369 case HEARTBEAT_ACK:
370 return "HEARTBEAT_ACK";
371 case ABORT:
372 return "ABORT";
373 case SHUTDOWN:
374 return "SHUTDOWN";
375 case SHUTDOWN_ACK:
376 return "SHUTDOWN_ACK";
377 case OPERATION_ERROR:
378 return "OPERATION_ERROR";
379 case COOKIE_ECHO:
380 return "COOKIE_ECHO";
381 case COOKIE_ACK:
382 return "COOKIE_ACK";
383 case ECNE:
384 return "ECNE";
385 case CWR:
386 return "CWR";
387 case SHUTDOWN_COMPLETE:
388 return "SHUTDOWN_COMPLETE";
389 }
390 return NULL;
391}
392
393always_inline char *
394sctp_optparam_type_to_string (u8 type)
395{
396 switch (type)
397 {
398 case SCTP_IPV4_ADDRESS_TYPE:
399 return "SCTP_IPV4_ADDRESS_TYPE";
400 case SCTP_IPV6_ADDRESS_TYPE:
401 return "SCTP_IPV6_ADDRESS_TYPE";
402 case SCTP_STATE_COOKIE_TYPE:
403 return "SCTP_STATE_COOKIE_TYPE";
404 case SCTP_UNRECOGNIZED_TYPE:
405 return "SCTP_UNRECOGNIZED_TYPE";
406 case SCTP_COOKIE_PRESERVATIVE_TYPE:
407 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
408 case SCTP_HOSTNAME_ADDRESS_TYPE:
409 return "SCTP_HOSTNAME_ADDRESS_TYPE";
410 case SCTP_SUPPORTED_ADDRESS_TYPES:
411 return "SCTP_SUPPORTED_ADDRESS_TYPES";
412 }
413 return NULL;
414}
415
/*
 * Time base.  Every expression-valued macro below is wrapped in parentheses
 * so that it can be used inside larger expressions (division, comparison,
 * ...) without operator-precedence surprises.
 */
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1 / SCTP_TICK))	/**< SCTP tick frequency */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/* As per RFC4960, page 83 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST 4
/* Smoothing factors of the RTO calculation.  They are fractions: written as
   integer divisions (1/8, 1/4) they would evaluate to 0. */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	/* the overall connection */
#define SCTP_PATH_MAX_RETRANS 5	/* number of attempts per destination address */
#define SCTP_MAX_INIT_RETRANS 8	/* number of attempts */
#define SCTP_HB_INTERVAL (30 * SHZ)	/* 30 seconds */
#define SCTP_HB_MAX_BURST 1

#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)	/* 15 seconds; the time-interval after which the connection is considered IDLE */

#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */
437
/**
 * One entry of the dispatch table held in sctp_main_t: a "next" value and
 * an "error" value.
 * NOTE(review): presumably the next graph node index and the SCTP_ERROR_*
 * code to use for a given (state, flags) pair - confirm against the table's
 * initialisation.
 */
typedef struct _sctp_lookup_dispatch
{
  u8 next, error;
} sctp_lookup_dispatch_t;
442
/** Global state of the SCTP stack; the single instance is sctp_main. */
typedef struct _sctp_main
{
  /** Per-worker thread SCTP connection pools, indexed by thread index */
  sctp_connection_t **connections;

  /** Pool of listeners. */
  sctp_connection_t *listener_pool;

  /** Dispatch table by state and flags */
  sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];

  u8 log2_tstamp_clocks_per_tick;
  /** Factor applied to the CPU clock by sctp_set_time_now() */
  f64 tstamp_ticks_per_clock;
  /** Per-thread cached time, written by sctp_set_time_now() */
  u32 *time_now;

  /** per-worker tx buffer free lists */
  u32 **tx_buffers;
  /** per-worker tx frames to SCTP 4/6 output nodes */
  vlib_frame_t **tx_frames[2];
  /** per-worker tx frames to ip 4/6 lookup nodes */
  vlib_frame_t **ip_lookup_tx_frames[2];

  /** Per worker-thread timer wheel for connections timers */
  tw_timer_wheel_16t_2w_512sl_t *timer_wheels;

  /** Pool of half-open connections, protected by half_open_lock */
  sctp_connection_t *half_open_connections;
  clib_spinlock_t half_open_lock;

  /* TODO: Congestion control algorithms registered */
  /* sctp_cc_algorithm_t *cc_algos; */

  /** Flag that indicates if stack is on or off */
  u8 is_enabled;

  /** Number of preallocated connections */
  u32 preallocated_connections;

  /** Transport table (preallocation) size parameters */
  u32 local_endpoints_table_memory;
  u32 local_endpoints_table_buckets;

  /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
  ip4_address_t *ip4_src_addresses;
  u32 last_v4_address_rotor;
  u32 last_v6_address_rotor;
  ip6_address_t *ip6_src_addresses;

  /** vlib buffer size */
  u32 bytes_per_buffer;

  /** NOTE(review): presumably set through sctp_punt_unknown() for IPv4 / IPv6 - confirm. */
  u8 punt_unknown4;
  u8 punt_unknown6;

} sctp_main_t;
498
/* The global SCTP state and the SCTP input/output graph node registrations;
   all of them are defined outside this header. */
extern sctp_main_t sctp_main;
extern vlib_node_registration_t sctp4_input_node;
extern vlib_node_registration_t sctp6_input_node;
extern vlib_node_registration_t sctp4_output_node;
extern vlib_node_registration_t sctp6_output_node;
504
505always_inline sctp_main_t *
506vnet_get_sctp_main ()
507{
508 return &sctp_main;
509}
510
511always_inline sctp_header_t *
512sctp_buffer_hdr (vlib_buffer_t * b)
513{
514 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
515 return (sctp_header_t *) (b->data + b->current_data
516 + vnet_buffer (b)->sctp.hdr_offset);
517}
518
/* Defined outside this header.  NOTE(review): presumably turns the SCTP stack
   on (is_en != 0) or off - confirm against the definition. */
clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
520
521always_inline sctp_connection_t *
522sctp_half_open_connection_get (u32 conn_index)
523{
524 sctp_connection_t *tc = 0;
525 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
526 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
527 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
Marco Varlese04e5d642018-02-23 17:43:06 +0100528 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].subconn_idx = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100529 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
530 return tc;
531}
532
533/**
534 * Cleanup half-open connection
535 *
536 */
537always_inline void
538sctp_half_open_connection_del (sctp_connection_t * tc)
539{
Marco Varlese15cc6a82018-02-21 12:39:52 +0100540 sctp_main_t *sctp_main = vnet_get_sctp_main ();
541 clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
542 pool_put_index (sctp_main->half_open_connections,
Marco Varlese191a5942017-10-30 18:17:21 +0100543 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
544 if (CLIB_DEBUG)
545 memset (tc, 0xFA, sizeof (*tc));
Marco Varlese15cc6a82018-02-21 12:39:52 +0100546 clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
Marco Varlese191a5942017-10-30 18:17:21 +0100547}
548
549always_inline u32
550sctp_set_time_now (u32 thread_index)
551{
552 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
553 * sctp_main.tstamp_ticks_per_clock;
554 return sctp_main.time_now[thread_index];
555}
556
557always_inline void
558sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
559 u32 interval)
560{
561 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
562 vlib_get_thread_index ());
563 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
564 SCTP_TIMER_HANDLE_INVALID);
565
566 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
Marco Varlese21c8baf2018-02-02 17:17:51 +0100567 sub->timers[timer_id] =
Marco Varlese191a5942017-10-30 18:17:21 +0100568 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
569 sub->c_c_index, timer_id, interval);
570}
571
572always_inline void
573sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
574{
575 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
576 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
577 return;
578
579 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
580
581 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
582 sub->timers[timer_id]);
583 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
584}
585
Marco Varlese191a5942017-10-30 18:17:21 +0100586/**
587 * Try to cleanup half-open connection
588 *
589 * If called from a thread that doesn't own tc, the call won't have any
590 * effect.
591 *
592 * @param tc - connection to be cleaned up
593 * @return non-zero if cleanup failed.
594 */
595always_inline int
596sctp_half_open_connection_cleanup (sctp_connection_t * tc)
597{
598 /* Make sure this is the owning thread */
599 if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
600 vlib_get_thread_index ())
601 return 1;
602 sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
603 sctp_half_open_connection_del (tc);
604 return 0;
605}
606
607always_inline u32
608sctp_header_bytes ()
609{
610 return sizeof (sctp_header_t);
611}
612
613always_inline sctp_connection_t *
614sctp_get_connection_from_transport (transport_connection_t * tconn)
615{
616 ASSERT (tconn != NULL);
617
618 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
619#if SCTP_ADV_DEBUG
620 if (sub == NULL)
621 SCTP_ADV_DBG ("sub == NULL");
622 if (sub->parent == NULL)
623 SCTP_ADV_DBG ("sub->parent == NULL");
624#endif
Marco Varlese04e5d642018-02-23 17:43:06 +0100625 if (sub->subconn_idx > 0)
626 return (sctp_connection_t *) sub -
627 (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
628
629 return (sctp_connection_t *) sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100630}
631
632always_inline u32
633sctp_time_now (void)
634{
635 return sctp_main.time_now[vlib_get_thread_index ()];
636}
637
Marco Varlese21c8baf2018-02-02 17:17:51 +0100638#define ABS(x) ((x) > 0) ? (x) : -(x);
639
640always_inline void
641sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
642{
643 /* See RFC4960, 6.3.1. RTO Calculation */
644 u32 RTO = 0;
645 f32 RTTVAR = 0;
646 u32 now = sctp_time_now ();
647 u32 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
648 u32 R = prev_ts - now;
649
650 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
651 {
652 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
653 return;
654 }
655
656 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
657 {
658 sctp_conn->sub_conn[conn_idx].SRTT = R;
659 RTTVAR = R / 2;
660
661 if (RTTVAR == 0)
662 RTTVAR = 100e-3; /* 100 ms */
663
664 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
665 }
666 else // C3: RTT already exists; let's recalculate
667 {
668 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
669 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
670
671 if (RTTVAR == 0)
672 RTTVAR = 100e-3; /* 100 ms */
673
674 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
675
676 sctp_conn->sub_conn[conn_idx].SRTT =
677 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
678 SCTP_RTO_ALPHA * R;
679 }
680
681 RTO =
682 sctp_conn->sub_conn[conn_idx].SRTT +
683 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
684 if (RTO < SCTP_RTO_MIN) // C6
685 RTO = SCTP_RTO_MIN;
686
687 if (RTO > SCTP_RTO_MAX) // C7
688 RTO = SCTP_RTO_MAX;
689
690 sctp_conn->sub_conn[conn_idx].RTO = RTO;
691}
692
Marco Varlese191a5942017-10-30 18:17:21 +0100693always_inline void
694sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
695 u32 interval)
696{
697 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
698 vlib_get_thread_index ());
699 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
700
701 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
702 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
703 sub->timers[timer_id]);
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100704
Marco Varlese191a5942017-10-30 18:17:21 +0100705 tc->sub_conn[conn_idx].timers[timer_id] =
706 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
707 sub->c_c_index, timer_id, interval);
708}
709
710always_inline sctp_connection_t *
711sctp_listener_get (u32 tli)
712{
713 return pool_elt_at_index (sctp_main.listener_pool, tli);
714}
715
716#endif
717
718always_inline sctp_connection_t *
719sctp_connection_get (u32 conn_index, u32 thread_index)
720{
721 if (PREDICT_FALSE
722 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
723 return 0;
724 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
725}
726
Marco Varlese54432f82018-02-15 17:01:56 +0100727#define SELECT_MAX_RETRIES 8
Marco Varlese191a5942017-10-30 18:17:21 +0100728
Marco Varlese54432f82018-02-15 17:01:56 +0100729always_inline u8
730sctp_data_subconn_select (sctp_connection_t * sctp_conn)
731{
Marco Varlese54432f82018-02-15 17:01:56 +0100732 u32 sub = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlesef3ab4892018-02-19 15:23:13 +0100733 u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd;
734 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100735 {
Marco Varlesef3ab4892018-02-19 15:23:13 +0100736 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
737 continue;
738
739 if (sctp_conn->sub_conn[i].cwnd > cwnd)
740 {
741 sub = i;
742 cwnd = sctp_conn->sub_conn[i].cwnd;
743 }
Marco Varlese191a5942017-10-30 18:17:21 +0100744 }
Marco Varlese54432f82018-02-15 17:01:56 +0100745 return sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100746}
747
748always_inline u8
Marco Varlese54432f82018-02-15 17:01:56 +0100749sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
Marco Varlese191a5942017-10-30 18:17:21 +0100750{
Marco Varlese54432f82018-02-15 17:01:56 +0100751 u8 i;
Marco Varlese191a5942017-10-30 18:17:21 +0100752
Marco Varlese54432f82018-02-15 17:01:56 +0100753 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100754 {
Marco Varlese54432f82018-02-15 17:01:56 +0100755 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
756 ip6h->dst_address.as_u64[0] &&
757 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
758 ip6h->dst_address.as_u64[1] &&
759 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
760 ip6h->src_address.as_u64[0] &&
761 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
762 ip6h->src_address.as_u64[1])
763 return i;
Marco Varlese191a5942017-10-30 18:17:21 +0100764 }
Marco Varlese54432f82018-02-15 17:01:56 +0100765 clib_warning ("Did not find a sub-connection; defaulting to %u",
766 MAIN_SCTP_SUB_CONN_IDX);
767 return MAIN_SCTP_SUB_CONN_IDX;
768}
769
770always_inline u8
771sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
772{
773 u8 i;
774
775 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
776 {
777 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
778 ip4h->dst_address.as_u32
779 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
780 ip4h->src_address.as_u32)
781 return i;
782 }
783 clib_warning ("Did not find a sub-connection; defaulting to %u",
784 MAIN_SCTP_SUB_CONN_IDX);
785 return MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100786}
787
788/**
789 * Push SCTP header to buffer
790 *
791 * @param vm - vlib_main
792 * @param b - buffer to write the header to
793 * @param sp_net - source port net order
794 * @param dp_net - destination port net order
795 * @param sctp_hdr_opts_len - header and options length in bytes
796 *
797 * @return - pointer to start of SCTP header
798 */
799always_inline void *
800vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
801 u8 sctp_hdr_opts_len)
802{
803 sctp_full_hdr_t *full_hdr;
804
805 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
806
807 full_hdr->hdr.src_port = sp;
808 full_hdr->hdr.dst_port = dp;
809 full_hdr->hdr.checksum = 0;
810 return full_hdr;
811}
812
813/**
814 * Push SCTP header to buffer
815 *
816 * @param b - buffer to write the header to
817 * @param sp_net - source port net order
818 * @param dp_net - destination port net order
819 * @param sctp_hdr_opts_len - header and options length in bytes
820 *
821 * @return - pointer to start of SCTP header
822 */
823always_inline void *
824vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
825 u8 sctp_hdr_opts_len)
826{
827 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
828 sctp_hdr_opts_len);
829}
830
Marco Varlesef3ab4892018-02-19 15:23:13 +0100831always_inline void
832update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
833{
834 u8 i;
835 u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX;
836
837 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
838 {
839 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
840 {
841 if (sctp_conn->sub_conn[i].PMTU <
842 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
843 smallest_pmtu_index = i;
844 }
845 }
846
847 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
848}
849
850/* As per RFC4960; section 7.2.1: Slow-Start */
851always_inline void
852sctp_init_cwnd (sctp_connection_t * sctp_conn)
853{
854 u8 i;
855 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
856 {
857 /* Section 7.2.1; point (1) */
858 sctp_conn->sub_conn[i].cwnd =
859 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
860 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
861
862 /* Section 7.2.1; point (3) */
863 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
864
865 /* Section 7.2.2; point (1) */
866 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
867 }
868}
869
/**
 * Placeholder: always returns 0 and ignores both arguments.
 * NOTE(review): presumably meant to report whether sub-connection idx is in
 * congestion recovery - not implemented here.
 */
always_inline u8
sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
{
  return 0;
}
875
/**
 * Placeholder: always returns 0 and ignores both arguments.
 * NOTE(review): presumably meant to report whether the congestion window of
 * sub-connection idx is fully used - not implemented here.
 */
always_inline u8
cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
{
  return 0;
}
881
882/* As per RFC4960; section 7.2.1: Slow-Start */
883always_inline void
884update_cwnd (sctp_connection_t * sctp_conn)
885{
886 u8 i;
887
888 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
889 {
890 /* Section 7.2.1; point (2) */
891 if (sctp_conn->sub_conn[i].is_retransmitting)
892 {
893 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
894 continue;
895 }
896
897 /* Section 7.2.2; point (4) */
898 if (sctp_conn->sub_conn[i].last_data_ts >
899 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
900 {
901 sctp_conn->sub_conn[i].cwnd =
902 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
903 4 * sctp_conn->sub_conn[i].PMTU);
904 continue;
905 }
906
907 /* Section 7.2.1; point (5) */
908 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
909 {
910 if (!cwnd_fully_utilized (sctp_conn, i))
911 continue;
912
913 if (sctp_in_cong_recovery (sctp_conn, i))
914 continue;
915
916 sctp_conn->sub_conn[i].cwnd =
917 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
918 }
919 }
920}
921
Marco Varlese191a5942017-10-30 18:17:21 +0100922/*
923 * fd.io coding-style-patch-verification: ON
924 *
925 * Local Variables:
926 * eval: (c-set-style "gnu")
927 * End:
928 */