blob: 9f17e5337ba17966c2cc47c95ff14ad3df84c9e5 [file] [log] [blame]
Marco Varlese191a5942017-10-30 18:17:21 +01001/*
2 * Copyright (c) 2017 SUSE LLC.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15#ifndef included_vnet_sctp_h
16#define included_vnet_sctp_h
17
18#include <vnet/vnet.h>
19#include <vnet/ip/ip.h>
20#include <vnet/sctp/sctp_timer.h>
21#include <vnet/sctp/sctp_packet.h>
22#include <vnet/session/transport.h>
23#include <vnet/session/session.h>
24
25/* SCTP timers */
/*
 * SCTP timers.
 *
 * Each _(sym, str) entry below is expanded by the enum that follows into
 * one SCTP_TIMER_<sym> enumerator; SCTP_N_TIMERS ends up equal to the
 * number of entries.
 */
#define foreach_sctp_timer                      \
  _(T1_INIT, "T1_INIT")                         \
  _(T1_COOKIE, "T1_COOKIE")                     \
  _(T2_SHUTDOWN, "T2_SHUTDOWN")                 \
  _(T3_RXTX, "T3_RXTX")                         \
  _(T4_HEARTBEAT, "T4_HB")                      \
  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")

typedef enum _sctp_timers
{
#define _(sym, str) SCTP_TIMER_##sym,
  foreach_sctp_timer
#undef _
    SCTP_N_TIMERS
} sctp_timers_e;

/* Handle value stored in a timers[] slot that has no timer started. */
#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
43
Marco Varlesedf5a99c2018-02-06 13:48:30 +010044always_inline char *
45sctp_timer_to_string (u8 timer_id)
46{
47 switch (timer_id)
48 {
49 case SCTP_TIMER_T1_INIT:
50 return "SCTP_TIMER_T1_INIT";
51 case SCTP_TIMER_T1_COOKIE:
52 return "SCTP_TIMER_T1_COOKIE";
53 case SCTP_TIMER_T2_SHUTDOWN:
54 return "SCTP_TIMER_T2_SHUTDOWN";
55 case SCTP_TIMER_T3_RXTX:
56 return "SCTP_TIMER_T3_RXTX";
57 case SCTP_TIMER_T4_HEARTBEAT:
58 return "SCTP_TIMER_T4_HEARTBEAT";
59 case SCTP_TIMER_T5_SHUTDOWN_GUARD:
60 return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
61 }
62 return NULL;
63}
64
Marco Varlese191a5942017-10-30 18:17:21 +010065typedef enum _sctp_error
66{
67#define sctp_error(n,s) SCTP_ERROR_##n,
68#include <vnet/sctp/sctp_error.def>
69#undef sctp_error
70 SCTP_N_ERROR,
71} sctp_error_t;
72
/* Flags value meaning "no flag set". */
#define NO_FLAG 0

/* Flag-bit tests: T and E both test bit 0, B tests bit 1, U tests bit 2. */
#define IS_T_BIT_SET(var) ((var) & (1))
#define IS_E_BIT_SET(var) ((var) & (1))
#define IS_B_BIT_SET(var) ((var) & (1<<1))
#define IS_U_BIT_SET(var) ((var) & (1<<2))

/* Size of the sub_conn[] array of an SCTP connection. */
#define MAX_SCTP_CONNECTIONS 8
/* Index of the main sub-connection inside sub_conn[]. */
#define MAIN_SCTP_SUB_CONN_IDX 0

/*
 * Forwards to the buffer trace trajectory callback when
 * VLIB_BUFFER_TRACE_TRAJECTORY is enabled; expands to nothing otherwise.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define sctp_trajectory_add_start(b, start)                     \
{                                                               \
    (*vlib_buffer_trace_trajectory_cb) (b, start);              \
}
#else
#define sctp_trajectory_add_start(b, start)
#endif
91
/* Values stored in sctp_sub_connection_t.state. */
enum _sctp_subconn_state
{
  SCTP_SUBCONN_STATE_DOWN = 0,
  SCTP_SUBCONN_STATE_UP,
  SCTP_SUBCONN_STATE_ALLOW_HB
};

/* NOTE(review): presumably the initial slow-start threshold in bytes,
 * going by the name -- confirm against the code that initializes ssthresh. */
#define SCTP_INITIAL_SSHTRESH 65535
Marco Varlese191a5942017-10-30 18:17:21 +0100100typedef struct _sctp_sub_connection
101{
102 transport_connection_t connection; /**< Common transport data. First! */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100103
Marco Varlese04e5d642018-02-23 17:43:06 +0100104 u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100105 u32 error_count; /**< The current error count for this destination. */
106 u32 error_threshold; /**< Current error threshold for this destination,
107 i.e. what value marks the destination down if error count reaches this value. */
Marco Varlesef3ab4892018-02-19 15:23:13 +0100108 u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
109 the sender based on observed network conditions. */
110 u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
111 sender to distinguish slow-start and congestion avoidance phases. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100112
Marco Varlese21c8baf2018-02-02 17:17:51 +0100113 u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
114
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100115 u32 RTO; /**< The current retransmission timeout value. */
116 u32 SRTT; /**< The current smoothed round-trip time. */
Marco Varlese21c8baf2018-02-02 17:17:51 +0100117 f32 RTTVAR; /**< The current RTT variation. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100118
119 u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
120 congestion avoidance mode (see Section 7.2.2).*/
121
122 u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
123
124 u16 PMTU; /**< The current known path MTU. */
125
126 u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
127
128 u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
129 this address is currently being used to compute an RTT.
130 If this flag is 0, the next DATA chunk sent to this destination
131 should be used to compute an RTT and this flag should be set.
132 Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
133 clear this flag. */
134
Marco Varlese54432f82018-02-15 17:01:56 +0100135 u32 last_seen; /**< The time to which this destination was last sent a packet to.
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100136 This can be used to determine if a HEARTBEAT is needed. */
Marco Varlese191a5942017-10-30 18:17:21 +0100137
Marco Varlesef3ab4892018-02-19 15:23:13 +0100138 u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
139
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100140 u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
Marco Varlese54432f82018-02-15 17:01:56 +0100141 If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100142
Marco Varlesea38783e2018-02-13 12:38:52 +0100143 u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
144
Marco Varlesef3ab4892018-02-19 15:23:13 +0100145 u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
Marco Varlese54432f82018-02-15 17:01:56 +0100146
Marco Varlese191a5942017-10-30 18:17:21 +0100147} sctp_sub_connection_t;
148
149typedef struct
150{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100151 u32 a_rwnd; /**< Maximum segment size advertised */
Marco Varlese191a5942017-10-30 18:17:21 +0100152
153} sctp_options_t;
154
/*
 * Helpers for the out_of_order_map bit array: bit k lives in word k/32 at
 * position k%32.
 *
 * Both arguments are fully parenthesized so that expressions such as
 * SET_BIT (map, base + off) index the intended word, and the mask is built
 * from an unsigned 1 so that bit 31 does not shift into the sign bit.
 * Note that k is evaluated twice; do not pass expressions with side effects.
 */
#define SET_BIT(A,k)     ( (A)[((k)/32)] |= (1u << ((k)%32)) )
#define CLEAR_BIT(A,k)   ( (A)[((k)/32)] &= ~(1u << ((k)%32)) )
#define TEST_BIT(A,k)    ( (A)[((k)/32)] & (1u << ((k)%32)) )
159
/**
 * Reverse the order of the n bytes starting at pv, in place.
 *
 * @param pv - start of the byte range to reverse
 * @param n  - number of bytes in the range; 0 and 1 leave it untouched
 */
always_inline void
_bytes_swap (void *pv, size_t n)
{
  char *p = pv;
  size_t lo, hi;

  /* With n == 0, "n - 1" would wrap to SIZE_MAX and the loop below would
   * run far outside the buffer. */
  if (n < 2)
    return;

  for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
    {
      char tmp = p[lo];
      p[lo] = p[hi];
      p[hi] = tmp;
    }
}

/* Byte-reverse the object x in place.  The trailing ';' is part of the
 * expansion and is kept for existing callers. */
#define ENDIANESS_SWAP(x) _bytes_swap(&(x), sizeof(x));
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100174
/* Number of u32 words in sctp_connection_t.out_of_order_map. */
#define MAX_INFLIGHT_PACKETS 128
#define MAX_ENQUEABLE_SACKS 2

/*
 * Increment, in milliseconds, that the sender suggests the receiver add to
 * its default cookie life-span.
 */
#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
183
Marco Varlese191a5942017-10-30 18:17:21 +0100184typedef struct _sctp_connection
185{
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100186 sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
Marco Varlese191a5942017-10-30 18:17:21 +0100187
188 u8 state; /**< SCTP state as per sctp_state_t */
189 u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100190
Marco Varlese191a5942017-10-30 18:17:21 +0100191 u32 local_tag; /**< INIT_TAG generated locally */
192 u32 remote_tag; /**< INIT_TAG generated by the remote peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100193
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100194 u32 local_initial_tsn; /**< Initial TSN generated locally */
195 u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
Marco Varlese191a5942017-10-30 18:17:21 +0100196
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100197 u32 peer_cookie_life_span_increment;
Marco Varlese191a5942017-10-30 18:17:21 +0100198
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100199 u32 overall_err_count; /**< The overall association error count. */
200 u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
201 reaches will cause this association to be torn down. */
Marco Varlese191a5942017-10-30 18:17:21 +0100202
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100203 u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
204
205 u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
206 This is sent in the INIT or INIT ACK chunk to the peer
207 and incremented each time a DATA chunk is assigned a
208 TSN (normally just prior to transmit or during
209 fragmentation). */
210
Marco Varlesef3ab4892018-02-19 15:23:13 +0100211 u32 last_unacked_tsn; /** < Last TSN number still unacked */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100212 u32 next_tsn_expected; /**< The next TSN number expected to be received. */
213
214 u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
215 is set initially by taking the peer's initial TSN,
216 received in the INIT or INIT ACK chunk, and
217 subtracting one from it. */
218
219 u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
220 TSNs have been received (relative to the Last Rcvd TSN).
221 If no gaps exist, i.e., no out-of-order packets have been received,
222 this array will be set to all zero. */
223
224 u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
225 This is initialized to 0. When a packet is received it is incremented.
226 If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
227 Note: This is used only when no DATA chunks are received out-of-order.
228 When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
229
Marco Varlesef3ab4892018-02-19 15:23:13 +0100230 u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100231
Marco Varlese91389ac2018-01-31 11:00:01 +0100232 u8 overall_sending_status; /**< 0 indicates first fragment of a user message
233 1 indicates normal stream
234 2 indicates last fragment of a user message */
235
Marco Varlese191a5942017-10-30 18:17:21 +0100236 sctp_options_t snd_opts;
Marco Varlese191a5942017-10-30 18:17:21 +0100237
Marco Varleseeacf3cf2018-02-26 14:52:25 +0100238 u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during
239 the life-span of the association itself. For instance, a new sub-connection might have been added. */
240
Marco Varlese191a5942017-10-30 18:17:21 +0100241} sctp_connection_t;
242
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100243typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
Marco Varlese191a5942017-10-30 18:17:21 +0100244
245sctp_connection_t *sctp_connection_new (u8 thread_index);
Marco Varlese3c6a9762018-03-01 11:19:59 +0100246
247u8
248sctp_sub_connection_add_ip4 (vlib_main_t * vm,
249 ip4_address_t * lcl_addr,
250 ip4_address_t * rmt_addr);
251
252u8
253sctp_sub_connection_add_ip6 (vlib_main_t * vm,
254 ip6_address_t * lcl_addr,
255 ip6_address_t * rmt_addr);
256
Marco Varlese465c0872018-03-01 14:01:46 +0100257u8
258sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr,
259 ip4_address_t * rmt_addr);
260
261u8
262sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr,
263 ip6_address_t * rmt_addr);
264
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100265void sctp_connection_close (sctp_connection_t * sctp_conn);
266void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
267void sctp_connection_del (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100268
269u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100270void sctp_send_init (sctp_connection_t * sctp_conn);
271void sctp_send_shutdown (sctp_connection_t * sctp_conn);
Marco Varlese54432f82018-02-15 17:01:56 +0100272void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesebe2251b2018-02-07 12:22:41 +0100273 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100274void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesefae40392018-02-14 15:38:35 +0100275 vlib_buffer_t * b0);
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100276void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100277void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
278 u8 is_ip4);
279void sctp_flush_frames_to_output (u8 thread_index);
280void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
281
282format_function_t format_sctp_state;
283
284u8 *format_sctp_connection_id (u8 * s, va_list * args);
285u8 *format_sctp_connection (u8 * s, va_list * args);
286u8 *format_sctp_scoreboard (u8 * s, va_list * args);
287u8 *format_sctp_header (u8 * s, va_list * args);
288u8 *format_sctp_tx_trace (u8 * s, va_list * args);
289
290clib_error_t *sctp_init (vlib_main_t * vm);
Marco Varlese54432f82018-02-15 17:01:56 +0100291void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
292void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
293void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
294void sctp_init_mss (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100295
Marco Varlese54432f82018-02-15 17:01:56 +0100296void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
297 vlib_buffer_t * b, ip4_address_t * ip4_addr,
Marco Varlese191a5942017-10-30 18:17:21 +0100298 ip6_address_t * ip6_addr);
Marco Varleseeacf3cf2018-02-26 14:52:25 +0100299void
300sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
301 u8 idx, vlib_buffer_t * b,
302 ip4_address_t * ip4_addr,
303 ip6_address_t * ip6_addr);
304void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
305 vlib_buffer_t * b,
306 ip4_address_t * ip4_addr,
307 ip6_address_t * ip6_addr);
Marco Varlese8c5f67f2018-02-27 09:38:31 +0100308void
309sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
310 vlib_buffer_t * b, u8 err_cause);
Marco Varlese54432f82018-02-15 17:01:56 +0100311void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100312 vlib_buffer_t * b,
313 sctp_state_cookie_param_t * sc);
Marco Varlese54432f82018-02-15 17:01:56 +0100314void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlese191a5942017-10-30 18:17:21 +0100315 vlib_buffer_t * b);
Marco Varlese54432f82018-02-15 17:01:56 +0100316void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
317 vlib_buffer_t * b);
318void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
Marco Varlesedf5a99c2018-02-06 13:48:30 +0100319 vlib_buffer_t * b);
Marco Varlese191a5942017-10-30 18:17:21 +0100320
Marco Varlese54432f82018-02-15 17:01:56 +0100321u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
Marco Varlese191a5942017-10-30 18:17:21 +0100322
Marco Varlese3c6a9762018-03-01 11:19:59 +0100323void sctp_api_reference (void);
324
#define IP_PROTOCOL_SCTP	132

/**
 * SCTP FSM state definitions as per RFC4960.
 *
 * Each _(sym, str) entry is expanded by the enum below into one
 * SCTP_STATE_<sym> enumerator; SCTP_N_STATES counts them.
 */
#define foreach_sctp_fsm_state                \
  _(CLOSED, "CLOSED")                         \
  _(COOKIE_WAIT, "COOKIE_WAIT")               \
  _(COOKIE_ECHOED, "COOKIE_ECHOED")           \
  _(ESTABLISHED, "ESTABLISHED")               \
  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")     \
  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")           \
  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")   \
  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")

typedef enum _sctp_state
{
#define _(sym, str) SCTP_STATE_##sym,
  foreach_sctp_fsm_state
#undef _
    SCTP_N_STATES
} sctp_state_t;
345
346always_inline char *
347sctp_state_to_string (u8 state)
348{
349 switch (state)
350 {
351 case SCTP_STATE_CLOSED:
352 return "SCTP_STATE_CLOSED";
353 case SCTP_STATE_COOKIE_WAIT:
354 return "SCTP_STATE_COOKIE_WAIT";
355 case SCTP_STATE_COOKIE_ECHOED:
356 return "SCTP_STATE_COOKIE_ECHOED";
357 case SCTP_STATE_ESTABLISHED:
358 return "SCTP_STATE_ESTABLISHED";
359 case SCTP_STATE_SHUTDOWN_PENDING:
360 return "SCTP_STATE_SHUTDOWN_PENDING";
361 case SCTP_STATE_SHUTDOWN_SENT:
362 return "SCTP_STATE_SHUTDOWN_SENT";
363 case SCTP_STATE_SHUTDOWN_RECEIVED:
364 return "SCTP_STATE_SHUTDOWN_RECEIVED";
365 case SCTP_STATE_SHUTDOWN_ACK_SENT:
366 return "SCTP_STATE_SHUTDOWN_ACK_SENT";
367 }
368 return NULL;
369}
370
371always_inline char *
372sctp_chunk_to_string (u8 type)
373{
374 switch (type)
375 {
376 case DATA:
377 return "DATA";
378 case INIT:
379 return "INIT";
380 case INIT_ACK:
381 return "INIT_ACK";
382 case SACK:
383 return "SACK";
384 case HEARTBEAT:
385 return "HEARTBEAT";
386 case HEARTBEAT_ACK:
387 return "HEARTBEAT_ACK";
388 case ABORT:
389 return "ABORT";
390 case SHUTDOWN:
391 return "SHUTDOWN";
392 case SHUTDOWN_ACK:
393 return "SHUTDOWN_ACK";
394 case OPERATION_ERROR:
395 return "OPERATION_ERROR";
396 case COOKIE_ECHO:
397 return "COOKIE_ECHO";
398 case COOKIE_ACK:
399 return "COOKIE_ACK";
400 case ECNE:
401 return "ECNE";
402 case CWR:
403 return "CWR";
404 case SHUTDOWN_COMPLETE:
405 return "SHUTDOWN_COMPLETE";
406 }
407 return NULL;
408}
409
410always_inline char *
411sctp_optparam_type_to_string (u8 type)
412{
413 switch (type)
414 {
415 case SCTP_IPV4_ADDRESS_TYPE:
416 return "SCTP_IPV4_ADDRESS_TYPE";
417 case SCTP_IPV6_ADDRESS_TYPE:
418 return "SCTP_IPV6_ADDRESS_TYPE";
419 case SCTP_STATE_COOKIE_TYPE:
420 return "SCTP_STATE_COOKIE_TYPE";
421 case SCTP_UNRECOGNIZED_TYPE:
422 return "SCTP_UNRECOGNIZED_TYPE";
423 case SCTP_COOKIE_PRESERVATIVE_TYPE:
424 return "SCTP_COOKIE_PRESERVATIVE_TYPE";
425 case SCTP_HOSTNAME_ADDRESS_TYPE:
426 return "SCTP_HOSTNAME_ADDRESS_TYPE";
427 case SCTP_SUPPORTED_ADDRESS_TYPES:
428 return "SCTP_SUPPORTED_ADDRESS_TYPES";
429 }
430 return NULL;
431}
432
/*
 * Timing constants.
 *
 * Every expansion is parenthesized so the macros behave as single values
 * inside larger expressions (e.g. "x / SCTP_RTO_MIN").
 */
#define SCTP_TICK 0.001			/**< SCTP tick period (s) */
#define SHZ ((u32) (1/SCTP_TICK))	/**< SCTP tick frequency */
#define SCTP_TSTAMP_RESOLUTION SCTP_TICK	/**< Time stamp resolution */

/* As per RFC4960, page 83 */
#define SCTP_RTO_INIT (3 * SHZ)	/* 3 seconds */
#define SCTP_RTO_MIN (1 * SHZ)	/* 1 second */
#define SCTP_RTO_MAX (60 * SHZ)	/* 60 seconds */
#define SCTP_RTO_BURST 4
/* Fractional gains: written as floating-point divisions because the
 * integer forms 1/8 and 1/4 evaluate to 0 when used as a factor. */
#define SCTP_RTO_ALPHA (1.0 / 8)
#define SCTP_RTO_BETA (1.0 / 4)
#define SCTP_VALID_COOKIE_LIFE (60 * SHZ)	/* 60 seconds */
#define SCTP_ASSOCIATION_MAX_RETRANS 10	/* the overall connection */
#define SCTP_PATH_MAX_RETRANS 5	/* number of attempts per destination address */
#define SCTP_MAX_INIT_RETRANS 8	/* number of attempts */
#define SCTP_HB_INTERVAL (30 * SHZ)
#define SCTP_HB_MAX_BURST 1

#define SCTP_DATA_IDLE_INTERVAL (15 * SHZ)	/* 15 seconds; the time-interval after which the connection is considered IDLE */

#define SCTP_TO_TIMER_TICK (SCTP_TICK * 10)	/* Period for converting from SCTP_TICK */
454
455typedef struct _sctp_lookup_dispatch
456{
457 u8 next, error;
458} sctp_lookup_dispatch_t;
459
460typedef struct _sctp_main
461{
462 /* Per-worker thread SCTP connection pools */
463 sctp_connection_t **connections;
464
465 /* Pool of listeners. */
466 sctp_connection_t *listener_pool;
467
468 /** Dispatch table by state and flags */
469 sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
470
471 u8 log2_tstamp_clocks_per_tick;
472 f64 tstamp_ticks_per_clock;
473 u32 *time_now;
474
475 /** per-worker tx buffer free lists */
476 u32 **tx_buffers;
477 /** per-worker tx frames to SCTP 4/6 output nodes */
478 vlib_frame_t **tx_frames[2];
479 /** per-worker tx frames to ip 4/6 lookup nodes */
480 vlib_frame_t **ip_lookup_tx_frames[2];
481
482 /* Per worker-thread timer wheel for connections timers */
483 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
484
485 /* Pool of half-open connections on which we've sent a SYN */
486 sctp_connection_t *half_open_connections;
487 clib_spinlock_t half_open_lock;
488
489 /* TODO: Congestion control algorithms registered */
490 /* sctp_cc_algorithm_t *cc_algos; */
491
492 /* Flag that indicates if stack is on or off */
493 u8 is_enabled;
494
495 /** Number of preallocated connections */
496 u32 preallocated_connections;
497
498 /** Transport table (preallocation) size parameters */
499 u32 local_endpoints_table_memory;
500 u32 local_endpoints_table_buckets;
501
502 /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
503 ip4_address_t *ip4_src_addresses;
504 u32 last_v4_address_rotor;
505 u32 last_v6_address_rotor;
506 ip6_address_t *ip6_src_addresses;
507
508 /** vlib buffer size */
509 u32 bytes_per_buffer;
510
511 u8 punt_unknown4;
512 u8 punt_unknown6;
513
514} sctp_main_t;
515
516extern sctp_main_t sctp_main;
517extern vlib_node_registration_t sctp4_input_node;
518extern vlib_node_registration_t sctp6_input_node;
519extern vlib_node_registration_t sctp4_output_node;
520extern vlib_node_registration_t sctp6_output_node;
521
522always_inline sctp_main_t *
523vnet_get_sctp_main ()
524{
525 return &sctp_main;
526}
527
528always_inline sctp_header_t *
529sctp_buffer_hdr (vlib_buffer_t * b)
530{
531 ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
532 return (sctp_header_t *) (b->data + b->current_data
533 + vnet_buffer (b)->sctp.hdr_offset);
534}
535
536clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
537
538always_inline sctp_connection_t *
539sctp_half_open_connection_get (u32 conn_index)
540{
541 sctp_connection_t *tc = 0;
542 clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
543 if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
544 tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
Marco Varlese04e5d642018-02-23 17:43:06 +0100545 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].subconn_idx = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100546 clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
547 return tc;
548}
549
550/**
551 * Cleanup half-open connection
552 *
553 */
554always_inline void
555sctp_half_open_connection_del (sctp_connection_t * tc)
556{
Marco Varlese15cc6a82018-02-21 12:39:52 +0100557 sctp_main_t *sctp_main = vnet_get_sctp_main ();
558 clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
559 pool_put_index (sctp_main->half_open_connections,
Marco Varlese191a5942017-10-30 18:17:21 +0100560 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
561 if (CLIB_DEBUG)
562 memset (tc, 0xFA, sizeof (*tc));
Marco Varlese15cc6a82018-02-21 12:39:52 +0100563 clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
Marco Varlese191a5942017-10-30 18:17:21 +0100564}
565
566always_inline u32
567sctp_set_time_now (u32 thread_index)
568{
569 sctp_main.time_now[thread_index] = clib_cpu_time_now ()
570 * sctp_main.tstamp_ticks_per_clock;
571 return sctp_main.time_now[thread_index];
572}
573
574always_inline void
575sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
576 u32 interval)
577{
578 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
579 vlib_get_thread_index ());
580 ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
581 SCTP_TIMER_HANDLE_INVALID);
582
583 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
Marco Varlese21c8baf2018-02-02 17:17:51 +0100584 sub->timers[timer_id] =
Marco Varlese191a5942017-10-30 18:17:21 +0100585 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
586 sub->c_c_index, timer_id, interval);
587}
588
589always_inline void
590sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
591{
592 ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
593 if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
594 return;
595
596 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
597
598 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
599 sub->timers[timer_id]);
600 sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
601}
602
Marco Varlese191a5942017-10-30 18:17:21 +0100603/**
604 * Try to cleanup half-open connection
605 *
606 * If called from a thread that doesn't own tc, the call won't have any
607 * effect.
608 *
609 * @param tc - connection to be cleaned up
610 * @return non-zero if cleanup failed.
611 */
612always_inline int
613sctp_half_open_connection_cleanup (sctp_connection_t * tc)
614{
615 /* Make sure this is the owning thread */
616 if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
617 vlib_get_thread_index ())
618 return 1;
619 sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
620 sctp_half_open_connection_del (tc);
621 return 0;
622}
623
624always_inline u32
625sctp_header_bytes ()
626{
627 return sizeof (sctp_header_t);
628}
629
630always_inline sctp_connection_t *
631sctp_get_connection_from_transport (transport_connection_t * tconn)
632{
633 ASSERT (tconn != NULL);
634
635 sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
636#if SCTP_ADV_DEBUG
637 if (sub == NULL)
638 SCTP_ADV_DBG ("sub == NULL");
639 if (sub->parent == NULL)
640 SCTP_ADV_DBG ("sub->parent == NULL");
641#endif
Marco Varlese04e5d642018-02-23 17:43:06 +0100642 if (sub->subconn_idx > 0)
643 return (sctp_connection_t *) sub -
644 (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
645
646 return (sctp_connection_t *) sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100647}
648
649always_inline u32
650sctp_time_now (void)
651{
652 return sctp_main.time_now[vlib_get_thread_index ()];
653}
654
Marco Varlese21c8baf2018-02-02 17:17:51 +0100655#define ABS(x) ((x) > 0) ? (x) : -(x);
656
657always_inline void
658sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
659{
660 /* See RFC4960, 6.3.1. RTO Calculation */
661 u32 RTO = 0;
662 f32 RTTVAR = 0;
663 u32 now = sctp_time_now ();
664 u32 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
665 u32 R = prev_ts - now;
666
667 if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
668 {
669 sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
670 return;
671 }
672
673 if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
674 {
675 sctp_conn->sub_conn[conn_idx].SRTT = R;
676 RTTVAR = R / 2;
677
678 if (RTTVAR == 0)
679 RTTVAR = 100e-3; /* 100 ms */
680
681 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
682 }
683 else // C3: RTT already exists; let's recalculate
684 {
685 RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
686 SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
687
688 if (RTTVAR == 0)
689 RTTVAR = 100e-3; /* 100 ms */
690
691 sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
692
693 sctp_conn->sub_conn[conn_idx].SRTT =
694 (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
695 SCTP_RTO_ALPHA * R;
696 }
697
698 RTO =
699 sctp_conn->sub_conn[conn_idx].SRTT +
700 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
701 if (RTO < SCTP_RTO_MIN) // C6
702 RTO = SCTP_RTO_MIN;
703
704 if (RTO > SCTP_RTO_MAX) // C7
705 RTO = SCTP_RTO_MAX;
706
707 sctp_conn->sub_conn[conn_idx].RTO = RTO;
708}
709
Marco Varlese191a5942017-10-30 18:17:21 +0100710always_inline void
711sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
712 u32 interval)
713{
714 ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
715 vlib_get_thread_index ());
716 sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
717
718 if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
719 tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
720 sub->timers[timer_id]);
Marco Varlese8ad6a2d2018-01-26 16:50:01 +0100721
Marco Varlese191a5942017-10-30 18:17:21 +0100722 tc->sub_conn[conn_idx].timers[timer_id] =
723 tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
724 sub->c_c_index, timer_id, interval);
725}
726
727always_inline sctp_connection_t *
728sctp_listener_get (u32 tli)
729{
730 return pool_elt_at_index (sctp_main.listener_pool, tli);
731}
732
733#endif
734
735always_inline sctp_connection_t *
736sctp_connection_get (u32 conn_index, u32 thread_index)
737{
738 if (PREDICT_FALSE
739 (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
740 return 0;
741 return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
742}
743
Marco Varlese54432f82018-02-15 17:01:56 +0100744#define SELECT_MAX_RETRIES 8
Marco Varlese191a5942017-10-30 18:17:21 +0100745
Marco Varlese54432f82018-02-15 17:01:56 +0100746always_inline u8
747sctp_data_subconn_select (sctp_connection_t * sctp_conn)
748{
Marco Varlese54432f82018-02-15 17:01:56 +0100749 u32 sub = MAIN_SCTP_SUB_CONN_IDX;
Marco Varlesef3ab4892018-02-19 15:23:13 +0100750 u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd;
751 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100752 {
Marco Varlesef3ab4892018-02-19 15:23:13 +0100753 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
754 continue;
755
756 if (sctp_conn->sub_conn[i].cwnd > cwnd)
757 {
758 sub = i;
759 cwnd = sctp_conn->sub_conn[i].cwnd;
760 }
Marco Varlese191a5942017-10-30 18:17:21 +0100761 }
Marco Varlese54432f82018-02-15 17:01:56 +0100762 return sub;
Marco Varlese191a5942017-10-30 18:17:21 +0100763}
764
765always_inline u8
Marco Varlese54432f82018-02-15 17:01:56 +0100766sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
Marco Varlese191a5942017-10-30 18:17:21 +0100767{
Marco Varlese54432f82018-02-15 17:01:56 +0100768 u8 i;
Marco Varlese191a5942017-10-30 18:17:21 +0100769
Marco Varlese54432f82018-02-15 17:01:56 +0100770 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
Marco Varlese191a5942017-10-30 18:17:21 +0100771 {
Marco Varlese54432f82018-02-15 17:01:56 +0100772 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
773 ip6h->dst_address.as_u64[0] &&
774 sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
775 ip6h->dst_address.as_u64[1] &&
776 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
777 ip6h->src_address.as_u64[0] &&
778 sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
779 ip6h->src_address.as_u64[1])
780 return i;
Marco Varlese191a5942017-10-30 18:17:21 +0100781 }
Marco Varlese54432f82018-02-15 17:01:56 +0100782 clib_warning ("Did not find a sub-connection; defaulting to %u",
783 MAIN_SCTP_SUB_CONN_IDX);
784 return MAIN_SCTP_SUB_CONN_IDX;
785}
786
787always_inline u8
788sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
789{
790 u8 i;
791
792 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
793 {
794 if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
795 ip4h->dst_address.as_u32
796 && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
797 ip4h->src_address.as_u32)
798 return i;
799 }
800 clib_warning ("Did not find a sub-connection; defaulting to %u",
801 MAIN_SCTP_SUB_CONN_IDX);
802 return MAIN_SCTP_SUB_CONN_IDX;
Marco Varlese191a5942017-10-30 18:17:21 +0100803}
804
805/**
806 * Push SCTP header to buffer
807 *
808 * @param vm - vlib_main
809 * @param b - buffer to write the header to
810 * @param sp_net - source port net order
811 * @param dp_net - destination port net order
812 * @param sctp_hdr_opts_len - header and options length in bytes
813 *
814 * @return - pointer to start of SCTP header
815 */
816always_inline void *
817vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
818 u8 sctp_hdr_opts_len)
819{
820 sctp_full_hdr_t *full_hdr;
821
822 full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
823
824 full_hdr->hdr.src_port = sp;
825 full_hdr->hdr.dst_port = dp;
826 full_hdr->hdr.checksum = 0;
827 return full_hdr;
828}
829
830/**
831 * Push SCTP header to buffer
832 *
833 * @param b - buffer to write the header to
834 * @param sp_net - source port net order
835 * @param dp_net - destination port net order
836 * @param sctp_hdr_opts_len - header and options length in bytes
837 *
838 * @return - pointer to start of SCTP header
839 */
840always_inline void *
841vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
842 u8 sctp_hdr_opts_len)
843{
844 return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
845 sctp_hdr_opts_len);
846}
847
Marco Varlese3c6a9762018-03-01 11:19:59 +0100848always_inline u8
849sctp_next_avail_subconn (sctp_connection_t * sctp_conn)
850{
851 u8 i;
852
853 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
854 {
855 if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
856 return i;
857 }
858 return MAX_SCTP_CONNECTIONS;
859}
860
Marco Varlesef3ab4892018-02-19 15:23:13 +0100861always_inline void
862update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
863{
864 u8 i;
865 u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX;
866
867 for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
868 {
869 if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
870 {
871 if (sctp_conn->sub_conn[i].PMTU <
872 sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
873 smallest_pmtu_index = i;
874 }
875 }
876
877 sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
878}
879
880/* As per RFC4960; section 7.2.1: Slow-Start */
881always_inline void
882sctp_init_cwnd (sctp_connection_t * sctp_conn)
883{
884 u8 i;
885 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
886 {
887 /* Section 7.2.1; point (1) */
888 sctp_conn->sub_conn[i].cwnd =
889 clib_min (4 * sctp_conn->sub_conn[i].PMTU,
890 clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
891
892 /* Section 7.2.1; point (3) */
893 sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
894
895 /* Section 7.2.2; point (1) */
896 sctp_conn->sub_conn[i].partially_acked_bytes = 0;
897 }
898}
899
900always_inline u8
901sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
902{
903 return 0;
904}
905
906always_inline u8
907cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
908{
909 return 0;
910}
911
912/* As per RFC4960; section 7.2.1: Slow-Start */
913always_inline void
914update_cwnd (sctp_connection_t * sctp_conn)
915{
916 u8 i;
917
918 for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
919 {
920 /* Section 7.2.1; point (2) */
921 if (sctp_conn->sub_conn[i].is_retransmitting)
922 {
923 sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
924 continue;
925 }
926
927 /* Section 7.2.2; point (4) */
928 if (sctp_conn->sub_conn[i].last_data_ts >
929 sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
930 {
931 sctp_conn->sub_conn[i].cwnd =
932 clib_max (sctp_conn->sub_conn[i].cwnd / 2,
933 4 * sctp_conn->sub_conn[i].PMTU);
934 continue;
935 }
936
937 /* Section 7.2.1; point (5) */
938 if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
939 {
940 if (!cwnd_fully_utilized (sctp_conn, i))
941 continue;
942
943 if (sctp_in_cong_recovery (sctp_conn, i))
944 continue;
945
946 sctp_conn->sub_conn[i].cwnd =
947 clib_min (sctp_conn->sub_conn[i].PMTU, 1);
948 }
949 }
950}
951
Marco Varlese191a5942017-10-30 18:17:21 +0100952/*
953 * fd.io coding-style-patch-verification: ON
954 *
955 * Local Variables:
956 * eval: (c-set-style "gnu")
957 * End:
958 */