blob: 114a5b9e0f1037faf7d861e77ced2ea866667ba8 [file] [log] [blame]
Dave Barach68b0fb02017-02-28 15:15:56 -05001/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include <vnet/tcp/tcp.h>
17#include <vnet/lisp-cp/packets.h>
18
19vlib_node_registration_t tcp4_output_node;
20vlib_node_registration_t tcp6_output_node;
21
/** Next indices used by the TCP output nodes (see foreach_tcp*_output_next) */
typedef enum _tcp_output_nect
{
  TCP_OUTPUT_NEXT_DROP,		/* paired with "error-drop" */
  TCP_OUTPUT_NEXT_IP_LOOKUP,	/* paired with "ip4-lookup" / "ip6-lookup" */
  TCP_OUTPUT_N_NEXT		/* number of next indices, not a real next */
} tcp_output_next_t;
28
/* X-macro list: for each IPv4 output next, its TCP_OUTPUT_NEXT_ suffix and a
 * string (presumably the name of the next graph node). The user supplies _() */
#define foreach_tcp4_output_next              	\
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip4-lookup")

/* Same list for the IPv6 output node */
#define foreach_tcp6_output_next              	\
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip6-lookup")
36
/* Table of TCP error strings: tcp_error.def is expanded with a tcp_error()
 * definition that keeps only the string argument of each entry */
static char *tcp_error_strings[] = {
#define tcp_error(n,s) s,
#include <vnet/tcp/tcp_error.def>
#undef tcp_error
};
42
/** Per-packet tx trace record (format_tcp_tx_trace does not print it yet) */
typedef struct
{
  u16 src_port;
  u16 dst_port;
  u8 state;
} tcp_tx_trace_t;
49
/* Placeholder MTU. Used in this file as the advertised MSS, the send MSS
 * upper bound and the initial window segment size until the real interface
 * MTU is looked up (see the TODO/XXX notes at its users). */
u16 dummy_mtu = 400;
51
52u8 *
53format_tcp_tx_trace (u8 * s, va_list * args)
54{
55 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
56 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
57
58 s = format (s, "TBD\n");
59
60 return s;
61}
62
63void
64tcp_set_snd_mss (tcp_connection_t * tc)
65{
66 u16 snd_mss;
67
68 /* TODO find our iface MTU */
69 snd_mss = dummy_mtu;
70
71 /* TODO cache mss and consider PMTU discovery */
72 snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss;
73
74 tc->snd_mss = snd_mss;
75
76 if (tc->snd_mss == 0)
77 {
78 clib_warning ("snd mss is 0");
79 tc->snd_mss = dummy_mtu;
80 }
81}
82
83static u8
84tcp_window_compute_scale (u32 available_space)
85{
86 u8 wnd_scale = 0;
87 while (wnd_scale < TCP_MAX_WND_SCALE
88 && (available_space >> wnd_scale) > TCP_WND_MAX)
89 wnd_scale++;
90 return wnd_scale;
91}
92
93/**
Florin Corase04c2992017-03-01 08:17:34 -080094 * TCP's IW as recommended by RFC6928
95 */
96always_inline u32
97tcp_initial_wnd_unscaled (tcp_connection_t * tc)
98{
99 return TCP_IW_N_SEGMENTS * dummy_mtu;
100}
101
102/**
Dave Barach68b0fb02017-02-28 15:15:56 -0500103 * Compute initial window and scale factor. As per RFC1323, window field in
104 * SYN and SYN-ACK segments is never scaled.
105 */
106u32
107tcp_initial_window_to_advertise (tcp_connection_t * tc)
108{
Florin Corase04c2992017-03-01 08:17:34 -0800109 u32 max_fifo;
Dave Barach68b0fb02017-02-28 15:15:56 -0500110
111 /* Initial wnd for SYN. Fifos are not allocated yet.
Florin Corase04c2992017-03-01 08:17:34 -0800112 * Use some predefined value. For SYN-ACK we still want the
113 * scale to be computed in the same way */
114 max_fifo = TCP_MAX_RX_FIFO_SIZE;
Dave Barach68b0fb02017-02-28 15:15:56 -0500115
Florin Corase04c2992017-03-01 08:17:34 -0800116 tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
117 tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500118
119 return clib_min (tc->rcv_wnd, TCP_WND_MAX);
120}
121
122/**
123 * Compute and return window to advertise, scaled as per RFC1323
124 */
125u32
126tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
127{
Florin Corase04c2992017-03-01 08:17:34 -0800128 u32 available_space, max_fifo, observed_wnd;
Dave Barach68b0fb02017-02-28 15:15:56 -0500129
Florin Corase04c2992017-03-01 08:17:34 -0800130 if (state < TCP_STATE_ESTABLISHED)
Dave Barach68b0fb02017-02-28 15:15:56 -0500131 return tcp_initial_window_to_advertise (tc);
132
Florin Corase04c2992017-03-01 08:17:34 -0800133 /*
134 * Figure out how much space we have available
135 */
Dave Barach68b0fb02017-02-28 15:15:56 -0500136 available_space = stream_session_max_enqueue (&tc->connection);
Florin Corase04c2992017-03-01 08:17:34 -0800137 max_fifo = stream_session_fifo_size (&tc->connection);
Dave Barach68b0fb02017-02-28 15:15:56 -0500138
Florin Corase04c2992017-03-01 08:17:34 -0800139 ASSERT (tc->opt.mss < max_fifo);
Dave Barach68b0fb02017-02-28 15:15:56 -0500140
Florin Corase04c2992017-03-01 08:17:34 -0800141 if (available_space < tc->opt.mss && available_space < max_fifo / 8)
142 available_space = 0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500143
Florin Corase04c2992017-03-01 08:17:34 -0800144 /*
145 * Use the above and what we know about what we've previously advertised
146 * to compute the new window
147 */
148 observed_wnd = tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
149
150 /* Bad. Thou shalt not shrink */
151 if (available_space < observed_wnd)
152 {
153 if (available_space == 0)
154 clib_warning ("Didn't shrink rcv window despite not having space");
155 }
156
157 tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
158
159 if (tc->rcv_wnd == 0)
160 {
161 tc->flags |= TCP_CONN_SENT_RCV_WND0;
162 }
163
164 return tc->rcv_wnd >> tc->rcv_wscale;
Dave Barach68b0fb02017-02-28 15:15:56 -0500165}
166
167/**
168 * Write TCP options to segment.
169 */
170u32
171tcp_options_write (u8 * data, tcp_options_t * opts)
172{
173 u32 opts_len = 0;
174 u32 buf, seq_len = 4;
175
176 if (tcp_opts_mss (opts))
177 {
178 *data++ = TCP_OPTION_MSS;
179 *data++ = TCP_OPTION_LEN_MSS;
180 buf = clib_host_to_net_u16 (opts->mss);
181 clib_memcpy (data, &buf, sizeof (opts->mss));
182 data += sizeof (opts->mss);
183 opts_len += TCP_OPTION_LEN_MSS;
184 }
185
186 if (tcp_opts_wscale (opts))
187 {
188 *data++ = TCP_OPTION_WINDOW_SCALE;
189 *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
190 *data++ = opts->wscale;
191 opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
192 }
193
194 if (tcp_opts_sack_permitted (opts))
195 {
196 *data++ = TCP_OPTION_SACK_PERMITTED;
197 *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
198 opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
199 }
200
201 if (tcp_opts_tstamp (opts))
202 {
203 *data++ = TCP_OPTION_TIMESTAMP;
204 *data++ = TCP_OPTION_LEN_TIMESTAMP;
205 buf = clib_host_to_net_u32 (opts->tsval);
206 clib_memcpy (data, &buf, sizeof (opts->tsval));
207 data += sizeof (opts->tsval);
208 buf = clib_host_to_net_u32 (opts->tsecr);
209 clib_memcpy (data, &buf, sizeof (opts->tsecr));
210 data += sizeof (opts->tsecr);
211 opts_len += TCP_OPTION_LEN_TIMESTAMP;
212 }
213
214 if (tcp_opts_sack (opts))
215 {
216 int i;
217 u32 n_sack_blocks = clib_min (vec_len (opts->sacks),
218 TCP_OPTS_MAX_SACK_BLOCKS);
219
220 if (n_sack_blocks != 0)
221 {
222 *data++ = TCP_OPTION_SACK_BLOCK;
223 *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
224 for (i = 0; i < n_sack_blocks; i++)
225 {
226 buf = clib_host_to_net_u32 (opts->sacks[i].start);
227 clib_memcpy (data, &buf, seq_len);
228 data += seq_len;
229 buf = clib_host_to_net_u32 (opts->sacks[i].end);
230 clib_memcpy (data, &buf, seq_len);
231 data += seq_len;
232 }
233 opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
234 }
235 }
236
237 /* Terminate TCP options */
238 if (opts_len % 4)
239 {
240 *data++ = TCP_OPTION_EOL;
241 opts_len += TCP_OPTION_LEN_EOL;
242 }
243
244 /* Pad with zeroes to a u32 boundary */
245 while (opts_len % 4)
246 {
247 *data++ = TCP_OPTION_NOOP;
248 opts_len += TCP_OPTION_LEN_NOOP;
249 }
250 return opts_len;
251}
252
253always_inline int
Florin Corase04c2992017-03-01 08:17:34 -0800254tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
Dave Barach68b0fb02017-02-28 15:15:56 -0500255{
256 u8 len = 0;
257
258 opts->flags |= TCP_OPTS_FLAG_MSS;
259 opts->mss = dummy_mtu; /*XXX discover that */
260 len += TCP_OPTION_LEN_MSS;
261
262 opts->flags |= TCP_OPTS_FLAG_WSCALE;
Florin Corase04c2992017-03-01 08:17:34 -0800263 opts->wscale = wnd_scale;
Dave Barach68b0fb02017-02-28 15:15:56 -0500264 len += TCP_OPTION_LEN_WINDOW_SCALE;
265
266 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
267 opts->tsval = tcp_time_now ();
268 opts->tsecr = 0;
269 len += TCP_OPTION_LEN_TIMESTAMP;
270
271 opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
272 len += TCP_OPTION_LEN_SACK_PERMITTED;
273
274 /* Align to needed boundary */
275 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
276 return len;
277}
278
279always_inline int
280tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
281{
282 u8 len = 0;
283
284 opts->flags |= TCP_OPTS_FLAG_MSS;
285 opts->mss = dummy_mtu; /*XXX discover that */
286 len += TCP_OPTION_LEN_MSS;
287
288 if (tcp_opts_wscale (&tc->opt))
289 {
290 opts->flags |= TCP_OPTS_FLAG_WSCALE;
291 opts->wscale = tc->rcv_wscale;
292 len += TCP_OPTION_LEN_WINDOW_SCALE;
293 }
294
295 if (tcp_opts_tstamp (&tc->opt))
296 {
297 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
298 opts->tsval = tcp_time_now ();
299 opts->tsecr = tc->tsval_recent;
300 len += TCP_OPTION_LEN_TIMESTAMP;
301 }
302
303 if (tcp_opts_sack_permitted (&tc->opt))
304 {
305 opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
306 len += TCP_OPTION_LEN_SACK_PERMITTED;
307 }
308
309 /* Align to needed boundary */
310 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
311 return len;
312}
313
314always_inline int
315tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
316{
317 u8 len = 0;
318
319 opts->flags = 0;
320
321 if (tcp_opts_tstamp (&tc->opt))
322 {
323 opts->flags |= TCP_OPTS_FLAG_TSTAMP;
324 opts->tsval = tcp_time_now ();
325 opts->tsecr = tc->tsval_recent;
326 len += TCP_OPTION_LEN_TIMESTAMP;
327 }
328 if (tcp_opts_sack_permitted (&tc->opt))
329 {
330 if (vec_len (tc->snd_sacks))
331 {
332 opts->flags |= TCP_OPTS_FLAG_SACK;
333 opts->sacks = tc->snd_sacks;
334 opts->n_sack_blocks = vec_len (tc->snd_sacks);
335 len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
336 }
337 }
338
339 /* Align to needed boundary */
340 len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
341 return len;
342}
343
344always_inline int
345tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
346 tcp_state_t state)
347{
348 switch (state)
349 {
350 case TCP_STATE_ESTABLISHED:
351 case TCP_STATE_FIN_WAIT_1:
352 return tcp_make_established_options (tc, opts);
353 case TCP_STATE_SYN_RCVD:
354 return tcp_make_synack_options (tc, opts);
355 case TCP_STATE_SYN_SENT:
Florin Corase04c2992017-03-01 08:17:34 -0800356 return tcp_make_syn_options (opts, tc->rcv_wscale);
Dave Barach68b0fb02017-02-28 15:15:56 -0500357 default:
358 clib_warning ("Not handled!");
359 return 0;
360 }
361}
362
/**
 * Pop a buffer index from the tx buffer cache of the current cpu.
 *
 * When the cache is empty it is first refilled with up to 32 buffers from
 * the default free list.
 *
 * WARNING: on buffer shortage this macro executes a bare `return;` in the
 * CALLER, so it can only be used in functions returning void, and any state
 * the caller changed beforehand stays changed.
 *
 * @param tm	pointer to the tcp main struct (evaluated more than once)
 * @param bidx	pointer to the u32 that receives the buffer index
 */
#define tcp_get_free_buffer_index(tm, bidx)                             \
do {                                                                    \
  u32 *my_tx_buffers, n_free_buffers;                                   \
  u32 cpu_index = (tm)->vlib_main->cpu_index;                           \
  my_tx_buffers = (tm)->tx_buffers[cpu_index];                          \
  if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0))                      \
    {                                                                   \
      n_free_buffers = 32;      /* TODO config or macro */              \
      vec_validate (my_tx_buffers, n_free_buffers - 1);                 \
      _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list (      \
          (tm)->vlib_main, my_tx_buffers, n_free_buffers,               \
          VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);                         \
      (tm)->tx_buffers[cpu_index] = my_tx_buffers;                      \
    }                                                                   \
  /* buffer shortage */                                                 \
  if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))                     \
    return;                                                             \
  *(bidx) = my_tx_buffers[_vec_len (my_tx_buffers)-1];                  \
  _vec_len (my_tx_buffers) -= 1;                                        \
} while (0)
383
384always_inline void
385tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
386{
387 vlib_buffer_t *it = b;
388 do
389 {
390 it->current_data = 0;
391 it->current_length = 0;
392 it->total_length_not_including_first_buffer = 0;
393 }
394 while ((it->flags & VLIB_BUFFER_NEXT_PRESENT)
395 && (it = vlib_get_buffer (vm, it->next_buffer)));
396
397 /* Leave enough space for headers */
398 vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
Florin Corasd79b41e2017-03-04 05:37:52 -0800399 vnet_buffer (b)->tcp.flags = 0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500400}
401
402/**
403 * Prepare ACK
404 */
405void
406tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
407 u8 flags)
408{
409 tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
410 u8 tcp_opts_len, tcp_hdr_opts_len;
411 tcp_header_t *th;
412 u16 wnd;
413
414 wnd = tcp_window_to_advertise (tc, state);
415
416 /* Make and write options */
417 tcp_opts_len = tcp_make_established_options (tc, snd_opts);
418 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
419
420 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
421 tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
422
423 tcp_options_write ((u8 *) (th + 1), snd_opts);
424
425 /* Mark as ACK */
426 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
427}
428
429/**
430 * Convert buffer to ACK
431 */
432void
433tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
434{
435 tcp_main_t *tm = vnet_get_tcp_main ();
436 vlib_main_t *vm = tm->vlib_main;
437
438 tcp_reuse_buffer (vm, b);
439 tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
440 vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
441}
442
443/**
444 * Convert buffer to FIN-ACK
445 */
446void
Florin Corasd79b41e2017-03-04 05:37:52 -0800447tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
Dave Barach68b0fb02017-02-28 15:15:56 -0500448{
449 tcp_main_t *tm = vnet_get_tcp_main ();
450 vlib_main_t *vm = tm->vlib_main;
Florin Corasd79b41e2017-03-04 05:37:52 -0800451 u8 flags = 0;
Dave Barach68b0fb02017-02-28 15:15:56 -0500452
453 tcp_reuse_buffer (vm, b);
Florin Corasd79b41e2017-03-04 05:37:52 -0800454
Florin Corase69f4952017-03-07 10:06:24 -0800455 flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
Florin Corasd79b41e2017-03-04 05:37:52 -0800456 tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags);
Dave Barach68b0fb02017-02-28 15:15:56 -0500457
458 /* Reset flags, make sure ack is sent */
Dave Barach68b0fb02017-02-28 15:15:56 -0500459 vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
460
461 tc->snd_nxt += 1;
462}
463
464/**
465 * Convert buffer to SYN-ACK
466 */
467void
468tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
469{
470 tcp_main_t *tm = vnet_get_tcp_main ();
471 vlib_main_t *vm = tm->vlib_main;
472 tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
473 u8 tcp_opts_len, tcp_hdr_opts_len;
474 tcp_header_t *th;
475 u16 initial_wnd;
476 u32 time_now;
477
478 memset (snd_opts, 0, sizeof (*snd_opts));
479
480 tcp_reuse_buffer (vm, b);
481
482 /* Set random initial sequence */
483 time_now = tcp_time_now ();
484
485 tc->iss = random_u32 (&time_now);
486 tc->snd_una = tc->iss;
487 tc->snd_nxt = tc->iss + 1;
488 tc->snd_una_max = tc->snd_nxt;
489
490 initial_wnd = tcp_initial_window_to_advertise (tc);
491
492 /* Make and write options */
493 tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
494 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
495
496 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
497 tc->rcv_nxt, tcp_hdr_opts_len,
498 TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
499
500 tcp_options_write ((u8 *) (th + 1), snd_opts);
501
502 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
503 vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
504
505 /* Init retransmit timer */
Florin Corasd79b41e2017-03-04 05:37:52 -0800506 tcp_retransmit_timer_set (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500507}
508
509always_inline void
510tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
511 u8 is_ip4)
512{
513 u32 *to_next, next_index;
514 vlib_frame_t *f;
515
516 b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
517 b->error = 0;
518
519 /* Default FIB for now */
520 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
521
522 /* Send to IP lookup */
523 next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
524 f = vlib_get_frame_to_node (vm, next_index);
525
526 /* Enqueue the packet */
527 to_next = vlib_frame_vector_args (f);
528 to_next[0] = bi;
529 f->n_vectors = 1;
530 vlib_put_frame_to_node (vm, next_index, f);
531}
532
533int
534tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
535 tcp_state_t state, u32 my_thread_index, u8 is_ip4)
536{
537 u8 tcp_hdr_len = sizeof (tcp_header_t);
538 ip4_header_t *ih4;
539 ip6_header_t *ih6;
540 tcp_header_t *th0;
541 ip4_address_t src_ip40;
542 ip6_address_t src_ip60;
543 u16 src_port0;
544 u32 tmp;
545
546 /* Find IP and TCP headers */
547 if (is_ip4)
548 {
549 ih4 = vlib_buffer_get_current (b0);
550 th0 = ip4_next_header (ih4);
551 }
552 else
553 {
554 ih6 = vlib_buffer_get_current (b0);
555 th0 = ip6_next_header (ih6);
556 }
557
558 /* Swap src and dst ip */
559 if (is_ip4)
560 {
561 ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
562 src_ip40.as_u32 = ih4->src_address.as_u32;
563 ih4->src_address.as_u32 = ih4->dst_address.as_u32;
564 ih4->dst_address.as_u32 = src_ip40.as_u32;
565
566 /* Chop the end of the pkt */
567 b0->current_length += ip4_header_bytes (ih4) + tcp_hdr_len;
568 }
569 else
570 {
571 ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
572 clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
573 clib_memcpy (&ih6->src_address, &ih6->dst_address,
574 sizeof (ip6_address_t));
575 clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t));
576
577 /* Chop the end of the pkt */
578 b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len;
579 }
580
581 /* Try to determine what/why we're actually resetting and swap
582 * src and dst ports */
583 if (state == TCP_STATE_CLOSED)
584 {
585 if (!tcp_syn (th0))
586 return -1;
587
588 tmp = clib_net_to_host_u32 (th0->seq_number);
589
590 /* Got a SYN for no listener. */
591 th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK;
592 th0->ack_number = clib_host_to_net_u32 (tmp + 1);
593 th0->seq_number = 0;
594
595 }
596 else if (state >= TCP_STATE_SYN_SENT)
597 {
598 th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK;
599 th0->seq_number = th0->ack_number;
600 th0->ack_number = 0;
601 }
602
603 src_port0 = th0->src_port;
604 th0->src_port = th0->dst_port;
605 th0->dst_port = src_port0;
606 th0->window = 0;
607 th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4;
608 th0->urgent_pointer = 0;
609
610 /* Compute checksum */
611 if (is_ip4)
612 {
613 th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
614 }
615 else
616 {
617 int bogus = ~0;
618 th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
619 ASSERT (!bogus);
620 }
621
622 return 0;
623}
624
625/**
626 * Send reset without reusing existing buffer
627 */
628void
629tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4)
630{
631 vlib_buffer_t *b;
632 u32 bi;
633 tcp_main_t *tm = vnet_get_tcp_main ();
634 vlib_main_t *vm = tm->vlib_main;
635 u8 tcp_hdr_len, flags = 0;
636 tcp_header_t *th, *pkt_th;
637 u32 seq, ack;
638 ip4_header_t *ih4, *pkt_ih4;
639 ip6_header_t *ih6, *pkt_ih6;
640
641 tcp_get_free_buffer_index (tm, &bi);
642 b = vlib_get_buffer (vm, bi);
643
644 /* Leave enough space for headers */
645 vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
646
647 /* Make and write options */
648 tcp_hdr_len = sizeof (tcp_header_t);
649
650 if (is_ip4)
651 {
652 pkt_ih4 = vlib_buffer_get_current (pkt);
653 pkt_th = ip4_next_header (pkt_ih4);
654 }
655 else
656 {
657 pkt_ih6 = vlib_buffer_get_current (pkt);
658 pkt_th = ip6_next_header (pkt_ih6);
659 }
660
661 if (tcp_ack (pkt_th))
662 {
663 flags = TCP_FLAG_RST;
664 seq = pkt_th->ack_number;
665 ack = 0;
666 }
667 else
668 {
669 flags = TCP_FLAG_RST | TCP_FLAG_ACK;
670 seq = 0;
671 ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
672 }
673
674 th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
675 seq, ack, tcp_hdr_len, flags, 0);
676
677 /* Swap src and dst ip */
678 if (is_ip4)
679 {
680 ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
681 ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
682 &pkt_ih4->src_address, IP_PROTOCOL_TCP);
683 th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
684 }
685 else
686 {
687 int bogus = ~0;
688 pkt_ih6 = (ip6_header_t *) (pkt_th - 1);
689 ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
690 0x60);
691 ih6 =
692 vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address,
693 &pkt_ih6->src_address, IP_PROTOCOL_TCP);
694 th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
695 ASSERT (!bogus);
696 }
697
698 tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
699}
700
701void
702tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b)
703{
704 tcp_header_t *th = vlib_buffer_get_current (b);
705
706 if (tc->c_is_ip4)
707 {
708 ip4_header_t *ih;
709 ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4,
710 &tc->c_rmt_ip4, IP_PROTOCOL_TCP);
711 th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih);
712 }
713 else
714 {
715 ip6_header_t *ih;
716 int bogus = ~0;
717
718 ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6,
719 &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
720 th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih,
721 &bogus);
722 ASSERT (!bogus);
723 }
724}
725
726/**
727 * Send SYN
728 *
729 * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
730 * The packet is not forwarded through tcpx_output to avoid doing lookups
731 * in the half_open pool.
732 */
733void
734tcp_send_syn (tcp_connection_t * tc)
735{
736 vlib_buffer_t *b;
737 u32 bi;
738 tcp_main_t *tm = vnet_get_tcp_main ();
739 vlib_main_t *vm = tm->vlib_main;
740 u8 tcp_hdr_opts_len, tcp_opts_len;
741 tcp_header_t *th;
742 u32 time_now;
743 u16 initial_wnd;
744 tcp_options_t snd_opts;
745
746 tcp_get_free_buffer_index (tm, &bi);
747 b = vlib_get_buffer (vm, bi);
748
749 /* Leave enough space for headers */
750 vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
751
752 /* Set random initial sequence */
753 time_now = tcp_time_now ();
754
755 tc->iss = random_u32 (&time_now);
756 tc->snd_una = tc->iss;
757 tc->snd_una_max = tc->snd_nxt = tc->iss + 1;
758
759 initial_wnd = tcp_initial_window_to_advertise (tc);
760
761 /* Make and write options */
762 memset (&snd_opts, 0, sizeof (snd_opts));
Florin Corase04c2992017-03-01 08:17:34 -0800763 tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
Dave Barach68b0fb02017-02-28 15:15:56 -0500764 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
765
766 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
767 tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
768 initial_wnd);
769
770 tcp_options_write ((u8 *) (th + 1), &snd_opts);
771
772 /* Measure RTT with this */
773 tc->rtt_ts = tcp_time_now ();
774 tc->rtt_seq = tc->snd_nxt;
775
776 /* Start retransmit trimer */
777 tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK);
778 tc->rto_boff = 0;
779
780 /* Set the connection establishment timer */
781 tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
782
783 tcp_push_ip_hdr (tm, tc, b);
784 tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
785}
786
787always_inline void
788tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4)
789{
790 u32 *to_next, next_index;
791 vlib_frame_t *f;
792
793 b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
794 b->error = 0;
795
796 /* Decide where to send the packet */
797 next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
798 f = vlib_get_frame_to_node (vm, next_index);
799
800 /* Enqueue the packet */
801 to_next = vlib_frame_vector_args (f);
802 to_next[0] = bi;
803 f->n_vectors = 1;
804 vlib_put_frame_to_node (vm, next_index, f);
805}
806
807/**
808 * Send FIN
809 */
810void
811tcp_send_fin (tcp_connection_t * tc)
812{
813 vlib_buffer_t *b;
814 u32 bi;
815 tcp_main_t *tm = vnet_get_tcp_main ();
816 vlib_main_t *vm = tm->vlib_main;
817
818 tcp_get_free_buffer_index (tm, &bi);
819 b = vlib_get_buffer (vm, bi);
820
821 /* Leave enough space for headers */
822 vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
823
Florin Corasd79b41e2017-03-04 05:37:52 -0800824 tcp_make_fin (tc, b);
Dave Barach68b0fb02017-02-28 15:15:56 -0500825 tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
Florin Corasd79b41e2017-03-04 05:37:52 -0800826 tc->flags |= TCP_CONN_FINSNT;
Florin Corase69f4952017-03-07 10:06:24 -0800827 TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500828}
829
830always_inline u8
831tcp_make_state_flags (tcp_state_t next_state)
832{
833 switch (next_state)
834 {
835 case TCP_STATE_ESTABLISHED:
836 return TCP_FLAG_ACK;
837 case TCP_STATE_SYN_RCVD:
838 return TCP_FLAG_SYN | TCP_FLAG_ACK;
839 case TCP_STATE_SYN_SENT:
840 return TCP_FLAG_SYN;
841 case TCP_STATE_LAST_ACK:
842 case TCP_STATE_FIN_WAIT_1:
843 return TCP_FLAG_FIN;
844 default:
845 clib_warning ("Shouldn't be here!");
846 }
847 return 0;
848}
849
850/**
851 * Push TCP header and update connection variables
852 */
853static void
854tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b,
855 tcp_state_t next_state)
856{
857 u32 advertise_wnd, data_len;
858 u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags;
859 tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
860 tcp_header_t *th;
861
862 data_len = b->current_length;
863 vnet_buffer (b)->tcp.flags = 0;
864
865 /* Make and write options */
866 memset (snd_opts, 0, sizeof (*snd_opts));
867 tcp_opts_len = tcp_make_options (tc, snd_opts, next_state);
868 tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
869
870 /* Get rcv window to advertise */
871 advertise_wnd = tcp_window_to_advertise (tc, next_state);
872 flags = tcp_make_state_flags (next_state);
873
874 /* Push header and options */
875 th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
876 tc->rcv_nxt, tcp_hdr_opts_len, flags,
877 advertise_wnd);
878
879 opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts);
880
881 ASSERT (opts_write_len == tcp_opts_len);
882
883 /* Tag the buffer with the connection index */
884 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
885
886 tc->snd_nxt += data_len;
Florin Corase69f4952017-03-07 10:06:24 -0800887 TCP_EVT_DBG (TCP_EVT_PKTIZE, tc);
Dave Barach68b0fb02017-02-28 15:15:56 -0500888}
889
890/* Send delayed ACK when timer expires */
891void
892tcp_timer_delack_handler (u32 index)
893{
894 tcp_main_t *tm = vnet_get_tcp_main ();
895 vlib_main_t *vm = tm->vlib_main;
896 u32 thread_index = os_get_cpu_number ();
897 tcp_connection_t *tc;
898 vlib_buffer_t *b;
899 u32 bi;
900
901 tc = tcp_connection_get (index, thread_index);
902
903 /* Get buffer */
904 tcp_get_free_buffer_index (tm, &bi);
905 b = vlib_get_buffer (vm, bi);
906
907 /* Fill in the ACK */
908 tcp_make_ack (tc, b);
909
910 tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID;
911 tc->flags &= ~TCP_CONN_DELACK;
912
913 tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
914}
915
916/** Build a retransmit segment
917 *
918 * @return the number of bytes in the segment or 0 if there's nothing to
919 * retransmit
920 * */
921u32
922tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b,
923 u32 max_bytes)
924{
925 tcp_main_t *tm = vnet_get_tcp_main ();
926 vlib_main_t *vm = tm->vlib_main;
927 u32 n_bytes, offset = 0;
928 sack_scoreboard_hole_t *hole;
929 u32 hole_size;
930
931 tcp_reuse_buffer (vm, b);
932
Florin Corase04c2992017-03-01 08:17:34 -0800933 ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
Dave Barach68b0fb02017-02-28 15:15:56 -0500934 ASSERT (max_bytes != 0);
935
936 if (tcp_opts_sack_permitted (&tc->opt))
937 {
938 /* XXX get first hole not retransmitted yet */
939 hole = scoreboard_first_hole (&tc->sack_sb);
940 if (!hole)
941 return 0;
942
943 offset = hole->start - tc->snd_una;
944 hole_size = hole->end - hole->start;
945
946 ASSERT (hole_size);
947
948 if (hole_size < max_bytes)
949 max_bytes = hole_size;
950 }
951 else
952 {
953 if (seq_geq (tc->snd_nxt, tc->snd_una_max))
954 return 0;
955 }
956
957 n_bytes = stream_session_peek_bytes (&tc->connection,
958 vlib_buffer_get_current (b), offset,
959 max_bytes);
960 ASSERT (n_bytes != 0);
961
Dave Barach68b0fb02017-02-28 15:15:56 -0500962 tcp_push_hdr_i (tc, b, tc->state);
963
964 return n_bytes;
965}
966
967static void
968tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
969{
970 tcp_main_t *tm = vnet_get_tcp_main ();
971 vlib_main_t *vm = tm->vlib_main;
972 u32 thread_index = os_get_cpu_number ();
973 tcp_connection_t *tc;
974 vlib_buffer_t *b;
975 u32 bi, max_bytes, snd_space;
976
977 if (is_syn)
978 {
979 tc = tcp_half_open_connection_get (index);
980 }
981 else
982 {
983 tc = tcp_connection_get (index, thread_index);
984 }
985
986 /* Make sure timer handle is set to invalid */
987 tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
988
989 /* Increment RTO backoff (also equal to number of retries) */
990 tc->rto_boff += 1;
991
992 /* Go back to first un-acked byte */
993 tc->snd_nxt = tc->snd_una;
994
995 /* Get buffer */
996 tcp_get_free_buffer_index (tm, &bi);
997 b = vlib_get_buffer (vm, bi);
998
Florin Corase04c2992017-03-01 08:17:34 -0800999 if (tc->state >= TCP_STATE_ESTABLISHED)
Dave Barach68b0fb02017-02-28 15:15:56 -05001000 {
1001 tcp_fastrecovery_off (tc);
1002
1003 /* Exponential backoff */
1004 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1005
1006 /* Figure out what and how many bytes we can send */
1007 snd_space = tcp_available_snd_space (tc);
1008 max_bytes = clib_min (tc->snd_mss, snd_space);
Florin Corase04c2992017-03-01 08:17:34 -08001009
1010 if (max_bytes == 0)
1011 {
1012 clib_warning ("no wnd to retransmit");
1013 return;
1014 }
Dave Barach68b0fb02017-02-28 15:15:56 -05001015 tcp_prepare_retransmit_segment (tc, b, max_bytes);
1016
1017 tc->rtx_bytes += max_bytes;
1018
1019 /* No fancy recovery for now! */
1020 scoreboard_clear (&tc->sack_sb);
1021 }
1022 else
1023 {
1024 /* Retransmit for SYN/SYNACK */
1025 ASSERT (tc->state == TCP_STATE_SYN_RCVD
1026 || tc->state == TCP_STATE_SYN_SENT);
1027
1028 /* Try without increasing RTO a number of times. If this fails,
1029 * start growing RTO exponentially */
1030 if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1031 tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1032
1033 vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
Florin Corase04c2992017-03-01 08:17:34 -08001034
Dave Barach68b0fb02017-02-28 15:15:56 -05001035 tcp_push_hdr_i (tc, b, tc->state);
Florin Corase04c2992017-03-01 08:17:34 -08001036
1037 /* Account for the SYN */
1038 tc->snd_nxt += 1;
Dave Barach68b0fb02017-02-28 15:15:56 -05001039 }
1040
1041 if (!is_syn)
1042 {
1043 tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
1044
1045 /* Re-enable retransmit timer */
Florin Corasd79b41e2017-03-04 05:37:52 -08001046 tcp_retransmit_timer_set (tc);
Dave Barach68b0fb02017-02-28 15:15:56 -05001047 }
1048 else
1049 {
1050 ASSERT (tc->state == TCP_STATE_SYN_SENT);
1051
1052 /* This goes straight to ipx_lookup */
1053 tcp_push_ip_hdr (tm, tc, b);
1054 tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
1055
1056 /* Re-enable retransmit timer */
1057 tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN,
1058 tc->rto * TCP_TO_TIMER_TICK);
1059 }
1060}
1061
/**
 * Retransmit timer handler: runs the common handler in non-SYN mode.
 *
 * @param index	connection index, looked up on the current thread
 */
void
tcp_timer_retransmit_handler (u32 index)
{
  tcp_timer_retransmit_handler_i (index, 0);
}
1067
/**
 * SYN retransmit timer handler: runs the common handler in SYN mode.
 *
 * @param index	index of the half-open connection
 */
void
tcp_timer_retransmit_syn_handler (u32 index)
{
  tcp_timer_retransmit_handler_i (index, 1);
}
1073
1074/**
1075 * Retansmit first unacked segment */
1076void
1077tcp_retransmit_first_unacked (tcp_connection_t * tc)
1078{
1079 tcp_main_t *tm = vnet_get_tcp_main ();
1080 u32 snd_nxt = tc->snd_nxt;
1081 vlib_buffer_t *b;
1082 u32 bi;
1083
1084 tc->snd_nxt = tc->snd_una;
1085
1086 /* Get buffer */
1087 tcp_get_free_buffer_index (tm, &bi);
1088 b = vlib_get_buffer (tm->vlib_main, bi);
1089
1090 tcp_prepare_retransmit_segment (tc, b, tc->snd_mss);
1091 tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4);
1092
1093 tc->snd_nxt = snd_nxt;
1094 tc->rtx_bytes += tc->snd_mss;
1095}
1096
1097void
1098tcp_fast_retransmit (tcp_connection_t * tc)
1099{
1100 tcp_main_t *tm = vnet_get_tcp_main ();
1101 u32 snd_space, max_bytes, n_bytes, bi;
1102 vlib_buffer_t *b;
1103
1104 ASSERT (tcp_in_fastrecovery (tc));
1105
1106 clib_warning ("fast retransmit!");
1107
1108 /* Start resending from first un-acked segment */
1109 tc->snd_nxt = tc->snd_una;
1110
1111 snd_space = tcp_available_snd_space (tc);
1112
1113 while (snd_space)
1114 {
1115 tcp_get_free_buffer_index (tm, &bi);
1116 b = vlib_get_buffer (tm->vlib_main, bi);
1117
1118 max_bytes = clib_min (tc->snd_mss, snd_space);
1119 n_bytes = tcp_prepare_retransmit_segment (tc, b, max_bytes);
1120
1121 /* Nothing left to retransmit */
1122 if (n_bytes == 0)
1123 return;
1124
1125 tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4);
1126
1127 snd_space -= n_bytes;
1128 }
1129
1130 /* If window allows, send new data */
1131 tc->snd_nxt = tc->snd_una_max;
1132}
1133
1134always_inline u32
1135tcp_session_has_ooo_data (tcp_connection_t * tc)
1136{
1137 stream_session_t *s =
1138 stream_session_get (tc->c_s_index, tc->c_thread_index);
1139 return svm_fifo_has_ooo_data (s->server_rx_fifo);
1140}
1141
1142always_inline uword
1143tcp46_output_inline (vlib_main_t * vm,
1144 vlib_node_runtime_t * node,
1145 vlib_frame_t * from_frame, int is_ip4)
1146{
Dave Barach68b0fb02017-02-28 15:15:56 -05001147 u32 n_left_from, next_index, *from, *to_next;
1148 u32 my_thread_index = vm->cpu_index;
1149
1150 from = vlib_frame_vector_args (from_frame);
1151 n_left_from = from_frame->n_vectors;
1152
1153 next_index = node->cached_next_index;
1154
1155 while (n_left_from > 0)
1156 {
1157 u32 n_left_to_next;
1158
1159 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1160
1161 while (n_left_from > 0 && n_left_to_next > 0)
1162 {
1163 u32 bi0;
1164 vlib_buffer_t *b0;
1165 tcp_connection_t *tc0;
1166 tcp_header_t *th0;
1167 u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
1168
1169 bi0 = from[0];
1170 to_next[0] = bi0;
1171 from += 1;
1172 to_next += 1;
1173 n_left_from -= 1;
1174 n_left_to_next -= 1;
1175
1176 b0 = vlib_get_buffer (vm, bi0);
1177 tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
1178 my_thread_index);
Florin Corasd79b41e2017-03-04 05:37:52 -08001179 if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
1180 {
1181 error0 = TCP_ERROR_INVALID_CONNECTION;
1182 next0 = TCP_OUTPUT_NEXT_DROP;
1183 goto done;
1184 }
1185
Dave Barach68b0fb02017-02-28 15:15:56 -05001186 th0 = vlib_buffer_get_current (b0);
Florin Corase69f4952017-03-07 10:06:24 -08001187 TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
Dave Barach68b0fb02017-02-28 15:15:56 -05001188
1189 if (is_ip4)
1190 {
1191 ip4_header_t *ih0;
1192 ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4,
1193 &tc0->c_rmt_ip4, IP_PROTOCOL_TCP);
1194 th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0);
1195 }
1196 else
1197 {
1198 ip6_header_t *ih0;
1199 int bogus = ~0;
1200
1201 ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
1202 &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
1203 th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0,
1204 &bogus);
1205 ASSERT (!bogus);
1206 }
1207
1208 /* Filter out DUPACKs if there are no OOO segments left */
1209 if (PREDICT_FALSE
1210 (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
1211 {
Florin Corase04c2992017-03-01 08:17:34 -08001212 ASSERT (tc0->snt_dupacks > 0);
Dave Barach68b0fb02017-02-28 15:15:56 -05001213 tc0->snt_dupacks--;
Dave Barach68b0fb02017-02-28 15:15:56 -05001214 if (!tcp_session_has_ooo_data (tc0))
1215 {
1216 error0 = TCP_ERROR_FILTERED_DUPACKS;
1217 next0 = TCP_OUTPUT_NEXT_DROP;
1218 goto done;
1219 }
1220 }
1221
1222 /* Retransmitted SYNs do reach this but it should be harmless */
1223 tc0->rcv_las = tc0->rcv_nxt;
1224
1225 /* Stop DELACK timer and fix flags */
1226 tc0->flags &=
1227 ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK);
1228 if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK))
1229 {
1230 tcp_timer_reset (tc0, TCP_TIMER_DELACK);
1231 }
1232
1233 /* If not retransmitting
1234 * 1) update snd_una_max (SYN, SYNACK, new data, FIN)
1235 * 2) If we're not tracking an ACK, start tracking */
1236 if (seq_lt (tc0->snd_una_max, tc0->snd_nxt))
1237 {
1238 tc0->snd_una_max = tc0->snd_nxt;
1239 if (tc0->rtt_ts == 0)
1240 {
1241 tc0->rtt_ts = tcp_time_now ();
1242 tc0->rtt_seq = tc0->snd_nxt;
1243 }
1244 }
1245
1246 /* Set the retransmit timer if not set already and not
1247 * doing a pure ACK */
1248 if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
1249 && tc0->snd_nxt != tc0->snd_una)
1250 {
Florin Corasd79b41e2017-03-04 05:37:52 -08001251 tcp_retransmit_timer_set (tc0);
Dave Barach68b0fb02017-02-28 15:15:56 -05001252 tc0->rto_boff = 0;
1253 }
1254
1255 /* set fib index to default and lookup node */
1256 /* XXX network virtualization (vrf/vni) */
1257 vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
1258 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1259
1260 b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
Dave Barach68b0fb02017-02-28 15:15:56 -05001261 done:
Florin Corase69f4952017-03-07 10:06:24 -08001262 b0->error = node->errors[error0];
Dave Barach68b0fb02017-02-28 15:15:56 -05001263 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1264 {
1265
1266 }
1267
1268 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1269 n_left_to_next, bi0, next0);
1270 }
1271
1272 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1273 }
1274
1275 return from_frame->n_vectors;
1276}
1277
1278static uword
1279tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node,
1280 vlib_frame_t * from_frame)
1281{
1282 return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
1283}
1284
1285static uword
1286tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node,
1287 vlib_frame_t * from_frame)
1288{
1289 return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
1290}
1291
Florin Corase69f4952017-03-07 10:06:24 -08001292/* *INDENT-OFF* */
Dave Barach68b0fb02017-02-28 15:15:56 -05001293VLIB_REGISTER_NODE (tcp4_output_node) =
1294{
1295 .function = tcp4_output,.name = "tcp4-output",
1296 /* Takes a vector of packets. */
Florin Corase69f4952017-03-07 10:06:24 -08001297 .vector_size = sizeof (u32),
1298 .n_errors = TCP_N_ERROR,
1299 .error_strings = tcp_error_strings,
1300 .n_next_nodes = TCP_OUTPUT_N_NEXT,
1301 .next_nodes = {
Dave Barach68b0fb02017-02-28 15:15:56 -05001302#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
1303 foreach_tcp4_output_next
1304#undef _
Florin Corase69f4952017-03-07 10:06:24 -08001305 },
1306 .format_buffer = format_tcp_header,
1307 .format_trace = format_tcp_tx_trace,
1308};
1309/* *INDENT-ON* */
Dave Barach68b0fb02017-02-28 15:15:56 -05001310
Florin Corase69f4952017-03-07 10:06:24 -08001311VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output);
1312
1313/* *INDENT-OFF* */
Dave Barach68b0fb02017-02-28 15:15:56 -05001314VLIB_REGISTER_NODE (tcp6_output_node) =
1315{
Florin Corase69f4952017-03-07 10:06:24 -08001316 .function = tcp6_output,
1317 .name = "tcp6-output",
Dave Barach68b0fb02017-02-28 15:15:56 -05001318 /* Takes a vector of packets. */
Florin Corase69f4952017-03-07 10:06:24 -08001319 .vector_size = sizeof (u32),
1320 .n_errors = TCP_N_ERROR,
1321 .error_strings = tcp_error_strings,
1322 .n_next_nodes = TCP_OUTPUT_N_NEXT,
1323 .next_nodes = {
Dave Barach68b0fb02017-02-28 15:15:56 -05001324#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
1325 foreach_tcp6_output_next
1326#undef _
Florin Corase69f4952017-03-07 10:06:24 -08001327 },
1328 .format_buffer = format_tcp_header,
1329 .format_trace = format_tcp_tx_trace,
1330};
1331/* *INDENT-ON* */
Dave Barach68b0fb02017-02-28 15:15:56 -05001332
Florin Corase69f4952017-03-07 10:06:24 -08001333VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output);
1334
1335u32
Dave Barach68b0fb02017-02-28 15:15:56 -05001336tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
1337{
1338 tcp_connection_t *tc;
1339
1340 tc = (tcp_connection_t *) tconn;
1341 tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED);
1342 return 0;
1343}
1344
/** Next-node indices used by the tcp4-reset and tcp6-reset nodes */
typedef enum _tcp_reset_next
{
  TCP_RESET_NEXT_DROP = 0,      /* drop the buffer */
  TCP_RESET_NEXT_IP_LOOKUP,     /* hand the buffer to ip lookup */
  TCP_RESET_N_NEXT              /* number of next nodes */
} tcp_reset_next_t;
1351
/* Next nodes of tcp4-reset as _(tcp_reset_next_t suffix, node name) */
#define foreach_tcp4_reset_next                 \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip4-lookup")

/* Next nodes of tcp6-reset as _(tcp_reset_next_t suffix, node name) */
#define foreach_tcp6_reset_next                 \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip6-lookup")
1359
1360static uword
1361tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1362 vlib_frame_t * from_frame, u8 is_ip4)
1363{
1364 u32 n_left_from, next_index, *from, *to_next;
1365 u32 my_thread_index = vm->cpu_index;
1366
1367 from = vlib_frame_vector_args (from_frame);
1368 n_left_from = from_frame->n_vectors;
1369
1370 next_index = node->cached_next_index;
1371
1372 while (n_left_from > 0)
1373 {
1374 u32 n_left_to_next;
1375
1376 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1377
1378 while (n_left_from > 0 && n_left_to_next > 0)
1379 {
1380 u32 bi0;
1381 vlib_buffer_t *b0;
1382 u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
1383
1384 bi0 = from[0];
1385 to_next[0] = bi0;
1386 from += 1;
1387 to_next += 1;
1388 n_left_from -= 1;
1389 n_left_to_next -= 1;
1390
1391 b0 = vlib_get_buffer (vm, bi0);
1392
1393 if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
1394 my_thread_index, is_ip4))
1395 {
1396 error0 = TCP_ERROR_LOOKUP_DROPS;
1397 next0 = TCP_RESET_NEXT_DROP;
1398 goto done;
1399 }
1400
1401 /* Prepare to send to IP lookup */
1402 vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
1403 next0 = TCP_RESET_NEXT_IP_LOOKUP;
1404
1405 done:
Florin Corase69f4952017-03-07 10:06:24 -08001406 b0->error = node->errors[error0];
Dave Barach68b0fb02017-02-28 15:15:56 -05001407 b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
1408 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1409 {
1410
1411 }
1412
1413 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1414 n_left_to_next, bi0, next0);
1415 }
1416 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1417 }
1418 return from_frame->n_vectors;
1419}
1420
1421static uword
1422tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
1423 vlib_frame_t * from_frame)
1424{
1425 return tcp46_send_reset_inline (vm, node, from_frame, 1);
1426}
1427
1428static uword
1429tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
1430 vlib_frame_t * from_frame)
1431{
1432 return tcp46_send_reset_inline (vm, node, from_frame, 0);
1433}
1434
1435/* *INDENT-OFF* */
1436VLIB_REGISTER_NODE (tcp4_reset_node) = {
1437 .function = tcp4_send_reset,
1438 .name = "tcp4-reset",
1439 .vector_size = sizeof (u32),
1440 .n_errors = TCP_N_ERROR,
1441 .error_strings = tcp_error_strings,
1442 .n_next_nodes = TCP_RESET_N_NEXT,
1443 .next_nodes = {
1444#define _(s,n) [TCP_RESET_NEXT_##s] = n,
1445 foreach_tcp4_reset_next
1446#undef _
1447 },
1448};
1449/* *INDENT-ON* */
1450
Florin Corase69f4952017-03-07 10:06:24 -08001451VLIB_NODE_FUNCTION_MULTIARCH (tcp4_reset_node, tcp4_send_reset);
1452
Dave Barach68b0fb02017-02-28 15:15:56 -05001453/* *INDENT-OFF* */
1454VLIB_REGISTER_NODE (tcp6_reset_node) = {
1455 .function = tcp6_send_reset,
1456 .name = "tcp6-reset",
1457 .vector_size = sizeof (u32),
1458 .n_errors = TCP_N_ERROR,
1459 .error_strings = tcp_error_strings,
1460 .n_next_nodes = TCP_RESET_N_NEXT,
1461 .next_nodes = {
1462#define _(s,n) [TCP_RESET_NEXT_##s] = n,
1463 foreach_tcp6_reset_next
1464#undef _
1465 },
1466};
1467/* *INDENT-ON* */
1468
Florin Corase69f4952017-03-07 10:06:24 -08001469VLIB_NODE_FUNCTION_MULTIARCH (tcp6_reset_node, tcp6_send_reset);
1470
Dave Barach68b0fb02017-02-28 15:15:56 -05001471/*
1472 * fd.io coding-style-patch-verification: ON
1473 *
1474 * Local Variables:
1475 * eval: (c-set-style "gnu")
1476 * End:
1477 */