blob: b550fdd65dd8b54f45f120d8ebb20d3b842b3558 [file] [log] [blame]
Florin Coras52814732019-06-12 15:38:19 -07001/*
2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 *
15 * TCP byte tracker that can generate delivery rate estimates. Based on
16 * draft-cheng-iccrg-delivery-rate-estimation-00
17 */
18
19#include <vnet/tcp/tcp.h>
20
21static tcp_bt_sample_t *
22bt_get_sample (tcp_byte_tracker_t * bt, u32 bts_index)
23{
24 if (pool_is_free_index (bt->samples, bts_index))
25 return 0;
26 return pool_elt_at_index (bt->samples, bts_index);
27}
28
29static tcp_bt_sample_t *
30bt_next_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
31{
32 return bt_get_sample (bt, bts->next);
33}
34
35static tcp_bt_sample_t *
36bt_prev_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
37{
38 return bt_get_sample (bt, bts->prev);
39}
40
41static u32
42bt_sample_index (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
43{
44 if (!bts)
45 return TCP_BTS_INVALID_INDEX;
46 return bts - bt->samples;
47}
48
49static inline int
50bt_seq_lt (u32 a, u32 b)
51{
52 return seq_lt (a, b);
53}
54
55static tcp_bt_sample_t *
56bt_alloc_sample (tcp_byte_tracker_t * bt, u32 min_seq)
57{
58 tcp_bt_sample_t *bts;
59
60 pool_get_zero (bt->samples, bts);
61 bts->next = bts->prev = TCP_BTS_INVALID_INDEX;
62 bts->min_seq = min_seq;
63 rb_tree_add_custom (&bt->sample_lookup, bts->min_seq, bts - bt->samples,
64 bt_seq_lt);
65 return bts;
66}
67
68static void
69bt_free_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
70{
71 if (bts->prev != TCP_BTS_INVALID_INDEX)
72 {
73 tcp_bt_sample_t *prev = bt_prev_sample (bt, bts);
74 prev->next = bts->next;
75 }
76 else
77 bt->head = bts->next;
78
79 if (bts->next != TCP_BTS_INVALID_INDEX)
80 {
81 tcp_bt_sample_t *next = bt_next_sample (bt, bts);
82 next->prev = bts->prev;
83 }
84 else
85 bt->tail = bts->prev;
86
87 rb_tree_del_custom (&bt->sample_lookup, bts->min_seq, bt_seq_lt);
88 if (CLIB_DEBUG)
89 memset (bts, 0xfc, sizeof (*bts));
90 pool_put (bt->samples, bts);
91}
92
93static tcp_bt_sample_t *
94bt_lookup_seq (tcp_byte_tracker_t * bt, u32 seq)
95{
96 rb_tree_t *rt = &bt->sample_lookup;
97 rb_node_t *cur, *prev;
98 tcp_bt_sample_t *bts;
99
100 cur = rb_node (rt, rt->root);
101 if (rb_node_is_tnil (rt, cur))
102 return 0;
103
104 while (seq != cur->key)
105 {
106 prev = cur;
107 if (seq_lt (seq, cur->key))
108 cur = rb_node_left (rt, cur);
109 else
110 cur = rb_node_right (rt, cur);
111
112 if (rb_node_is_tnil (rt, cur))
113 {
114 /* Hit tnil as a left child. Find predecessor */
115 if (seq_lt (seq, prev->key))
116 {
117 cur = rb_tree_predecessor (rt, prev);
118 if (rb_node_is_tnil (rt, cur))
119 return 0;
120 bts = bt_get_sample (bt, cur->opaque);
121 }
122 /* Hit tnil as a right child */
123 else
124 {
125 bts = bt_get_sample (bt, prev->opaque);
126 }
127
128 if (seq_geq (seq, bts->min_seq))
129 return bts;
130
131 return 0;
132 }
133 }
134
135 if (!rb_node_is_tnil (rt, cur))
136 return bt_get_sample (bt, cur->opaque);
137
138 return 0;
139}
140
141static void
142bt_update_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts, u32 seq)
143{
144 rb_tree_del_custom (&bt->sample_lookup, bts->min_seq, bt_seq_lt);
145 bts->min_seq = seq;
146 rb_tree_add_custom (&bt->sample_lookup, bts->min_seq,
147 bt_sample_index (bt, bts), bt_seq_lt);
148}
149
150static tcp_bt_sample_t *
151bt_fix_overlapped (tcp_byte_tracker_t * bt, tcp_bt_sample_t * start,
152 u32 seq, u8 is_end)
153{
154 tcp_bt_sample_t *cur, *next;
155
156 cur = start;
157 while ((next = bt_next_sample (bt, cur)) && seq_lt (next->min_seq, seq))
158 {
159 bt_free_sample (bt, cur);
160 cur = next;
161 }
162
163 if (next)
164 {
165 bt_free_sample (bt, cur);
166 return next;
167 }
168
169 /* Overlapping current entirely */
170 if (is_end)
171 {
172 bt_free_sample (bt, cur);
173 return 0;
174 }
175
176 /* Overlapping head of current but not all */
177 bt_update_sample (bt, cur, seq);
178 return cur;
179}
180
181int
182tcp_bt_is_sane (tcp_byte_tracker_t * bt)
183{
184 tcp_bt_sample_t *bts, *tmp;
185
186 if (pool_elts (bt->samples) != pool_elts (bt->sample_lookup.nodes) - 1)
187 return 0;
188
189 if (bt->head == TCP_BTS_INVALID_INDEX)
190 {
191 if (bt->tail != TCP_BTS_INVALID_INDEX)
192 return 0;
193 if (pool_elts (bt->samples) != 0)
194 return 0;
195 return 1;
196 }
197
198 bts = bt_get_sample (bt, bt->tail);
199 if (!bts)
200 return 0;
201
202 bts = bt_get_sample (bt, bt->head);
203 if (!bts || bts->prev != TCP_BTS_INVALID_INDEX)
204 return 0;
205
206 while (bts)
207 {
208 tmp = bt_lookup_seq (bt, bts->min_seq);
209 if (!tmp)
210 return 0;
211 if (tmp != bts)
212 return 0;
213 tmp = bt_next_sample (bt, bts);
214 if (tmp)
215 {
216 if (tmp->prev != bt_sample_index (bt, bts))
217 {
218 clib_warning ("next %u thinks prev is %u should be %u",
219 bts->next, tmp->prev, bt_sample_index (bt, bts));
220 return 0;
221 }
222 if (!seq_lt (bts->min_seq, tmp->min_seq))
223 return 0;
224 }
225 else
226 {
227 if (bt->tail != bt_sample_index (bt, bts))
228 return 0;
229 if (bts->next != TCP_BTS_INVALID_INDEX)
230 return 0;
231 }
232 bts = tmp;
233 }
234 return 1;
235}
236
237static tcp_bt_sample_t *
238tcp_bt_alloc_tx_sample (tcp_connection_t * tc, u32 min_seq)
239{
240 tcp_bt_sample_t *bts;
241 bts = bt_alloc_sample (tc->bt, min_seq);
242 bts->delivered = tc->delivered;
243 bts->delivered_time = tc->delivered_time;
Florin Coras85fc1302019-06-26 09:12:34 -0700244 bts->tx_time = tcp_time_now_us (tc->c_thread_index);
Florin Coras7436b432019-09-10 23:26:27 -0700245 bts->first_tx_time = tc->first_tx_time;
Florin Coras52814732019-06-12 15:38:19 -0700246 bts->flags |= tc->app_limited ? TCP_BTS_IS_APP_LIMITED : 0;
247 return bts;
248}
249
250void
251tcp_bt_check_app_limited (tcp_connection_t * tc)
252{
253 u32 available_bytes, flight_size;
254
255 available_bytes = transport_max_tx_dequeue (&tc->connection);
256 flight_size = tcp_flight_size (tc);
257
258 /* Not enough bytes to fill the cwnd */
259 if (available_bytes + flight_size + tc->snd_mss < tc->cwnd
260 /* Bytes considered lost have been retransmitted */
261 && tc->sack_sb.lost_bytes <= tc->snd_rxt_bytes)
262 tc->app_limited = tc->delivered + flight_size ? : 1;
263}
264
265void
266tcp_bt_track_tx (tcp_connection_t * tc)
267{
268 tcp_byte_tracker_t *bt = tc->bt;
269 tcp_bt_sample_t *bts, *tail;
270 u32 bts_index;
271
Florin Coras85fc1302019-06-26 09:12:34 -0700272 if (tc->snd_una == tc->snd_nxt)
Florin Coras7436b432019-09-10 23:26:27 -0700273 {
274 tc->delivered_time = tcp_time_now_us (tc->c_thread_index);
275 tc->first_tx_time = tc->delivered_time;
276 }
Florin Coras52814732019-06-12 15:38:19 -0700277
278 bts = tcp_bt_alloc_tx_sample (tc, tc->snd_nxt);
279 bts_index = bt_sample_index (bt, bts);
280 tail = bt_get_sample (bt, bt->tail);
281 if (tail)
282 {
283 tail->next = bts_index;
284 bts->prev = bt->tail;
285 bt->tail = bts_index;
286 }
287 else
288 {
289 bt->tail = bt->head = bts_index;
290 }
291}
292
293void
294tcp_bt_track_rxt (tcp_connection_t * tc, u32 start, u32 end)
295{
296 tcp_byte_tracker_t *bt = tc->bt;
297 tcp_bt_sample_t *bts, *next, *cur, *prev, *nbts;
298 u32 bts_index, cur_index, next_index, prev_index, min_seq;
299 u8 is_end = end == tc->snd_nxt;
300
301 bts = bt_get_sample (bt, bt->last_ooo);
302 if (bts && bts->max_seq == start)
303 {
304 bts->max_seq = end;
305 next = bt_next_sample (bt, bts);
306 if (next)
307 bt_fix_overlapped (bt, next, end, is_end);
308
309 return;
310 }
311
312 /* Find original tx sample */
313 bts = bt_lookup_seq (bt, start);
314
315 ASSERT (bts != 0 && seq_geq (start, bts->min_seq));
316
317 /* Head in the past */
318 if (seq_lt (bts->min_seq, tc->snd_una))
319 bt_update_sample (bt, bts, tc->snd_una);
320
321 /* Head overlap */
322 if (bts->min_seq == start)
323 {
324 prev_index = bts->prev;
325 next = bt_fix_overlapped (bt, bts, end, is_end);
326 next_index = bt_sample_index (bt, next);
327
328 cur = tcp_bt_alloc_tx_sample (tc, start);
329 cur->max_seq = end;
330 cur->flags |= TCP_BTS_IS_RXT;
331 cur->next = next_index;
332 cur->prev = prev_index;
333
334 cur_index = bt_sample_index (bt, cur);
335
336 if (next_index != TCP_BTS_INVALID_INDEX)
337 {
338 next = bt_get_sample (bt, next_index);
339 next->prev = cur_index;
340 }
341 else
342 {
343 bt->tail = cur_index;
344 }
345
346 if (prev_index != TCP_BTS_INVALID_INDEX)
347 {
348 prev = bt_get_sample (bt, prev_index);
349 prev->next = cur_index;
350 }
351 else
352 {
353 bt->head = cur_index;
354 }
355
356 bt->last_ooo = cur_index;
357 return;
358 }
359
360 bts_index = bt_sample_index (bt, bts);
361 next = bt_next_sample (bt, bts);
362 if (next)
363 next = bt_fix_overlapped (bt, next, end, is_end);
364
365 min_seq = next ? next->min_seq : tc->snd_nxt;
366 ASSERT (seq_lt (start, min_seq));
367
368 /* Have to split or tail overlap */
369 cur = tcp_bt_alloc_tx_sample (tc, start);
370 cur->max_seq = end;
371 cur->flags |= TCP_BTS_IS_RXT;
372 cur->prev = bts_index;
373 cur_index = bt_sample_index (bt, cur);
374
375 /* Split. Allocate another sample */
376 if (seq_lt (end, min_seq))
377 {
378 nbts = tcp_bt_alloc_tx_sample (tc, end);
379 cur = bt_get_sample (bt, cur_index);
380 bts = bt_get_sample (bt, bts_index);
381
382 *nbts = *bts;
383 nbts->min_seq = end;
384
385 if (nbts->next != TCP_BTS_INVALID_INDEX)
386 {
387 next = bt_get_sample (bt, nbts->next);
388 next->prev = bt_sample_index (bt, nbts);
389 }
390 else
391 bt->tail = bt_sample_index (bt, nbts);
392
393 bts->next = nbts->prev = cur_index;
394 cur->next = bt_sample_index (bt, nbts);
395
396 bt->last_ooo = cur_index;
397 }
398 /* Tail completely overlapped */
399 else
400 {
401 bts = bt_get_sample (bt, bts_index);
402
403 if (bts->next != TCP_BTS_INVALID_INDEX)
404 {
405 next = bt_get_sample (bt, bts->next);
406 next->prev = cur_index;
407 }
408 else
409 bt->tail = cur_index;
410
411 cur->next = bts->next;
412 bts->next = cur_index;
413
414 bt->last_ooo = cur_index;
415 }
416}
417
418static void
419tcp_bt_sample_to_rate_sample (tcp_connection_t * tc, tcp_bt_sample_t * bts,
420 tcp_rate_sample_t * rs)
421{
Florin Coras85fc1302019-06-26 09:12:34 -0700422 if (rs->prior_delivered && rs->prior_delivered >= bts->delivered)
Florin Coras52814732019-06-12 15:38:19 -0700423 return;
424
Florin Coras85fc1302019-06-26 09:12:34 -0700425 rs->prior_delivered = bts->delivered;
426 rs->prior_time = bts->delivered_time;
Florin Coras7436b432019-09-10 23:26:27 -0700427 rs->interval_time = bts->tx_time - bts->first_tx_time;
Florin Coras85fc1302019-06-26 09:12:34 -0700428 rs->rtt_time = bts->tx_time;
Florin Coras52814732019-06-12 15:38:19 -0700429 rs->flags = bts->flags;
Florin Coras7436b432019-09-10 23:26:27 -0700430 tc->first_tx_time = bts->tx_time;
Florin Coras52814732019-06-12 15:38:19 -0700431}
432
433static void
434tcp_bt_walk_samples (tcp_connection_t * tc, tcp_rate_sample_t * rs)
435{
436 tcp_byte_tracker_t *bt = tc->bt;
437 tcp_bt_sample_t *next, *cur;
438
439 cur = bt_get_sample (bt, bt->head);
440 tcp_bt_sample_to_rate_sample (tc, cur, rs);
441 while ((next = bt_get_sample (bt, cur->next))
442 && seq_lt (next->min_seq, tc->snd_una))
443 {
444 bt_free_sample (bt, cur);
445 tcp_bt_sample_to_rate_sample (tc, next, rs);
446 cur = next;
447 }
448
449 ASSERT (seq_lt (cur->min_seq, tc->snd_una));
450
451 /* All samples acked */
452 if (tc->snd_una == tc->snd_nxt)
453 {
454 ASSERT (pool_elts (bt->samples) == 1);
455 bt_free_sample (bt, cur);
456 return;
457 }
458
459 /* Current sample completely consumed */
460 if (next && next->min_seq == tc->snd_una)
461 {
462 bt_free_sample (bt, cur);
463 cur = next;
464 }
465}
466
467static void
468tcp_bt_walk_samples_ooo (tcp_connection_t * tc, tcp_rate_sample_t * rs)
469{
470 sack_block_t *blks = tc->rcv_opts.sacks, *blk;
471 tcp_byte_tracker_t *bt = tc->bt;
472 tcp_bt_sample_t *next, *cur;
473 int i;
474
475 for (i = 0; i < vec_len (blks); i++)
476 {
477 blk = &blks[i];
478
479 /* Ignore blocks that are already covered by snd_una */
480 if (seq_lt (blk->end, tc->snd_una))
481 continue;
482
483 cur = bt_lookup_seq (bt, blk->start);
484 if (!cur)
485 continue;
486
487 tcp_bt_sample_to_rate_sample (tc, cur, rs);
488
489 /* Current shouldn't be removed */
490 if (cur->min_seq != blk->start)
491 {
492 cur = bt_next_sample (bt, cur);
493 if (!cur)
494 continue;
495 }
496
497 while ((next = bt_get_sample (bt, cur->next))
498 && seq_lt (next->min_seq, blk->end))
499 {
500 bt_free_sample (bt, cur);
501 tcp_bt_sample_to_rate_sample (tc, next, rs);
502 cur = next;
503 }
504
505 /* Current consumed entirely */
506 if (next && next->min_seq == blk->end)
507 bt_free_sample (bt, cur);
508 }
509}
510
511void
512tcp_bt_sample_delivery_rate (tcp_connection_t * tc, tcp_rate_sample_t * rs)
513{
514 u32 delivered;
515
516 if (PREDICT_FALSE (tc->flags & TCP_CONN_FINSNT))
517 return;
518
519 delivered = tc->bytes_acked + tc->sack_sb.last_sacked_bytes;
520 if (!delivered || tc->bt->head == TCP_BTS_INVALID_INDEX)
521 return;
522
523 /* Do not count bytes that were previously sacked again */
524 tc->delivered += delivered - tc->sack_sb.last_bytes_delivered;
525 tc->delivered_time = tcp_time_now_us (tc->c_thread_index);
526
527 if (tc->app_limited && tc->delivered > tc->app_limited)
528 tc->app_limited = 0;
529
530 if (tc->bytes_acked)
531 tcp_bt_walk_samples (tc, rs);
532
533 if (tc->sack_sb.last_sacked_bytes)
534 tcp_bt_walk_samples_ooo (tc, rs);
Florin Coras85fc1302019-06-26 09:12:34 -0700535
Florin Coras7436b432019-09-10 23:26:27 -0700536 rs->interval_time = clib_max (tc->delivered_time - rs->prior_time,
537 rs->interval_time);
Florin Coras85fc1302019-06-26 09:12:34 -0700538 rs->delivered = tc->delivered - rs->prior_delivered;
539 rs->rtt_time = tc->delivered_time - rs->rtt_time;
540 rs->acked_and_sacked = delivered;
541 rs->lost = tc->sack_sb.last_lost_bytes;
Florin Coras52814732019-06-12 15:38:19 -0700542}
543
544void
545tcp_bt_flush_samples (tcp_connection_t * tc)
546{
547 tcp_byte_tracker_t *bt = tc->bt;
548 tcp_bt_sample_t *bts;
549 u32 *samples = 0, *si;
550
551 vec_validate (samples, pool_elts (bt->samples) - 1);
Florin Coras92f190a2019-08-23 10:28:01 -0700552 vec_reset_length (samples);
Florin Coras52814732019-06-12 15:38:19 -0700553
554 /* *INDENT-OFF* */
555 pool_foreach (bts, bt->samples, ({
556 vec_add1 (samples, bts - bt->samples);
557 }));
558 /* *INDENT-ON* */
559
560 vec_foreach (si, samples)
561 {
562 bts = bt_get_sample (bt, *si);
563 bt_free_sample (bt, bts);
564 }
565
566 vec_free (samples);
567}
568
569void
570tcp_bt_cleanup (tcp_connection_t * tc)
571{
572 tcp_byte_tracker_t *bt = tc->bt;
573
574 rb_tree_free_nodes (&bt->sample_lookup);
575 pool_free (bt->samples);
576 clib_mem_free (bt);
577 tc->bt = 0;
578}
579
580void
581tcp_bt_init (tcp_connection_t * tc)
582{
583 tcp_byte_tracker_t *bt;
584
585 bt = clib_mem_alloc (sizeof (tcp_byte_tracker_t));
586 clib_memset (bt, 0, sizeof (tcp_byte_tracker_t));
587
588 rb_tree_init (&bt->sample_lookup);
589 bt->head = bt->tail = TCP_BTS_INVALID_INDEX;
590 tc->bt = bt;
591}
592
593/*
594 * fd.io coding-style-patch-verification: ON
595 *
596 * Local Variables:
597 * eval: (c-set-style "gnu")
598 * End:
599 */