blob: dd3d9539d9a7c631aa3eb68a707781a6510ca04d [file] [log] [blame]
Florin Coras52814732019-06-12 15:38:19 -07001/*
2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 *
15 * TCP byte tracker that can generate delivery rate estimates. Based on
16 * draft-cheng-iccrg-delivery-rate-estimation-00
17 */
18
19#include <vnet/tcp/tcp.h>
20
21static tcp_bt_sample_t *
22bt_get_sample (tcp_byte_tracker_t * bt, u32 bts_index)
23{
24 if (pool_is_free_index (bt->samples, bts_index))
25 return 0;
26 return pool_elt_at_index (bt->samples, bts_index);
27}
28
29static tcp_bt_sample_t *
30bt_next_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
31{
32 return bt_get_sample (bt, bts->next);
33}
34
35static tcp_bt_sample_t *
36bt_prev_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
37{
38 return bt_get_sample (bt, bts->prev);
39}
40
41static u32
42bt_sample_index (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
43{
44 if (!bts)
45 return TCP_BTS_INVALID_INDEX;
46 return bts - bt->samples;
47}
48
49static inline int
50bt_seq_lt (u32 a, u32 b)
51{
52 return seq_lt (a, b);
53}
54
55static tcp_bt_sample_t *
56bt_alloc_sample (tcp_byte_tracker_t * bt, u32 min_seq)
57{
58 tcp_bt_sample_t *bts;
59
60 pool_get_zero (bt->samples, bts);
61 bts->next = bts->prev = TCP_BTS_INVALID_INDEX;
62 bts->min_seq = min_seq;
63 rb_tree_add_custom (&bt->sample_lookup, bts->min_seq, bts - bt->samples,
64 bt_seq_lt);
65 return bts;
66}
67
68static void
69bt_free_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts)
70{
71 if (bts->prev != TCP_BTS_INVALID_INDEX)
72 {
73 tcp_bt_sample_t *prev = bt_prev_sample (bt, bts);
74 prev->next = bts->next;
75 }
76 else
77 bt->head = bts->next;
78
79 if (bts->next != TCP_BTS_INVALID_INDEX)
80 {
81 tcp_bt_sample_t *next = bt_next_sample (bt, bts);
82 next->prev = bts->prev;
83 }
84 else
85 bt->tail = bts->prev;
86
87 rb_tree_del_custom (&bt->sample_lookup, bts->min_seq, bt_seq_lt);
88 if (CLIB_DEBUG)
89 memset (bts, 0xfc, sizeof (*bts));
90 pool_put (bt->samples, bts);
91}
92
93static tcp_bt_sample_t *
94bt_lookup_seq (tcp_byte_tracker_t * bt, u32 seq)
95{
96 rb_tree_t *rt = &bt->sample_lookup;
97 rb_node_t *cur, *prev;
98 tcp_bt_sample_t *bts;
99
100 cur = rb_node (rt, rt->root);
101 if (rb_node_is_tnil (rt, cur))
102 return 0;
103
104 while (seq != cur->key)
105 {
106 prev = cur;
107 if (seq_lt (seq, cur->key))
108 cur = rb_node_left (rt, cur);
109 else
110 cur = rb_node_right (rt, cur);
111
112 if (rb_node_is_tnil (rt, cur))
113 {
114 /* Hit tnil as a left child. Find predecessor */
115 if (seq_lt (seq, prev->key))
116 {
117 cur = rb_tree_predecessor (rt, prev);
118 if (rb_node_is_tnil (rt, cur))
119 return 0;
120 bts = bt_get_sample (bt, cur->opaque);
121 }
122 /* Hit tnil as a right child */
123 else
124 {
125 bts = bt_get_sample (bt, prev->opaque);
126 }
127
128 if (seq_geq (seq, bts->min_seq))
129 return bts;
130
131 return 0;
132 }
133 }
134
135 if (!rb_node_is_tnil (rt, cur))
136 return bt_get_sample (bt, cur->opaque);
137
138 return 0;
139}
140
141static void
142bt_update_sample (tcp_byte_tracker_t * bt, tcp_bt_sample_t * bts, u32 seq)
143{
144 rb_tree_del_custom (&bt->sample_lookup, bts->min_seq, bt_seq_lt);
145 bts->min_seq = seq;
146 rb_tree_add_custom (&bt->sample_lookup, bts->min_seq,
147 bt_sample_index (bt, bts), bt_seq_lt);
148}
149
150static tcp_bt_sample_t *
151bt_fix_overlapped (tcp_byte_tracker_t * bt, tcp_bt_sample_t * start,
152 u32 seq, u8 is_end)
153{
154 tcp_bt_sample_t *cur, *next;
155
156 cur = start;
157 while ((next = bt_next_sample (bt, cur)) && seq_lt (next->min_seq, seq))
158 {
159 bt_free_sample (bt, cur);
160 cur = next;
161 }
162
163 if (next)
164 {
165 bt_free_sample (bt, cur);
166 return next;
167 }
168
169 /* Overlapping current entirely */
170 if (is_end)
171 {
172 bt_free_sample (bt, cur);
173 return 0;
174 }
175
176 /* Overlapping head of current but not all */
177 bt_update_sample (bt, cur, seq);
178 return cur;
179}
180
181int
182tcp_bt_is_sane (tcp_byte_tracker_t * bt)
183{
184 tcp_bt_sample_t *bts, *tmp;
185
186 if (pool_elts (bt->samples) != pool_elts (bt->sample_lookup.nodes) - 1)
187 return 0;
188
189 if (bt->head == TCP_BTS_INVALID_INDEX)
190 {
191 if (bt->tail != TCP_BTS_INVALID_INDEX)
192 return 0;
193 if (pool_elts (bt->samples) != 0)
194 return 0;
195 return 1;
196 }
197
198 bts = bt_get_sample (bt, bt->tail);
199 if (!bts)
200 return 0;
201
202 bts = bt_get_sample (bt, bt->head);
203 if (!bts || bts->prev != TCP_BTS_INVALID_INDEX)
204 return 0;
205
206 while (bts)
207 {
208 tmp = bt_lookup_seq (bt, bts->min_seq);
209 if (!tmp)
210 return 0;
211 if (tmp != bts)
212 return 0;
213 tmp = bt_next_sample (bt, bts);
214 if (tmp)
215 {
216 if (tmp->prev != bt_sample_index (bt, bts))
217 {
218 clib_warning ("next %u thinks prev is %u should be %u",
219 bts->next, tmp->prev, bt_sample_index (bt, bts));
220 return 0;
221 }
222 if (!seq_lt (bts->min_seq, tmp->min_seq))
223 return 0;
224 }
225 else
226 {
227 if (bt->tail != bt_sample_index (bt, bts))
228 return 0;
229 if (bts->next != TCP_BTS_INVALID_INDEX)
230 return 0;
231 }
232 bts = tmp;
233 }
234 return 1;
235}
236
237static tcp_bt_sample_t *
238tcp_bt_alloc_tx_sample (tcp_connection_t * tc, u32 min_seq)
239{
240 tcp_bt_sample_t *bts;
241 bts = bt_alloc_sample (tc->bt, min_seq);
242 bts->delivered = tc->delivered;
243 bts->delivered_time = tc->delivered_time;
244 bts->tx_rate = transport_connection_tx_pacer_rate (&tc->connection);
Florin Coras85fc1302019-06-26 09:12:34 -0700245 bts->tx_time = tcp_time_now_us (tc->c_thread_index);
Florin Coras52814732019-06-12 15:38:19 -0700246 bts->flags |= tc->app_limited ? TCP_BTS_IS_APP_LIMITED : 0;
247 return bts;
248}
249
250void
251tcp_bt_check_app_limited (tcp_connection_t * tc)
252{
253 u32 available_bytes, flight_size;
254
255 available_bytes = transport_max_tx_dequeue (&tc->connection);
256 flight_size = tcp_flight_size (tc);
257
258 /* Not enough bytes to fill the cwnd */
259 if (available_bytes + flight_size + tc->snd_mss < tc->cwnd
260 /* Bytes considered lost have been retransmitted */
261 && tc->sack_sb.lost_bytes <= tc->snd_rxt_bytes)
262 tc->app_limited = tc->delivered + flight_size ? : 1;
263}
264
265void
266tcp_bt_track_tx (tcp_connection_t * tc)
267{
268 tcp_byte_tracker_t *bt = tc->bt;
269 tcp_bt_sample_t *bts, *tail;
270 u32 bts_index;
271
Florin Coras85fc1302019-06-26 09:12:34 -0700272 if (tc->snd_una == tc->snd_nxt)
Florin Coras52814732019-06-12 15:38:19 -0700273 tc->delivered_time = tcp_time_now_us (tc->c_thread_index);
274
275 bts = tcp_bt_alloc_tx_sample (tc, tc->snd_nxt);
276 bts_index = bt_sample_index (bt, bts);
277 tail = bt_get_sample (bt, bt->tail);
278 if (tail)
279 {
280 tail->next = bts_index;
281 bts->prev = bt->tail;
282 bt->tail = bts_index;
283 }
284 else
285 {
286 bt->tail = bt->head = bts_index;
287 }
288}
289
290void
291tcp_bt_track_rxt (tcp_connection_t * tc, u32 start, u32 end)
292{
293 tcp_byte_tracker_t *bt = tc->bt;
294 tcp_bt_sample_t *bts, *next, *cur, *prev, *nbts;
295 u32 bts_index, cur_index, next_index, prev_index, min_seq;
296 u8 is_end = end == tc->snd_nxt;
297
298 bts = bt_get_sample (bt, bt->last_ooo);
299 if (bts && bts->max_seq == start)
300 {
301 bts->max_seq = end;
302 next = bt_next_sample (bt, bts);
303 if (next)
304 bt_fix_overlapped (bt, next, end, is_end);
305
306 return;
307 }
308
309 /* Find original tx sample */
310 bts = bt_lookup_seq (bt, start);
311
312 ASSERT (bts != 0 && seq_geq (start, bts->min_seq));
313
314 /* Head in the past */
315 if (seq_lt (bts->min_seq, tc->snd_una))
316 bt_update_sample (bt, bts, tc->snd_una);
317
318 /* Head overlap */
319 if (bts->min_seq == start)
320 {
321 prev_index = bts->prev;
322 next = bt_fix_overlapped (bt, bts, end, is_end);
323 next_index = bt_sample_index (bt, next);
324
325 cur = tcp_bt_alloc_tx_sample (tc, start);
326 cur->max_seq = end;
327 cur->flags |= TCP_BTS_IS_RXT;
328 cur->next = next_index;
329 cur->prev = prev_index;
330
331 cur_index = bt_sample_index (bt, cur);
332
333 if (next_index != TCP_BTS_INVALID_INDEX)
334 {
335 next = bt_get_sample (bt, next_index);
336 next->prev = cur_index;
337 }
338 else
339 {
340 bt->tail = cur_index;
341 }
342
343 if (prev_index != TCP_BTS_INVALID_INDEX)
344 {
345 prev = bt_get_sample (bt, prev_index);
346 prev->next = cur_index;
347 }
348 else
349 {
350 bt->head = cur_index;
351 }
352
353 bt->last_ooo = cur_index;
354 return;
355 }
356
357 bts_index = bt_sample_index (bt, bts);
358 next = bt_next_sample (bt, bts);
359 if (next)
360 next = bt_fix_overlapped (bt, next, end, is_end);
361
362 min_seq = next ? next->min_seq : tc->snd_nxt;
363 ASSERT (seq_lt (start, min_seq));
364
365 /* Have to split or tail overlap */
366 cur = tcp_bt_alloc_tx_sample (tc, start);
367 cur->max_seq = end;
368 cur->flags |= TCP_BTS_IS_RXT;
369 cur->prev = bts_index;
370 cur_index = bt_sample_index (bt, cur);
371
372 /* Split. Allocate another sample */
373 if (seq_lt (end, min_seq))
374 {
375 nbts = tcp_bt_alloc_tx_sample (tc, end);
376 cur = bt_get_sample (bt, cur_index);
377 bts = bt_get_sample (bt, bts_index);
378
379 *nbts = *bts;
380 nbts->min_seq = end;
381
382 if (nbts->next != TCP_BTS_INVALID_INDEX)
383 {
384 next = bt_get_sample (bt, nbts->next);
385 next->prev = bt_sample_index (bt, nbts);
386 }
387 else
388 bt->tail = bt_sample_index (bt, nbts);
389
390 bts->next = nbts->prev = cur_index;
391 cur->next = bt_sample_index (bt, nbts);
392
393 bt->last_ooo = cur_index;
394 }
395 /* Tail completely overlapped */
396 else
397 {
398 bts = bt_get_sample (bt, bts_index);
399
400 if (bts->next != TCP_BTS_INVALID_INDEX)
401 {
402 next = bt_get_sample (bt, bts->next);
403 next->prev = cur_index;
404 }
405 else
406 bt->tail = cur_index;
407
408 cur->next = bts->next;
409 bts->next = cur_index;
410
411 bt->last_ooo = cur_index;
412 }
413}
414
415static void
416tcp_bt_sample_to_rate_sample (tcp_connection_t * tc, tcp_bt_sample_t * bts,
417 tcp_rate_sample_t * rs)
418{
Florin Coras85fc1302019-06-26 09:12:34 -0700419 if (rs->prior_delivered && rs->prior_delivered >= bts->delivered)
Florin Coras52814732019-06-12 15:38:19 -0700420 return;
421
Florin Coras85fc1302019-06-26 09:12:34 -0700422 rs->prior_delivered = bts->delivered;
423 rs->prior_time = bts->delivered_time;
424 rs->rtt_time = bts->tx_time;
Florin Coras52814732019-06-12 15:38:19 -0700425 rs->tx_rate = bts->tx_rate;
426 rs->flags = bts->flags;
427}
428
429static void
430tcp_bt_walk_samples (tcp_connection_t * tc, tcp_rate_sample_t * rs)
431{
432 tcp_byte_tracker_t *bt = tc->bt;
433 tcp_bt_sample_t *next, *cur;
434
435 cur = bt_get_sample (bt, bt->head);
436 tcp_bt_sample_to_rate_sample (tc, cur, rs);
437 while ((next = bt_get_sample (bt, cur->next))
438 && seq_lt (next->min_seq, tc->snd_una))
439 {
440 bt_free_sample (bt, cur);
441 tcp_bt_sample_to_rate_sample (tc, next, rs);
442 cur = next;
443 }
444
445 ASSERT (seq_lt (cur->min_seq, tc->snd_una));
446
447 /* All samples acked */
448 if (tc->snd_una == tc->snd_nxt)
449 {
450 ASSERT (pool_elts (bt->samples) == 1);
451 bt_free_sample (bt, cur);
452 return;
453 }
454
455 /* Current sample completely consumed */
456 if (next && next->min_seq == tc->snd_una)
457 {
458 bt_free_sample (bt, cur);
459 cur = next;
460 }
461}
462
463static void
464tcp_bt_walk_samples_ooo (tcp_connection_t * tc, tcp_rate_sample_t * rs)
465{
466 sack_block_t *blks = tc->rcv_opts.sacks, *blk;
467 tcp_byte_tracker_t *bt = tc->bt;
468 tcp_bt_sample_t *next, *cur;
469 int i;
470
471 for (i = 0; i < vec_len (blks); i++)
472 {
473 blk = &blks[i];
474
475 /* Ignore blocks that are already covered by snd_una */
476 if (seq_lt (blk->end, tc->snd_una))
477 continue;
478
479 cur = bt_lookup_seq (bt, blk->start);
480 if (!cur)
481 continue;
482
483 tcp_bt_sample_to_rate_sample (tc, cur, rs);
484
485 /* Current shouldn't be removed */
486 if (cur->min_seq != blk->start)
487 {
488 cur = bt_next_sample (bt, cur);
489 if (!cur)
490 continue;
491 }
492
493 while ((next = bt_get_sample (bt, cur->next))
494 && seq_lt (next->min_seq, blk->end))
495 {
496 bt_free_sample (bt, cur);
497 tcp_bt_sample_to_rate_sample (tc, next, rs);
498 cur = next;
499 }
500
501 /* Current consumed entirely */
502 if (next && next->min_seq == blk->end)
503 bt_free_sample (bt, cur);
504 }
505}
506
507void
508tcp_bt_sample_delivery_rate (tcp_connection_t * tc, tcp_rate_sample_t * rs)
509{
510 u32 delivered;
511
512 if (PREDICT_FALSE (tc->flags & TCP_CONN_FINSNT))
513 return;
514
515 delivered = tc->bytes_acked + tc->sack_sb.last_sacked_bytes;
516 if (!delivered || tc->bt->head == TCP_BTS_INVALID_INDEX)
517 return;
518
519 /* Do not count bytes that were previously sacked again */
520 tc->delivered += delivered - tc->sack_sb.last_bytes_delivered;
521 tc->delivered_time = tcp_time_now_us (tc->c_thread_index);
522
523 if (tc->app_limited && tc->delivered > tc->app_limited)
524 tc->app_limited = 0;
525
526 if (tc->bytes_acked)
527 tcp_bt_walk_samples (tc, rs);
528
529 if (tc->sack_sb.last_sacked_bytes)
530 tcp_bt_walk_samples_ooo (tc, rs);
Florin Coras85fc1302019-06-26 09:12:34 -0700531
532 rs->interval_time = tc->delivered_time - rs->prior_time;
533 rs->delivered = tc->delivered - rs->prior_delivered;
534 rs->rtt_time = tc->delivered_time - rs->rtt_time;
535 rs->acked_and_sacked = delivered;
536 rs->lost = tc->sack_sb.last_lost_bytes;
Florin Coras52814732019-06-12 15:38:19 -0700537}
538
539void
540tcp_bt_flush_samples (tcp_connection_t * tc)
541{
542 tcp_byte_tracker_t *bt = tc->bt;
543 tcp_bt_sample_t *bts;
544 u32 *samples = 0, *si;
545
546 vec_validate (samples, pool_elts (bt->samples) - 1);
547
548 /* *INDENT-OFF* */
549 pool_foreach (bts, bt->samples, ({
550 vec_add1 (samples, bts - bt->samples);
551 }));
552 /* *INDENT-ON* */
553
554 vec_foreach (si, samples)
555 {
556 bts = bt_get_sample (bt, *si);
557 bt_free_sample (bt, bts);
558 }
559
560 vec_free (samples);
561}
562
563void
564tcp_bt_cleanup (tcp_connection_t * tc)
565{
566 tcp_byte_tracker_t *bt = tc->bt;
567
568 rb_tree_free_nodes (&bt->sample_lookup);
569 pool_free (bt->samples);
570 clib_mem_free (bt);
571 tc->bt = 0;
572}
573
574void
575tcp_bt_init (tcp_connection_t * tc)
576{
577 tcp_byte_tracker_t *bt;
578
579 bt = clib_mem_alloc (sizeof (tcp_byte_tracker_t));
580 clib_memset (bt, 0, sizeof (tcp_byte_tracker_t));
581
582 rb_tree_init (&bt->sample_lookup);
583 bt->head = bt->tail = TCP_BTS_INVALID_INDEX;
584 tc->bt = bt;
585}
586
587/*
588 * fd.io coding-style-patch-verification: ON
589 *
590 * Local Variables:
591 * eval: (c-set-style "gnu")
592 * End:
593 */