blob: 830888a19c11862505e66d22f74868441c7534ff [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15#include <vppinfra/bitmap.h>
16#include <vppinfra/hash.h>
17#include <vppinfra/pool.h>
18#include <vppinfra/timing_wheel.h>
19
20void
Dave Barachc3799992016-08-15 11:12:27 -040021timing_wheel_init (timing_wheel_t * w, u64 current_cpu_time,
22 f64 cpu_clocks_per_second)
Ed Warnickecb9cada2015-12-08 15:45:58 -070023{
24 if (w->max_sched_time <= w->min_sched_time)
25 {
26 w->min_sched_time = 1e-6;
27 w->max_sched_time = 1e-3;
28 }
29
30 w->cpu_clocks_per_second = cpu_clocks_per_second;
Dave Barachc3799992016-08-15 11:12:27 -040031 w->log2_clocks_per_bin =
32 max_log2 (w->cpu_clocks_per_second * w->min_sched_time);
33 w->log2_bins_per_wheel =
34 max_log2 (w->cpu_clocks_per_second * w->max_sched_time);
Ed Warnickecb9cada2015-12-08 15:45:58 -070035 w->log2_bins_per_wheel -= w->log2_clocks_per_bin;
36 w->log2_clocks_per_wheel = w->log2_bins_per_wheel + w->log2_clocks_per_bin;
37 w->bins_per_wheel = 1 << w->log2_bins_per_wheel;
38 w->bins_per_wheel_mask = w->bins_per_wheel - 1;
39
40 w->current_time_index = current_cpu_time >> w->log2_clocks_per_bin;
41
42 if (w->n_wheel_elt_time_bits <= 0 ||
Dave Barachc3799992016-08-15 11:12:27 -040043 w->n_wheel_elt_time_bits >= STRUCT_BITS_OF (timing_wheel_elt_t,
44 cpu_time_relative_to_base))
45 w->n_wheel_elt_time_bits =
46 STRUCT_BITS_OF (timing_wheel_elt_t, cpu_time_relative_to_base) - 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -070047
48 w->cpu_time_base = current_cpu_time;
49 w->time_index_next_cpu_time_base_update
Dave Barachc3799992016-08-15 11:12:27 -040050 =
51 w->current_time_index +
52 ((u64) 1 << (w->n_wheel_elt_time_bits - w->log2_clocks_per_bin));
Ed Warnickecb9cada2015-12-08 15:45:58 -070053}
54
55always_inline uword
Dave Barachc3799992016-08-15 11:12:27 -040056get_level_and_relative_time (timing_wheel_t * w, u64 cpu_time,
57 uword * rtime_result)
Ed Warnickecb9cada2015-12-08 15:45:58 -070058{
59 u64 dt, rtime;
60 uword level_index;
61
62 dt = (cpu_time >> w->log2_clocks_per_bin);
63
64 /* Time should always move forward. */
65 ASSERT (dt >= w->current_time_index);
66
67 dt -= w->current_time_index;
68
69 /* Find level and offset within level. Level i has bins of size 2^((i+1)*M) */
70 rtime = dt;
71 for (level_index = 0; (rtime >> w->log2_bins_per_wheel) != 0; level_index++)
72 rtime = (rtime >> w->log2_bins_per_wheel) - 1;
73
74 /* Return offset within level and level index. */
75 ASSERT (rtime < w->bins_per_wheel);
76 *rtime_result = rtime;
77 return level_index;
78}
79
80always_inline uword
81time_index_to_wheel_index (timing_wheel_t * w, uword level_index, u64 ti)
Dave Barachc3799992016-08-15 11:12:27 -040082{
83 return (ti >> (level_index * w->log2_bins_per_wheel)) &
84 w->bins_per_wheel_mask;
85}
Ed Warnickecb9cada2015-12-08 15:45:58 -070086
87/* Find current time on this level. */
88always_inline uword
89current_time_wheel_index (timing_wheel_t * w, uword level_index)
Dave Barachc3799992016-08-15 11:12:27 -040090{
91 return time_index_to_wheel_index (w, level_index, w->current_time_index);
92}
Ed Warnickecb9cada2015-12-08 15:45:58 -070093
94/* Circular wheel indexing. */
95always_inline uword
96wheel_add (timing_wheel_t * w, word x)
Dave Barachc3799992016-08-15 11:12:27 -040097{
98 return x & w->bins_per_wheel_mask;
99}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700100
101always_inline uword
102rtime_to_wheel_index (timing_wheel_t * w, uword level_index, uword rtime)
103{
104 uword t = current_time_wheel_index (w, level_index);
105 return wheel_add (w, t + rtime);
106}
107
/* Check the internal consistency of level LEVEL_INDEX of wheel W.

   For every bin of the level this verifies that the occupancy bitmap bit
   agrees with whether the bin's element vector is non-empty, and that each
   element's time maps back to this level and bin (or to the previous level
   with a wrapped offset).  The number of elements seen is added to *N_ELTS.

   Returns 0 when every check passes, otherwise the error produced by the
   first failing check. */
static clib_error_t *
validate_level (timing_wheel_t * w, uword level_index, uword * n_elts)
{
  timing_wheel_level_t *level;
  timing_wheel_elt_t *e;
  uword wi;
  clib_error_t *error = 0;

  /* Local helper: evaluate condition x via CLIB_ERROR_ASSERT, trip ASSERT
     on failure, and return the error from validate_level.
     (presumably CLIB_ERROR_ASSERT yields non-zero only when x is false --
     confirm against its definition). */
#define _(x)                                    \
  do {                                          \
    error = CLIB_ERROR_ASSERT (x);              \
    ASSERT (! error);                           \
    if (error) return error;                    \
  } while (0)

  level = vec_elt_at_index (w->levels, level_index);
  for (wi = 0; wi < vec_len (level->elts); wi++)
    {
      /* Validate occupancy bitmap. */
      _(clib_bitmap_get_no_check (level->occupancy_bitmap, wi) ==
        (vec_len (level->elts[wi]) > 0));

      *n_elts += vec_len (level->elts[wi]);

      vec_foreach (e, level->elts[wi])
      {
        /* Validate time bin and level. */
        u64 e_time;
        uword e_ti, e_li, e_wi;

        /* Recompute where this element would be placed right now. */
        e_time = e->cpu_time_relative_to_base + w->cpu_time_base;
        e_li = get_level_and_relative_time (w, e_time, &e_ti);
        e_wi = rtime_to_wheel_index (w, level_index, e_ti);

        /* Note: for level_index == 0 the subtraction wraps to the largest
           uword, so this branch cannot match a real level. */
        if (e_li == level_index - 1)
          /* If this element was scheduled on the previous level
             it must be wrapped. */
          _(e_ti + current_time_wheel_index (w, level_index - 1)
            >= w->bins_per_wheel);
        else
          {
            _(e_li == level_index);
            if (e_li == 0)
              _(e_wi == wi);
            else
              /* On higher levels the bin may be off by one either way. */
              _(e_wi == wi || e_wi + 1 == wi || e_wi - 1 == wi);
          }
      }
    }

#undef _

  return error;
}
162
Dave Barachc3799992016-08-15 11:12:27 -0400163void
164timing_wheel_validate (timing_wheel_t * w)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700165{
166 uword l;
Dave Barachc3799992016-08-15 11:12:27 -0400167 clib_error_t *error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700168 uword n_elts;
169
Dave Barachc3799992016-08-15 11:12:27 -0400170 if (!w->validate)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171 return;
172
173 n_elts = pool_elts (w->overflow_pool);
174 for (l = 0; l < vec_len (w->levels); l++)
175 {
176 error = validate_level (w, l, &n_elts);
177 if (error)
178 clib_error_report (error);
179 }
180}
181
182always_inline void
183free_elt_vector (timing_wheel_t * w, timing_wheel_elt_t * ev)
184{
185 /* Poison free elements so we never use them by mistake. */
186 if (CLIB_DEBUG > 0)
Dave Barachb7b92992018-10-17 10:38:51 -0400187 clib_memset (ev, ~0, vec_len (ev) * sizeof (ev[0]));
Damjan Marion8bea5892022-04-04 22:40:45 +0200188 vec_set_len (ev, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700189 vec_add1 (w->free_elt_vectors, ev);
190}
191
192static timing_wheel_elt_t *
Dave Barachc3799992016-08-15 11:12:27 -0400193insert_helper (timing_wheel_t * w, uword level_index, uword rtime)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700194{
Dave Barachc3799992016-08-15 11:12:27 -0400195 timing_wheel_level_t *level;
196 timing_wheel_elt_t *e;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700197 uword wheel_index;
198
199 /* Circular buffer. */
200 vec_validate (w->levels, level_index);
201 level = vec_elt_at_index (w->levels, level_index);
202
Dave Barachc3799992016-08-15 11:12:27 -0400203 if (PREDICT_FALSE (!level->elts))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700204 {
205 uword max = w->bins_per_wheel - 1;
206 clib_bitmap_validate (level->occupancy_bitmap, max);
207 vec_validate (level->elts, max);
208 }
209
210 wheel_index = rtime_to_wheel_index (w, level_index, rtime);
211
Dave Barachc3799992016-08-15 11:12:27 -0400212 level->occupancy_bitmap =
213 clib_bitmap_ori (level->occupancy_bitmap, wheel_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700214
215 /* Allocate an elt vector from free list if there is one. */
Dave Barachc3799992016-08-15 11:12:27 -0400216 if (!level->elts[wheel_index] && vec_len (w->free_elt_vectors))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700217 level->elts[wheel_index] = vec_pop (w->free_elt_vectors);
218
219 /* Add element to vector for this time bin. */
220 vec_add2 (level->elts[wheel_index], e, 1);
221
222 return e;
223}
224
225/* Insert user data on wheel at given CPU time stamp. */
Dave Barachc3799992016-08-15 11:12:27 -0400226static void
227timing_wheel_insert_helper (timing_wheel_t * w, u64 insert_cpu_time,
228 u32 user_data)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229{
Dave Barachc3799992016-08-15 11:12:27 -0400230 timing_wheel_elt_t *e;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700231 u64 dt;
232 uword rtime, level_index;
233
234 level_index = get_level_and_relative_time (w, insert_cpu_time, &rtime);
235
236 dt = insert_cpu_time - w->cpu_time_base;
237 if (PREDICT_TRUE (0 == (dt >> BITS (e->cpu_time_relative_to_base))))
238 {
239 e = insert_helper (w, level_index, rtime);
240 e->user_data = user_data;
241 e->cpu_time_relative_to_base = dt;
Andrew Yourtchenkofa5231d2017-02-01 14:08:21 +0000242 if (insert_cpu_time < w->cached_min_cpu_time_on_wheel)
243 w->cached_min_cpu_time_on_wheel = insert_cpu_time;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700244 }
245 else
246 {
247 /* Time too far in the future: add to overflow vector. */
Dave Barachc3799992016-08-15 11:12:27 -0400248 timing_wheel_overflow_elt_t *oe;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700249 pool_get (w->overflow_pool, oe);
250 oe->user_data = user_data;
251 oe->cpu_time = insert_cpu_time;
252 }
253}
254
255always_inline uword
256elt_is_deleted (timing_wheel_t * w, u32 user_data)
257{
258 return (hash_elts (w->deleted_user_data_hash) > 0
259 && hash_get (w->deleted_user_data_hash, user_data));
260}
261
262static timing_wheel_elt_t *
263delete_user_data (timing_wheel_elt_t * elts, u32 user_data)
264{
265 uword found_match;
Dave Barachc3799992016-08-15 11:12:27 -0400266 timing_wheel_elt_t *e, *new_elts;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700267
268 /* Quickly scan to see if there are any elements to delete
269 in this bucket. */
270 found_match = 0;
271 vec_foreach (e, elts)
Dave Barachc3799992016-08-15 11:12:27 -0400272 {
273 found_match = e->user_data == user_data;
274 if (found_match)
275 break;
276 }
277 if (!found_match)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700278 return elts;
279
280 /* Re-scan to build vector of new elts with matching user_data deleted. */
281 new_elts = 0;
282 vec_foreach (e, elts)
Dave Barachc3799992016-08-15 11:12:27 -0400283 {
284 if (e->user_data != user_data)
285 vec_add1 (new_elts, e[0]);
286 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700287
288 vec_free (elts);
289 return new_elts;
290}
291
/* Insert user data on wheel at given CPU time stamp.

   If USER_DATA was previously marked deleted (timing_wheel_delete), all
   stale entries carrying it are first purged from every level and from
   the overflow pool, and the deleted mark is cleared, before the new
   entry is scheduled at INSERT_CPU_TIME. */
void
timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time, u32 user_data)
{
  /* Remove previously deleted elements. */
  if (elt_is_deleted (w, user_data))
    {
      timing_wheel_level_t *l;
      uword wi;

      /* Delete elts with given user data so that stale events don't expire. */
      vec_foreach (l, w->levels)
      {
        /* NOTE(review): the loop body clears bits of the bitmap it is
           iterating over -- confirm clib_bitmap_foreach tolerates that. */
        clib_bitmap_foreach (wi, l->occupancy_bitmap) {
          l->elts[wi] = delete_user_data (l->elts[wi], user_data);
          /* Bin became empty: drop its occupancy bit. */
          if (vec_len (l->elts[wi]) == 0)
            l->occupancy_bitmap = clib_bitmap_andnoti (l->occupancy_bitmap, wi);
        }
      }

      {
        timing_wheel_overflow_elt_t *oe;
        /* NOTE(review): pool_put is called while iterating the same pool
           -- confirm pool_foreach permits freeing the current element. */
        pool_foreach (oe, w->overflow_pool) {
          if (oe->user_data == user_data)
            pool_put (w->overflow_pool, oe);
        }
      }

      /* The user data is live again. */
      hash_unset (w->deleted_user_data_hash, user_data);
    }

  timing_wheel_insert_helper (w, insert_cpu_time, user_data);
}
325
Dave Barachc3799992016-08-15 11:12:27 -0400326void
327timing_wheel_delete (timing_wheel_t * w, u32 user_data)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700328{
Dave Barachc3799992016-08-15 11:12:27 -0400329 if (!w->deleted_user_data_hash)
330 w->deleted_user_data_hash =
331 hash_create ( /* capacity */ 0, /* value bytes */ 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700332
333 hash_set1 (w->deleted_user_data_hash, user_data);
334}
335
/* Returns time of next expiring element.

   Levels are scanned from the lowest up; within a level, bins are scanned
   circularly starting at the current time's bin.  The first occupied bin
   found determines the result: the minimum relative time in that bin
   (and, when `wrapped' is set, in the next level's current bin) added to
   w->cpu_time_base.  If no bin is occupied, the minimum cpu_time in the
   overflow pool is returned, or ~0ULL when that is empty too. */
u64
timing_wheel_next_expiring_elt_time (timing_wheel_t * w)
{
  timing_wheel_level_t *l;
  timing_wheel_elt_t *e;
  uword li, wi, wi0;
  u32 min_dt;
  u64 min_t;
  uword wrapped = 0;

  min_dt = ~0;
  min_t = ~0ULL;
  vec_foreach (l, w->levels)
  {
    /* Level never populated. */
    if (!l->occupancy_bitmap)
      continue;

    li = l - w->levels;
    wi0 = wi = current_time_wheel_index (w, li);
    wrapped = 0;
    while (1)
      {
        if (clib_bitmap_get_no_check (l->occupancy_bitmap, wi))
          {
            vec_foreach (e, l->elts[wi])
              min_dt = clib_min (min_dt, e->cpu_time_relative_to_base);

            /* Also consider the current bin of the next level up. */
            if (wrapped && li + 1 < vec_len (w->levels))
              {
                uword wi1 = current_time_wheel_index (w, li + 1);
                if (l[1].occupancy_bitmap
                    && clib_bitmap_get_no_check (l[1].occupancy_bitmap, wi1))
                  {
                    vec_foreach (e, l[1].elts[wi1])
                    {
                      min_dt =
                        clib_min (min_dt, e->cpu_time_relative_to_base);
                    }
                  }
              }

            /* First occupied bin wins; the overflow pool is not consulted. */
            min_t = w->cpu_time_base + min_dt;
            goto done;
          }

        wi = wheel_add (w, wi + 1);
        /* Came full circle without finding an occupied bin. */
        if (wi == wi0)
          break;

        /* NOTE(review): `wi != wi + 1' is always true, so `wrapped' is set
           for every bin after the first.  Presumably this was meant to
           detect wheel_add() wrapping past the last bin -- confirm before
           changing, since it alters which bins are considered above. */
        wrapped = wi != wi + 1;
      }
  }

  {
    timing_wheel_overflow_elt_t *oe;

    /* NOTE(review): min_dt is only modified on the path that ends in
       `goto done', so it is still ~0 whenever control reaches here. */
    if (min_dt != ~0)
      min_t = w->cpu_time_base + min_dt;

    pool_foreach (oe, w->overflow_pool)
       { min_t = clib_min (min_t, oe->cpu_time); }

  done:
    return min_t;
  }
}
403
404static inline void
405insert_elt (timing_wheel_t * w, timing_wheel_elt_t * e)
406{
407 u64 t = w->cpu_time_base + e->cpu_time_relative_to_base;
408 timing_wheel_insert_helper (w, t, e->user_data);
409}
410
411always_inline u64
412elt_cpu_time (timing_wheel_t * w, timing_wheel_elt_t * e)
Dave Barachc3799992016-08-15 11:12:27 -0400413{
414 return w->cpu_time_base + e->cpu_time_relative_to_base;
415}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700416
417always_inline void
418validate_expired_elt (timing_wheel_t * w, timing_wheel_elt_t * e,
419 u64 current_cpu_time)
420{
421 if (CLIB_DEBUG > 0)
422 {
423 u64 e_time = elt_cpu_time (w, e);
424
425 /* Verify that element is actually expired. */
426 ASSERT ((e_time >> w->log2_clocks_per_bin)
427 <= (current_cpu_time >> w->log2_clocks_per_bin));
428 }
429}
430
431static u32 *
432expire_bin (timing_wheel_t * w,
433 uword level_index,
Dave Barachc3799992016-08-15 11:12:27 -0400434 uword wheel_index, u64 advance_cpu_time, u32 * expired_user_data)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700435{
Dave Barachc3799992016-08-15 11:12:27 -0400436 timing_wheel_level_t *level = vec_elt_at_index (w->levels, level_index);
437 timing_wheel_elt_t *e;
438 u32 *x;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700439 uword i, j, e_len;
440
441 e = vec_elt (level->elts, wheel_index);
442 e_len = vec_len (e);
443
444 vec_add2 (expired_user_data, x, e_len);
445 for (i = j = 0; i < e_len; i++)
446 {
447 validate_expired_elt (w, &e[i], advance_cpu_time);
448 x[j] = e[i].user_data;
449
450 /* Only advance if elt is not to be deleted. */
Dave Barachc3799992016-08-15 11:12:27 -0400451 j += !elt_is_deleted (w, e[i].user_data);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700452 }
453
454 /* Adjust for deleted elts. */
455 if (j < e_len)
Damjan Marion8bea5892022-04-04 22:40:45 +0200456 vec_dec_len (expired_user_data, e_len - j);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700457
458 free_elt_vector (w, e);
459
460 level->elts[wheel_index] = 0;
461 clib_bitmap_set_no_check (level->occupancy_bitmap, wheel_index, 0);
462
463 return expired_user_data;
464}
465
/* Called rarely. 32 bit times should only overflow every 4 seconds or so on a fast machine.

   Moves w->cpu_time_base forward by 2^n_wheel_elt_time_bits clocks,
   rebases the relative time of every element on the wheel accordingly,
   and then migrates overflow-pool entries that now fit: entries whose
   time index is not after the current one are appended to
   EXPIRED_USER_DATA (unless marked deleted), the rest are inserted on the
   wheel.  Returns the (possibly reallocated) EXPIRED_USER_DATA vector. */
static u32 *
advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
{
  timing_wheel_level_t *l;
  timing_wheel_elt_t *e;
  u64 delta;

  w->stats.cpu_time_base_advances++;
  delta = ((u64) 1 << w->n_wheel_elt_time_bits);
  w->cpu_time_base += delta;
  /* Schedule the next base advance one delta (in bins) further on. */
  w->time_index_next_cpu_time_base_update += delta >> w->log2_clocks_per_bin;

  /* Rebase every element still on the wheel. */
  vec_foreach (l, w->levels)
  {
    uword wi;
    clib_bitmap_foreach (wi, l->occupancy_bitmap) {
      vec_foreach (e, l->elts[wi])
        {
          /* This should always be true since otherwise we would have already expired
             this element. Note that in the second half of this function we need
             to take care not to place the expired elements ourselves. */
          ASSERT (e->cpu_time_relative_to_base >= delta);
          e->cpu_time_relative_to_base -= delta;
        }
    }
  }

  /* See which overflow elements fit now. */
  {
    timing_wheel_overflow_elt_t *oe;
    /* NOTE(review): entries are freed with pool_put while the same pool is
       being iterated -- confirm pool_foreach permits this. */
    pool_foreach (oe, w->overflow_pool) {
      /* It now fits into the width of cpu_time_relative_to_base. */
      if (0 == ((oe->cpu_time - w->cpu_time_base) >> BITS (e->cpu_time_relative_to_base)))
        {
          u64 ti = oe->cpu_time >> w->log2_clocks_per_bin;
          if (ti <= w->current_time_index)
            {
              /* This can happen when timing wheel is not advanced for a long time
                 (for example when at a gdb breakpoint for a while). */
              /* Note: the ti == w->current_time_index means it is also an expired timer */
              if (! elt_is_deleted (w, oe->user_data))
                vec_add1 (expired_user_data, oe->user_data);
            }
          else
            timing_wheel_insert_helper (w, oe->cpu_time, oe->user_data);
          /* Either way the entry leaves the overflow pool. */
          pool_put (w->overflow_pool, oe);
        }
    }
  }
  return expired_user_data;
}
518
/* Drain bins FROM_WHEEL_INDEX through TO_WHEEL_INDEX (inclusive, walked
   circularly) of level LEVEL_INDEX + 1.

   Elements whose time index is not after that of ADVANCE_CPU_TIME are
   appended to EXPIRED_USER_DATA (unless marked deleted); all others are
   appended to w->unexpired_elts_pending_insert for the caller to
   re-insert.  Drained bins are cleared and their vectors recycled.  Also
   bumps the refill counter for LEVEL_INDEX.  Returns the (possibly
   reallocated) EXPIRED_USER_DATA vector. */
static u32 *
refill_level (timing_wheel_t * w,
              uword level_index,
              u64 advance_cpu_time,
              uword from_wheel_index,
              uword to_wheel_index, u32 * expired_user_data)
{
  timing_wheel_level_t *level;
  /* Work on a local copy of the vector pointer; it is stored back at
     `done' because vec_add1 may move the vector. */
  timing_wheel_elt_t *to_insert = w->unexpired_elts_pending_insert;
  u64 advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;

  vec_validate (w->stats.refills, level_index);
  w->stats.refills[level_index] += 1;

  /* No level above this one: nothing to pull down. */
  if (level_index + 1 >= vec_len (w->levels))
    goto done;

  level = vec_elt_at_index (w->levels, level_index + 1);
  if (!level->occupancy_bitmap)
    goto done;

  while (1)
    {
      timing_wheel_elt_t *e, *es;

      if (clib_bitmap_get_no_check
          (level->occupancy_bitmap, from_wheel_index))
        {
          /* Detach the bin's vector before processing its elements. */
          es = level->elts[from_wheel_index];
          level->elts[from_wheel_index] = 0;
          clib_bitmap_set_no_check (level->occupancy_bitmap, from_wheel_index,
                                    0);

          vec_foreach (e, es)
          {
            u64 e_time = elt_cpu_time (w, e);
            u64 ti = e_time >> w->log2_clocks_per_bin;
            if (ti <= advance_time_index)
              {
                /* Already due: report it now instead of re-inserting. */
                validate_expired_elt (w, e, advance_cpu_time);
                if (!elt_is_deleted (w, e->user_data))
                  vec_add1 (expired_user_data, e->user_data);
              }
            else
              vec_add1 (to_insert, e[0]);
          }
          free_elt_vector (w, es);
        }

      /* Range is inclusive of to_wheel_index. */
      if (from_wheel_index == to_wheel_index)
        break;

      from_wheel_index = wheel_add (w, from_wheel_index + 1);
    }

  timing_wheel_validate (w);
done:
  w->unexpired_elts_pending_insert = to_insert;
  return expired_user_data;
}
579
/* Advance wheel and return any expired user data in vector.

   W is advanced to ADVANCE_CPU_TIME.  User data of elements that expire
   (and are not marked deleted) is appended to EXPIRED_USER_DATA, and the
   possibly reallocated vector is returned.  When
   NEXT_EXPIRING_ELEMENT_CPU_TIME is non-null it receives the time of the
   next element due to expire. */
u32 *
timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
                      u32 * expired_user_data,
                      u64 * next_expiring_element_cpu_time)
{
  timing_wheel_level_t *level;
  uword level_index, advance_rtime, advance_level_index, advance_wheel_index;
  uword n_expired_user_data_before;
  u64 current_time_index, advance_time_index;

  /* Remembered so we can tell later whether this call expired anything. */
  n_expired_user_data_before = vec_len (expired_user_data);

  /* Re-fill lower levels when time wraps. */
  current_time_index = w->current_time_index;
  advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;

  {
    u64 current_ti, advance_ti;

    /* Time indices with the level-0 bin bits stripped off. */
    current_ti = current_time_index >> w->log2_bins_per_wheel;
    advance_ti = advance_time_index >> w->log2_bins_per_wheel;

    if (PREDICT_FALSE (current_ti != advance_ti))
      {
        if (w->unexpired_elts_pending_insert)
          vec_set_len (w->unexpired_elts_pending_insert, 0);

        /* Walk up the levels, one group of log2_bins_per_wheel bits at a
           time, refilling each level whose bin index changes. */
        level_index = 0;
        while (current_ti != advance_ti)
          {
            uword c, a;
            c = current_ti & (w->bins_per_wheel - 1);
            a = advance_ti & (w->bins_per_wheel - 1);
            if (c != a)
              expired_user_data = refill_level (w,
                                                level_index,
                                                advance_cpu_time,
                                                c, a, expired_user_data);
            current_ti >>= w->log2_bins_per_wheel;
            advance_ti >>= w->log2_bins_per_wheel;
            level_index++;
          }
      }
  }

  /* Locate the advance time on the wheel relative to the (still
     un-advanced) current time. */
  advance_level_index =
    get_level_and_relative_time (w, advance_cpu_time, &advance_rtime);
  advance_wheel_index =
    rtime_to_wheel_index (w, advance_level_index, advance_rtime);

  /* Empty all occupied bins for entire levels that we advance past. */
  for (level_index = 0; level_index < advance_level_index; level_index++)
    {
      uword wi;

      if (level_index >= vec_len (w->levels))
        break;

      level = vec_elt_at_index (w->levels, level_index);
      clib_bitmap_foreach (wi, level->occupancy_bitmap) {
        expired_user_data = expire_bin (w, level_index, wi, advance_cpu_time,
                                        expired_user_data);
      }
    }

  /* On the level the advance time lands on, expire bins from the current
     time's bin through the advance bin (inclusive).  level_index equals
     advance_level_index here whenever the condition holds, because the
     loop above only stops early once level_index >= vec_len (w->levels). */
  if (PREDICT_TRUE (level_index < vec_len (w->levels)))
    {
      uword wi;
      level = vec_elt_at_index (w->levels, level_index);
      wi = current_time_wheel_index (w, level_index);
      if (level->occupancy_bitmap)
        while (1)
          {
            if (clib_bitmap_get_no_check (level->occupancy_bitmap, wi))
              expired_user_data =
                expire_bin (w, advance_level_index, wi, advance_cpu_time,
                            expired_user_data);

            /* When we jump out, we have already just expired the bin,
               corresponding to advance_wheel_index */
            if (wi == advance_wheel_index)
              break;

            wi = wheel_add (w, wi + 1);
          }
    }

  /* Advance current time index. */
  w->current_time_index = advance_time_index;

  /* Re-insert the not-yet-due elements collected by refill_level(), now
     relative to the new current time. */
  if (vec_len (w->unexpired_elts_pending_insert) > 0)
    {
      timing_wheel_elt_t *e;
      vec_foreach (e, w->unexpired_elts_pending_insert) insert_elt (w, e);
      vec_set_len (w->unexpired_elts_pending_insert, 0);
    }

  /* Don't advance until necessary. */
  /* However, if the timing_wheel_advance() hasn't been called for some time,
     the while() loop will ensure multiple calls to advance_cpu_time_base()
     in a row until the w->cpu_time_base is fresh enough. */
  while (PREDICT_FALSE
         (advance_time_index >= w->time_index_next_cpu_time_base_update))
    expired_user_data = advance_cpu_time_base (w, expired_user_data);

  if (next_expiring_element_cpu_time)
    {
      u64 min_t;

      /* Anything expired? If so we need to recompute next expiring elt time.
         Otherwise reuse the cached minimum, provided one is cached
         (a cached value of 0 forces a recompute). */
      if (vec_len (expired_user_data) == n_expired_user_data_before
          && w->cached_min_cpu_time_on_wheel != 0ULL)
        min_t = w->cached_min_cpu_time_on_wheel;
      else
        {
          min_t = timing_wheel_next_expiring_elt_time (w);
          w->cached_min_cpu_time_on_wheel = min_t;
        }

      *next_expiring_element_cpu_time = min_t;
    }

  return expired_user_data;
}
705
Dave Barachc3799992016-08-15 11:12:27 -0400706u8 *
707format_timing_wheel (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700708{
Dave Barachc3799992016-08-15 11:12:27 -0400709 timing_wheel_t *w = va_arg (*va, timing_wheel_t *);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700710 int verbose = va_arg (*va, int);
Christophe Fontained3c008d2017-10-02 18:10:54 +0200711 u32 indent = format_get_indent (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700712
713 s = format (s, "level 0: %.4e - %.4e secs, 2^%d - 2^%d clocks",
714 (f64) (1 << w->log2_clocks_per_bin) / w->cpu_clocks_per_second,
Dave Barachc3799992016-08-15 11:12:27 -0400715 (f64) (1 << w->log2_clocks_per_wheel) /
716 w->cpu_clocks_per_second, w->log2_clocks_per_bin,
717 w->log2_clocks_per_wheel);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700718
719 if (verbose)
720 {
721 int l;
722
723 s = format (s, "\n%Utime base advances %Ld, every %.4e secs",
724 format_white_space, indent + 2,
725 w->stats.cpu_time_base_advances,
Dave Barachc3799992016-08-15 11:12:27 -0400726 (f64) ((u64) 1 << w->n_wheel_elt_time_bits) /
727 w->cpu_clocks_per_second);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700728
729 for (l = 0; l < vec_len (w->levels); l++)
730 s = format (s, "\n%Ulevel %d: refills %Ld",
731 format_white_space, indent + 2,
Dave Barachc3799992016-08-15 11:12:27 -0400732 l,
733 l <
734 vec_len (w->stats.refills) ? w->stats.
735 refills[l] : (u64) 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700736 }
737
738 return s;
739}
Dave Barachc3799992016-08-15 11:12:27 -0400740
741/*
742 * fd.io coding-style-patch-verification: ON
743 *
744 * Local Variables:
745 * eval: (c-set-style "gnu")
746 * End:
747 */