/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#include <vlib/vlib.h>

/*
 * Enqueue buffers to next nodes, where each buffer may carry a different
 * next index. Runs of consecutive buffers sharing the same next index are
 * detected and copied into the destination frame in bulk.
 */
void __clib_section (".vlib_buffer_enqueue_to_next_fn") CLIB_MULTIARCH_FN (
  vlib_buffer_enqueue_to_next_fn) (vlib_main_t *vm, vlib_node_runtime_t *node,
				   u32 *buffers, u16 *nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      /* next index changed or current frame is full - put the frame and
	 grab a fresh one for the new next index */
      if ((nexts[0] != next_index) || n_left_to_next == 0)
	{
	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	  next_index = nexts[0];
	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
	  max = clib_min (n_left_to_next, count);
	}
#if defined(CLIB_HAVE_VEC512)
      /* compare 32 next indices against the first one; inverting the
	 per-lane MSB mask and counting trailing zeros yields the length
	 of the leading run of equal next indices */
      u16x32 next32 = CLIB_MEM_OVERFLOW_LOAD (u16x32_load_unaligned, nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = CLIB_MEM_OVERFLOW_LOAD (u16x16_load_unaligned, nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      /* u8x32_msb_mask produces two mask bits per 16-bit lane, hence the
	 divide by 2 */
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = CLIB_MEM_OVERFLOW_LOAD (u16x8_load_unaligned, nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      /* scalar fallback: peek at the next 3 indices when at least 4
	 entries remain to be enqueued */
      u16 x = 0;
      if (count > 3)
	{
	  x |= next_index ^ nexts[1];
	  x |= next_index ^ nexts[2];
	  x |= next_index ^ nexts[3];
	  n_enqueued = (x == 0) ? 4 : 1;
	}
      else
	n_enqueued = 1;
#endif

      /* clamp the run to the space left in the current frame and the
	 number of buffers remaining */
      if (PREDICT_FALSE (n_enqueued > max))
	n_enqueued = max;

      /* copy the run in the largest chunks the target supports */
#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 32);
	  nexts += 32;
	  to_next += 32;
	  buffers += 32;
	  n_left_to_next -= 32;
	  count -= 32;
	  max -= 32;
	  continue;
	}
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 16);
	  nexts += 16;
	  to_next += 16;
	  buffers += 16;
	  n_left_to_next -= 16;
	  count -= 16;
	  max -= 16;
	  continue;
	}
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 8);
	  nexts += 8;
	  to_next += 8;
	  buffers += 8;
	  n_left_to_next -= 8;
	  count -= 8;
	  max -= 8;
	  continue;
	}
#endif

      if (n_enqueued >= 4)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 4);
	  nexts += 4;
	  to_next += 4;
	  buffers += 4;
	  n_left_to_next -= 4;
	  count -= 4;
	  max -= 4;
	  continue;
	}

      /* copy */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);

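/*
 * Usage sketch (illustrative, not part of this file): node dispatch
 * functions normally reach the variant above through the inline wrapper
 * in vlib/buffer_node.h, which calls through the function pointer that
 * vlib_buffer_funcs_init() installs below:
 *
 *   u32 buffers[VLIB_FRAME_SIZE];
 *   u16 nexts[VLIB_FRAME_SIZE];
 *   ... fill buffers[] and one next index per buffer ...
 *   vlib_buffer_enqueue_to_next (vm, node, buffers, nexts, n_packets);
 */
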
/*
 * Enqueue all buffers to a single next node, spilling into additional
 * frames when count exceeds the space left in the current frame.
 */
void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn) (
  vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
  u32 count)
{
  u32 *to_next, n_left_to_next, n_enq;

  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

  /* fast path: everything fits into the current frame */
  if (PREDICT_TRUE (n_left_to_next >= count))
    {
      vlib_buffer_copy_indices (to_next, buffers, count);
      n_left_to_next -= count;
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      return;
    }

  n_enq = n_left_to_next;
next:
  vlib_buffer_copy_indices (to_next, buffers, n_enq);
  n_left_to_next -= n_enq;

  if (PREDICT_FALSE (count > n_enq))
    {
      count -= n_enq;
      buffers += n_enq;

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      n_enq = clib_min (n_left_to_next, count);
      goto next;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);

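/*
 * Usage sketch (illustrative): the matching inline wrapper is used when
 * every buffer in the frame goes to the same next node:
 *
 *   vlib_buffer_enqueue_to_single_next (vm, node, buffers, next_index,
 *                                       n_packets);
 */
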
/*
 * Hand off buffers to other threads via per-thread frame queues, one
 * destination thread index per buffer. When drop_on_congestion is set,
 * buffers destined for a congested queue are freed instead of enqueued.
 * Returns the number of buffers actually handed off.
 */
u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn) (
  vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
  u16 *thread_indices, u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      /* destination thread changed - switch handoff queue elements */
      if (next_thread_index != current_thread_index)
	{
	  if (drop_on_congestion &&
	      is_vlib_frame_queue_congested (
		frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
		ptd->congested_handoff_queue_by_thread_index))
	    {
	      dbi[0] = buffer_indices[0];
	      dbi++;
	      n_drop++;
	      goto next;
	    }

	  /* record the vector count of the element we are leaving */
	  if (hf)
	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

	  hf = vlib_get_worker_handoff_queue_elt (
	    frame_queue_index, next_thread_index,
	    ptd->handoff_queue_elt_by_thread_index);

	  n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
	  to_next_thread = &hf->buffer_index[hf->n_vectors];
	  current_thread_index = next_thread_index;
	}

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      /* element is full - ship it and tell the consumer thread to look */
      if (n_left_to_next_thread == 0)
	{
	  hf->n_vectors = VLIB_FRAME_SIZE;
	  vlib_put_frame_queue_elt (hf);
	  vlib_get_main_by_index (current_thread_index)->check_frame_queues =
	    1;
	  current_thread_index = ~0;
	  ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
	  hf = 0;
	}

      /* next */
    next:
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
	{
	  hf = ptd->handoff_queue_elt_by_thread_index[i];
	  /*
	   * It works better to let the handoff node
	   * rate-adapt, always ship the handoff queue element.
	   */
	  if (1 || hf->n_vectors == hf->last_n_vectors)
	    {
	      vlib_put_frame_queue_elt (hf);
	      vlib_get_main_by_index (i)->check_frame_queues = 1;
	      ptd->handoff_queue_elt_by_thread_index[i] = 0;
	    }
	  else
	    hf->last_n_vectors = hf->n_vectors;
	}
      ptd->congested_handoff_queue_by_thread_index[i] =
	(vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}

CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);

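/*
 * Usage sketch (illustrative; the wrapper's exact signature may differ
 * between releases): a handoff node passes one destination thread index
 * per buffer and accounts for whatever was dropped on congestion:
 *
 *   u32 n_enq = vlib_buffer_enqueue_to_thread (vm, frame_queue_index,
 *					       buffer_indices, thread_indices,
 *					       n_packets, 1);
 *   if (n_enq < n_packets)
 *     ... count (n_packets - n_enq) congestion drops ...
 */
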
#ifndef CLIB_MARCH_VARIANT
vlib_buffer_func_main_t vlib_buffer_func_main;

/* Install the best per-CPU-architecture variants of the enqueue
   functions into the dispatch table used by the inline wrappers. */
static clib_error_t *
vlib_buffer_funcs_init (vlib_main_t *vm)
{
  vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
  bfm->buffer_enqueue_to_next_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
  bfm->buffer_enqueue_to_single_next_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
  bfm->buffer_enqueue_to_thread_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
  return 0;
}

VLIB_INIT_FUNCTION (vlib_buffer_funcs_init);
#endif