/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#include <vlib/vlib.h>

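/* Enqueue a vector of buffer indices to this node's next nodes, where each
 * buffer carries its own next index in the parallel "nexts" array. Runs of
 * identical next indices are detected and copied into the destination frame
 * in batches; frames are closed and reopened whenever the next index changes
 * or the current frame fills up. */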
void __clib_section (".vlib_buffer_enqueue_to_next_fn") CLIB_MULTIARCH_FN (
  vlib_buffer_enqueue_to_next_fn) (vlib_main_t *vm, vlib_node_runtime_t *node,
                                   u32 *buffers, u16 *nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  max = clib_min (n_left_to_next, count);

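  /* Walk the buffer/next arrays. "max" caps how many buffers may still go
   * into the current frame: the smaller of the space left in the frame and
   * the number of buffers left to enqueue. */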
  while (count)
    {
      u32 n_enqueued;
      if ((nexts[0] != next_index) || n_left_to_next == 0)
        {
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
          next_index = nexts[0];
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
          max = clib_min (n_left_to_next, count);
        }
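      /* Find how many consecutive entries of "nexts" match the first one.
       * With SIMD support, a whole vector of next indices is compared
       * against a splat of the first entry, the comparison result is turned
       * into a bitmask and the leading run of matches is counted. The
       * overflow-tolerant load may read past the end of the array, so the
       * result is clamped against "max" below. */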
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = CLIB_MEM_OVERFLOW_LOAD (u16x32_load_unaligned, nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = CLIB_MEM_OVERFLOW_LOAD (u16x16_load_unaligned, nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = CLIB_MEM_OVERFLOW_LOAD (u16x8_load_unaligned, nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      u16 x = 0;
      if (count + 3 < max)
        {
          x |= next_index ^ nexts[1];
          x |= next_index ^ nexts[2];
          x |= next_index ^ nexts[3];
          n_enqueued = (x == 0) ? 4 : 1;
        }
      else
        n_enqueued = 1;
#endif

      if (PREDICT_FALSE (n_enqueued > max))
        n_enqueued = max;

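      /* Copy the run of identical next indices into the frame in the
       * largest chunks available (32, 16, 8, 4), falling back to a single
       * buffer at a time, then loop back for the next run. */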
#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
        {
          vlib_buffer_copy_indices (to_next, buffers, 32);
          nexts += 32;
          to_next += 32;
          buffers += 32;
          n_left_to_next -= 32;
          count -= 32;
          max -= 32;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
        {
          vlib_buffer_copy_indices (to_next, buffers, 16);
          nexts += 16;
          to_next += 16;
          buffers += 16;
          n_left_to_next -= 16;
          count -= 16;
          max -= 16;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
        {
          vlib_buffer_copy_indices (to_next, buffers, 8);
          nexts += 8;
          to_next += 8;
          buffers += 8;
          n_left_to_next -= 8;
          count -= 8;
          max -= 8;
          continue;
        }
#endif

      if (n_enqueued >= 4)
        {
          vlib_buffer_copy_indices (to_next, buffers, 4);
          nexts += 4;
          to_next += 4;
          buffers += 4;
          n_left_to_next -= 4;
          count -= 4;
          max -= 4;
          continue;
        }

      /* copy */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);

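/* Enqueue a vector of buffer indices where all buffers share a single next
 * index. The common case, where the current frame has room for the whole
 * vector, is a single copy; otherwise frames are filled and replaced until
 * every buffer is enqueued. */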
void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
  CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn) (
    vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
    u32 count)
{
  u32 *to_next, n_left_to_next, n_enq;

  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

  if (PREDICT_TRUE (n_left_to_next >= count))
    {
      vlib_buffer_copy_indices (to_next, buffers, count);
      n_left_to_next -= count;
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      return;
    }

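  /* Slow path: the current frame cannot hold all of the buffers. Fill it,
   * then keep acquiring fresh frames until everything has been enqueued. */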
  n_enq = n_left_to_next;
next:
  vlib_buffer_copy_indices (to_next, buffers, n_enq);
  n_left_to_next -= n_enq;

  if (PREDICT_FALSE (count > n_enq))
    {
      count -= n_enq;
      buffers += n_enq;

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      n_enq = clib_min (n_left_to_next, count);
      goto next;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);

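/* Hand off a vector of buffer indices to other threads via per-thread frame
 * queues. Each buffer carries the index of its destination thread in
 * "thread_indices". If drop_on_congestion is set, buffers destined to a
 * congested thread are collected and freed instead of being enqueued.
 * Returns the number of buffers actually handed off. */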
u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
  CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn) (
    vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
    u16 *thread_indices, u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

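      /* Destination thread changed: close the handoff frame being built for
       * the previous thread (if any) and grab a queue element for the new
       * one, unless that thread's queue is congested and we were asked to
       * drop on congestion. */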
      if (next_thread_index != current_thread_index)
        {
          if (drop_on_congestion &&
              is_vlib_frame_queue_congested (
                frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
                ptd->congested_handoff_queue_by_thread_index))
            {
              dbi[0] = buffer_indices[0];
              dbi++;
              n_drop++;
              goto next;
            }

          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

          hf = vlib_get_worker_handoff_queue_elt (
            frame_queue_index, next_thread_index,
            ptd->handoff_queue_elt_by_thread_index);

          n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_thread = &hf->buffer_index[hf->n_vectors];
          current_thread_index = next_thread_index;
        }

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

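      /* The handoff frame for the current destination thread is full: hand
       * it over and flag that thread to check its frame queues. */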
      if (n_left_to_next_thread == 0)
        {
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_frame_queue_elt (hf);
          vlib_get_main_by_index (current_thread_index)->check_frame_queues =
            1;
          current_thread_index = ~0;
          ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
          hf = 0;
        }

      /* next */
    next:
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
        {
          hf = ptd->handoff_queue_elt_by_thread_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              vlib_get_main_by_index (i)->check_frame_queues = 1;
              ptd->handoff_queue_elt_by_thread_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      ptd->congested_handoff_queue_by_thread_index[i] =
        (vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}

CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);

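/* Compiled only in the default (non-march-variant) object: at init time,
 * select the multiarch variant of each enqueue function for the running CPU
 * and publish the pointers in vlib_buffer_func_main for use by vlib. */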
#ifndef CLIB_MARCH_VARIANT
vlib_buffer_func_main_t vlib_buffer_func_main;

static clib_error_t *
vlib_buffer_funcs_init (vlib_main_t *vm)
{
  vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
  bfm->buffer_enqueue_to_next_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
  bfm->buffer_enqueue_to_single_next_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
  bfm->buffer_enqueue_to_thread_fn =
    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
  return 0;
}

VLIB_INIT_FUNCTION (vlib_buffer_funcs_init);
#endif