/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/mcast/mcast.h>

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>

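/*
 * Replication scheme: the "mcast_prep" node transmits the first copy
 * of each packet. For groups with more than one member it also swaps
 * the buffer onto a private "mcast-recycle" free list, so that when
 * the tx path frees the buffer, the free-list callback below hands it
 * to the "mcast-recycle" node instead of the buffer pool. That node
 * transmits one copy per remaining member, restoring the original
 * free list on the last copy.
 */
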
typedef struct {
  u32 sw_if_index;
  u32 next_index;
  u32 group_index;
} mcast_prep_trace_t;

/* packet trace format function */
static u8 * format_mcast_prep_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  mcast_prep_trace_t * t = va_arg (*args, mcast_prep_trace_t *);

  s = format (s, "MCAST_PREP: group %d, next index %d, tx_sw_if_index %d",
              t->group_index, t->next_index, t->sw_if_index);
  return s;
}

mcast_main_t mcast_main;
vlib_node_registration_t mcast_prep_node;
vlib_node_registration_t mcast_recycle_node;

#define foreach_mcast_prep_error \
_(MCASTS, "Multicast Packets")

typedef enum {
#define _(sym,str) MCAST_PREP_ERROR_##sym,
  foreach_mcast_prep_error
#undef _
  MCAST_PREP_N_ERROR,
} mcast_prep_error_t;

static char * mcast_prep_error_strings[] = {
#define _(sym,string) string,
  foreach_mcast_prep_error
#undef _
};

typedef enum {
  MCAST_PREP_NEXT_DROP,
  MCAST_PREP_N_NEXT,
} mcast_prep_next_t;

static uword
mcast_prep_node_fn (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
                    vlib_frame_t * frame)
{
  u32 n_left_from, * from, * to_next;
  mcast_prep_next_t next_index;
  mcast_main_t * mcm = &mcast_main;
  vlib_node_t *n = vlib_get_node (vm, mcast_prep_node.index);
  u32 node_counter_base_index = n->error_heap_index;
  vlib_error_main_t * em = &vm->error_main;
  ip4_main_t * im = &ip4_main;
  ip_lookup_main_t * lm = &im->lookup_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

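      /*
       * Note: the "0 &&" below deliberately disables this dual-buffer
       * template loop, so every packet takes the single-buffer loop
       * further down.
       */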
      while (0 && n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
          u32 next0, next1;
          u32 sw_if_index0, sw_if_index1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          next0 = 0;
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
          next1 = 0;

          /* $$$$ your message in this space. Process 2 x pkts */

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  mcast_prep_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  mcast_prep_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
                }
            }

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0, adj_index0;
          mcast_group_t * g0;
          ip_adjacency_t * adj0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
          adj0 = ip_get_adjacency (lm, adj_index0);
          vnet_buffer(b0)->mcast.mcast_group_index = adj0->mcast_group_index;
          g0 = pool_elt_at_index (mcm->groups, adj0->mcast_group_index);

          /*
           * Handle the degenerate single-copy case.
           * If we don't change the free list, the packet will never
           * make it to the recycle node...
           */
          if (PREDICT_TRUE(vec_len (g0->members) > 1))
            {
              /* Save the original free list index */
              vnet_buffer(b0)->mcast.original_free_list_index =
                b0->free_list_index;

              /* Swap in the multicast recycle list */
              b0->free_list_index = mcm->mcast_recycle_list_index;

              /*
               * Make sure that intermediate "frees" don't screw up
               */
              b0->recycle_count = vec_len (g0->members);
              b0->flags |= VLIB_BUFFER_RECYCLE;

              /* Set up for the recycle node */
              vnet_buffer(b0)->mcast.mcast_current_index = 1;
            }
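
          /*
           * Single-member groups fall through with their original free
           * list intact: the one copy sent below is the only copy, so
           * the normal free path applies and nothing is recycled.
           */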

          /* Transmit the pkt on the first interface */
          next0 = g0->members[0].prep_and_recycle_node_next_index;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] =
            g0->members[0].tx_sw_if_index;

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
            mcast_prep_trace_t *t =
              vlib_add_trace (vm, node, b0, sizeof (*t));
            t->next_index = next0;
            t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX];
            t->group_index = vnet_buffer(b0)->mcast.mcast_group_index;
          }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  em->counters[node_counter_base_index + MCAST_PREP_ERROR_MCASTS] +=
    frame->n_vectors;

  return frame->n_vectors;
}

VLIB_REGISTER_NODE (mcast_prep_node) = {
  .function = mcast_prep_node_fn,
  .name = "mcast_prep",
  .vector_size = sizeof (u32),
  .format_trace = format_mcast_prep_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(mcast_prep_error_strings),
  .error_strings = mcast_prep_error_strings,

  .n_next_nodes = MCAST_PREP_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [MCAST_PREP_NEXT_DROP] = "error-drop",
  },
};
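
/*
 * Only the drop disposition is registered statically. The per-member
 * tx dispositions (members[i].prep_and_recycle_node_next_index, used
 * above) are presumably added at group-configuration time, outside
 * this file.
 */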

typedef struct {
  u32 sw_if_index;
  u32 next_index;
  u32 current_member;
  u32 group_index;
} mcast_recycle_trace_t;

static u8 * format_mcast_recycle_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  mcast_recycle_trace_t * t = va_arg (*args, mcast_recycle_trace_t *);

  s = format (s,
      "MCAST_R: group %d, current member %d next (node) index %d, tx_sw_if_index %d",
      t->group_index, t->current_member, t->next_index, t->sw_if_index);
  return s;
}

#define foreach_mcast_recycle_error \
_(RECYCLES, "Multicast Recycles")

typedef enum {
#define _(sym,str) MCAST_RECYCLE_ERROR_##sym,
  foreach_mcast_recycle_error
#undef _
  MCAST_RECYCLE_N_ERROR,
} mcast_recycle_error_t;

static char * mcast_recycle_error_strings[] = {
#define _(sym,string) string,
  foreach_mcast_recycle_error
#undef _
};

typedef enum {
  MCAST_RECYCLE_NEXT_DROP,
  MCAST_RECYCLE_N_NEXT,
} mcast_recycle_next_t;

static uword
mcast_recycle_node_fn (vlib_main_t * vm,
                       vlib_node_runtime_t * node,
                       vlib_frame_t * frame)
{
  u32 n_left_from, * from, * to_next;
  mcast_recycle_next_t next_index;
  mcast_main_t * mcm = &mcast_main;
  vlib_node_t *n = vlib_get_node (vm, mcast_recycle_node.index);
  u32 node_counter_base_index = n->error_heap_index;
  vlib_error_main_t * em = &vm->error_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

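      /* Same deliberately-disabled ("0 &&") dual-buffer template as in
         mcast_prep_node_fn above. */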
      while (0 && n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
          u32 next0, next1;
          u32 sw_if_index0, sw_if_index1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          next0 = 0;
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
          next1 = 0;

          /* $$$$ your message in this space. Process 2 x pkts */

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  mcast_recycle_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  mcast_recycle_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
                }
            }

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0;
          u32 current_member0;
          mcast_group_t * g0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          g0 = pool_elt_at_index (mcm->groups,
                                  vnet_buffer(b0)->mcast.mcast_group_index);

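          /*
           * recycle_count presumably ticks down in the buffer free path
           * (not in this file); when it reaches 1, this is the last
           * replica, so revert to the original free list and the buffer
           * is freed for real after this transmission.
           */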
          /* No more replicas? */
          if (b0->recycle_count == 1)
            {
              /* Restore the original free list index */
              b0->free_list_index =
                vnet_buffer(b0)->mcast.original_free_list_index;
              b0->flags &= ~(VLIB_BUFFER_RECYCLE);
            }
          current_member0 = vnet_buffer(b0)->mcast.mcast_current_index;

          next0 =
            g0->members[current_member0].prep_and_recycle_node_next_index;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] =
            g0->members[current_member0].tx_sw_if_index;

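          /* Advance the member cursor: the next recycle pass emits the
             next member's copy. */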
          vnet_buffer(b0)->mcast.mcast_current_index =
            current_member0 + 1;

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
            mcast_recycle_trace_t *t =
              vlib_add_trace (vm, node, b0, sizeof (*t));
            t->next_index = next0;
            t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX];
            t->group_index = vnet_buffer(b0)->mcast.mcast_group_index;
            t->current_member = current_member0;
          }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  em->counters[node_counter_base_index + MCAST_RECYCLE_ERROR_RECYCLES] +=
    frame->n_vectors;

  return frame->n_vectors;
}

VLIB_REGISTER_NODE (mcast_recycle_node) = {
  .function = mcast_recycle_node_fn,
  .name = "mcast-recycle",
  .vector_size = sizeof (u32),
  .format_trace = format_mcast_recycle_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(mcast_recycle_error_strings),
  .error_strings = mcast_recycle_error_strings,

  .n_next_nodes = MCAST_RECYCLE_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [MCAST_RECYCLE_NEXT_DROP] = "error-drop",
  },
};

/*
 * Fish pkts back from the recycle queue/freelist:
 * walk past chained buffers so each packet is re-queued to the
 * mcast-recycle node exactly once.
 */
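/*
 * This callback is registered below (in mcast_init) as the free list's
 * buffers_added_to_freelist_function: "freed" replica buffers are
 * handed back to the mcast-recycle node as a frame rather than being
 * returned to the buffer pool.
 */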
static void mcast_recycle_callback (vlib_main_t *vm,
                                    vlib_buffer_free_list_t * fl)
{
  vlib_frame_t * f = 0;
  u32 n_left_from;
  u32 n_left_to_next = 0;
  u32 n_this_frame = 0;
  u32 * from;
  u32 * to_next;
  u32 bi0, pi0;
  vlib_buffer_t *b0;
  vlib_buffer_t *bnext0;
  int i;

  /* aligned, unaligned buffers */
  for (i = 0; i < 2; i++)
    {
      if (i == 0)
        {
          from = fl->aligned_buffers;
          n_left_from = vec_len (from);
        }
      else
        {
          from = fl->unaligned_buffers;
          n_left_from = vec_len (from);
        }

      while (n_left_from > 0)
        {
          if (PREDICT_FALSE(n_left_to_next == 0))
            {
              if (f)
                {
                  f->n_vectors = n_this_frame;
                  vlib_put_frame_to_node (vm, mcast_recycle_node.index, f);
                }

              f = vlib_get_frame_to_node (vm, mcast_recycle_node.index);
              to_next = vlib_frame_vector_args (f);
              n_left_to_next = VLIB_FRAME_SIZE;
              n_this_frame = 0;
            }

          bi0 = from[0];
          if (PREDICT_TRUE(n_left_from > 1))
            {
              pi0 = from[1];
              vlib_prefetch_buffer_with_index(vm,pi0,LOAD);
            }

          bnext0 = b0 = vlib_get_buffer (vm, bi0);

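          /*
           * A chained packet lands on the free list as its head index
           * followed by each chain member; skip past the chain members
           * so only the head buffer is handed to the recycle node.
           */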
          while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT)
            {
              from += 1;
              n_left_from -= 1;
              bnext0 = vlib_get_buffer (vm, bnext0->next_buffer);
            }
          to_next[0] = bi0;

          if (CLIB_DEBUG > 0)
            vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED);

          from++;
          to_next++;
          n_this_frame++;
          n_left_to_next--;
          n_left_from--;
        }
    }

  vec_reset_length (fl->aligned_buffers);
  vec_reset_length (fl->unaligned_buffers);

  if (f)
    {
      ASSERT(n_this_frame);
      f->n_vectors = n_this_frame;
      vlib_put_frame_to_node (vm, mcast_recycle_node.index, f);
    }
}

clib_error_t *mcast_init (vlib_main_t *vm)
{
  mcast_main_t * mcm = &mcast_main;
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_buffer_free_list_t * fl;

  mcm->vlib_main = vm;
  mcm->vnet_main = vnet_get_main();
  mcm->mcast_recycle_list_index =
    vlib_buffer_create_free_list (vm, 1024 /* fictional */, "mcast-recycle");

  fl = pool_elt_at_index (bm->buffer_free_list_pool,
                          mcm->mcast_recycle_list_index);

  fl->buffers_added_to_freelist_function = mcast_recycle_callback;

  return 0;
}

VLIB_INIT_FUNCTION (mcast_init);
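
/*
 * Illustrative sketch only (hypothetical, not part of this file):
 * wiring up a group member so the next-index fields used above exist.
 * Field names follow the usage in this file; vlib_node_add_next()
 * presumably needs to yield the same slot in both nodes for the shared
 * prep_and_recycle_node_next_index to be valid.
 *
 *   mcast_group_t *g;
 *   mcast_group_member_t *m;
 *
 *   pool_get (mcast_main.groups, g);
 *   memset (g, 0, sizeof (*g));
 *   vec_add2 (g->members, m, 1);
 *   m->tx_sw_if_index = tx_sw_if_index;
 *   m->prep_and_recycle_node_next_index =
 *     vlib_node_add_next (vm, mcast_prep_node.index, tx_node_index);
 *   (void) vlib_node_add_next (vm, mcast_recycle_node.index, tx_node_index);
 */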