blob: 2c71b48b42edc316d12482b6e621d14b77cbde83 [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15#include <vlib/vlib.h>
16#include <vnet/vnet.h>
17#include <vnet/pg/pg.h>
Neale Ranns571ab202018-08-22 04:27:15 -070018#include <vnet/ethernet/ethernet.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070019#include <vppinfra/error.h>
20#include <sample/sample.h>
21
Dave Barach9594b562018-07-25 16:56:38 -040022typedef struct
23{
Ed Warnickecb9cada2015-12-08 15:45:58 -070024 u32 next_index;
25 u32 sw_if_index;
Dave Barachb7e2f3d2016-11-08 16:47:34 -050026 u8 new_src_mac[6];
27 u8 new_dst_mac[6];
Ed Warnickecb9cada2015-12-08 15:45:58 -070028} sample_trace_t;
29
Dave Barachb7e2f3d2016-11-08 16:47:34 -050030
Ed Warnickecb9cada2015-12-08 15:45:58 -070031/* packet trace format function */
Dave Barach9594b562018-07-25 16:56:38 -040032static u8 *
33format_sample_trace (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -070034{
35 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
Dave Barach9594b562018-07-25 16:56:38 -040037 sample_trace_t *t = va_arg (*args, sample_trace_t *);
38
Dave Barachb7e2f3d2016-11-08 16:47:34 -050039 s = format (s, "SAMPLE: sw_if_index %d, next index %d\n",
Dave Barach9594b562018-07-25 16:56:38 -040040 t->sw_if_index, t->next_index);
Dave Barachb7e2f3d2016-11-08 16:47:34 -050041 s = format (s, " new src %U -> new dst %U",
Dave Barach9594b562018-07-25 16:56:38 -040042 format_mac_address, t->new_src_mac,
43 format_mac_address, t->new_dst_mac);
Dave Barachb7e2f3d2016-11-08 16:47:34 -050044
Ed Warnickecb9cada2015-12-08 15:45:58 -070045 return s;
46}
47
Damjan Marion0fa900e2018-09-12 12:12:36 +020048extern vlib_node_registration_t sample_node;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049
/*
 * X-macro list of the node's counters: _(symbol, description).
 * Expanded below into both the error enum and the matching string table,
 * so the two always stay in step.
 */
#define foreach_sample_error                    \
_(SWAPPED, "Mac swap packets processed")

/* Counter indices: SAMPLE_ERROR_<symbol>, plus a trailing count */
typedef enum
{
#define _(name, text) SAMPLE_ERROR_##name,
  foreach_sample_error
#undef _
    SAMPLE_N_ERROR,
} sample_error_t;

/* Counter descriptions, indexed by sample_error_t */
static char *sample_error_strings[] = {
#define _(name, text) text,
  foreach_sample_error
#undef _
};
66
/*
 * Next-node dispositions (graph arcs) of the sample node.
 * The values index the .next_nodes table in the node registration.
 */
typedef enum
{
  /* arc 0: hand the packet to "interface-output" */
  SAMPLE_NEXT_INTERFACE_OUTPUT = 0,
  /* number of arcs; must stay last */
  SAMPLE_N_NEXT,
} sample_next_t;
72
Dave Barach9594b562018-07-25 16:56:38 -040073/*
74 * Simple dual/single loop version, default version which will compile
75 * everywhere.
76 *
77 * Node costs 30 clocks/pkt at a vector size of 51
78 */
Dave Barach9594b562018-07-25 16:56:38 -040079
Dave Barachd56550c2019-07-26 11:58:16 -040080#define VERSION_1 1
Dave Barach9594b562018-07-25 16:56:38 -040081#ifdef VERSION_1
Ed Warnickecb9cada2015-12-08 15:45:58 -070082#define foreach_mac_address_offset \
83_(0) \
84_(1) \
85_(2) \
86_(3) \
87_(4) \
88_(5)
89
Damjan Marion0fa900e2018-09-12 12:12:36 +020090VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
91 vlib_frame_t * frame)
Ed Warnickecb9cada2015-12-08 15:45:58 -070092{
Dave Barach9594b562018-07-25 16:56:38 -040093 u32 n_left_from, *from, *to_next;
Ed Warnickecb9cada2015-12-08 15:45:58 -070094 sample_next_t next_index;
95 u32 pkts_swapped = 0;
96
97 from = vlib_frame_vector_args (frame);
98 n_left_from = frame->n_vectors;
99 next_index = node->cached_next_index;
100
101 while (n_left_from > 0)
102 {
103 u32 n_left_to_next;
104
Dave Barach9594b562018-07-25 16:56:38 -0400105 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700106
107 while (n_left_from >= 4 && n_left_to_next >= 2)
108 {
Dave Barach9594b562018-07-25 16:56:38 -0400109 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
110 u32 next1 = SAMPLE_NEXT_INTERFACE_OUTPUT;
111 u32 sw_if_index0, sw_if_index1;
112 u8 tmp0[6], tmp1[6];
113 ethernet_header_t *en0, *en1;
114 u32 bi0, bi1;
115 vlib_buffer_t *b0, *b1;
116
Ed Warnickecb9cada2015-12-08 15:45:58 -0700117 /* Prefetch next iteration. */
118 {
Dave Barach9594b562018-07-25 16:56:38 -0400119 vlib_buffer_t *p2, *p3;
120
Ed Warnickecb9cada2015-12-08 15:45:58 -0700121 p2 = vlib_get_buffer (vm, from[2]);
122 p3 = vlib_get_buffer (vm, from[3]);
Dave Barach9594b562018-07-25 16:56:38 -0400123
Ed Warnickecb9cada2015-12-08 15:45:58 -0700124 vlib_prefetch_buffer_header (p2, LOAD);
125 vlib_prefetch_buffer_header (p3, LOAD);
126
127 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
128 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
129 }
130
Dave Barach9594b562018-07-25 16:56:38 -0400131 /* speculatively enqueue b0 and b1 to the current next frame */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700132 to_next[0] = bi0 = from[0];
133 to_next[1] = bi1 = from[1];
134 from += 2;
135 to_next += 2;
136 n_left_from -= 2;
137 n_left_to_next -= 2;
138
139 b0 = vlib_get_buffer (vm, bi0);
140 b1 = vlib_get_buffer (vm, bi1);
141
Dave Barach9594b562018-07-25 16:56:38 -0400142 ASSERT (b0->current_data == 0);
143 ASSERT (b1->current_data == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700144
Dave Barach9594b562018-07-25 16:56:38 -0400145 en0 = vlib_buffer_get_current (b0);
146 en1 = vlib_buffer_get_current (b1);
147
148 /* This is not the fastest way to swap src + dst mac addresses */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700149#define _(a) tmp0[a] = en0->src_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400150 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700151#undef _
152#define _(a) en0->src_address[a] = en0->dst_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400153 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700154#undef _
155#define _(a) en0->dst_address[a] = tmp0[a];
Dave Barach9594b562018-07-25 16:56:38 -0400156 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700157#undef _
158
159#define _(a) tmp1[a] = en1->src_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400160 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700161#undef _
162#define _(a) en1->src_address[a] = en1->dst_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400163 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700164#undef _
165#define _(a) en1->dst_address[a] = tmp1[a];
Dave Barach9594b562018-07-25 16:56:38 -0400166 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700167#undef _
168
Dave Barach9594b562018-07-25 16:56:38 -0400169 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
170 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171
Dave Barach9594b562018-07-25 16:56:38 -0400172 /* Send pkt back out the RX interface */
173 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
174 vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700175
Dave Barach9594b562018-07-25 16:56:38 -0400176 pkts_swapped += 2;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177
Dave Barach9594b562018-07-25 16:56:38 -0400178 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
179 {
180 if (b0->flags & VLIB_BUFFER_IS_TRACED)
181 {
182 sample_trace_t *t =
183 vlib_add_trace (vm, node, b0, sizeof (*t));
184 t->sw_if_index = sw_if_index0;
185 t->next_index = next0;
Dave Barach178cf492018-11-13 16:34:13 -0500186 clib_memcpy_fast (t->new_src_mac, en0->src_address,
187 sizeof (t->new_src_mac));
188 clib_memcpy_fast (t->new_dst_mac, en0->dst_address,
189 sizeof (t->new_dst_mac));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700190
Dave Barach9594b562018-07-25 16:56:38 -0400191 }
192 if (b1->flags & VLIB_BUFFER_IS_TRACED)
193 {
194 sample_trace_t *t =
195 vlib_add_trace (vm, node, b1, sizeof (*t));
196 t->sw_if_index = sw_if_index1;
197 t->next_index = next1;
Dave Barach178cf492018-11-13 16:34:13 -0500198 clib_memcpy_fast (t->new_src_mac, en1->src_address,
199 sizeof (t->new_src_mac));
200 clib_memcpy_fast (t->new_dst_mac, en1->dst_address,
201 sizeof (t->new_dst_mac));
Dave Barach9594b562018-07-25 16:56:38 -0400202 }
203 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700204
Dave Barach9594b562018-07-25 16:56:38 -0400205 /* verify speculative enqueues, maybe switch current next frame */
206 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
207 to_next, n_left_to_next,
208 bi0, bi1, next0, next1);
209 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700210
211 while (n_left_from > 0 && n_left_to_next > 0)
212 {
Dave Barach9594b562018-07-25 16:56:38 -0400213 u32 bi0;
214 vlib_buffer_t *b0;
215 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
216 u32 sw_if_index0;
217 u8 tmp0[6];
218 ethernet_header_t *en0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700219
Dave Barach9594b562018-07-25 16:56:38 -0400220 /* speculatively enqueue b0 to the current next frame */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700221 bi0 = from[0];
222 to_next[0] = bi0;
223 from += 1;
224 to_next += 1;
225 n_left_from -= 1;
226 n_left_to_next -= 1;
227
228 b0 = vlib_get_buffer (vm, bi0);
Dave Barach9594b562018-07-25 16:56:38 -0400229 /*
230 * Direct from the driver, we should be at offset 0
231 * aka at &b0->data[0]
232 */
233 ASSERT (b0->current_data == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700234
Dave Barach9594b562018-07-25 16:56:38 -0400235 en0 = vlib_buffer_get_current (b0);
236
237 /* This is not the fastest way to swap src + dst mac addresses */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700238#define _(a) tmp0[a] = en0->src_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400239 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700240#undef _
241#define _(a) en0->src_address[a] = en0->dst_address[a];
Dave Barach9594b562018-07-25 16:56:38 -0400242 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243#undef _
244#define _(a) en0->dst_address[a] = tmp0[a];
Dave Barach9594b562018-07-25 16:56:38 -0400245 foreach_mac_address_offset;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700246#undef _
247
Dave Barach9594b562018-07-25 16:56:38 -0400248 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700249
Dave Barach9594b562018-07-25 16:56:38 -0400250 /* Send pkt back out the RX interface */
251 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700252
Dave Barach9594b562018-07-25 16:56:38 -0400253 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
254 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
255 {
256 sample_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
257 t->sw_if_index = sw_if_index0;
258 t->next_index = next0;
Dave Barach178cf492018-11-13 16:34:13 -0500259 clib_memcpy_fast (t->new_src_mac, en0->src_address,
260 sizeof (t->new_src_mac));
261 clib_memcpy_fast (t->new_dst_mac, en0->dst_address,
262 sizeof (t->new_dst_mac));
Dave Barach9594b562018-07-25 16:56:38 -0400263 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700264
Dave Barach9594b562018-07-25 16:56:38 -0400265 pkts_swapped += 1;
266
267 /* verify speculative enqueue, maybe switch current next frame */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700268 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
269 to_next, n_left_to_next,
270 bi0, next0);
271 }
272
273 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
274 }
275
Dave Barach9594b562018-07-25 16:56:38 -0400276 vlib_node_increment_counter (vm, sample_node.index,
277 SAMPLE_ERROR_SWAPPED, pkts_swapped);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700278 return frame->n_vectors;
279}
Dave Barach9594b562018-07-25 16:56:38 -0400280#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700281
/*
 * This version swaps mac addresses using a 16-byte SIMD vector shuffle
 * (u8x16_shuffle)
 * Node costs about 17 clocks/pkt at a vector size of 26
 */
#ifdef VERSION_2
/*
 * Add a sample_trace_t record for buffer b if, and only if, the buffer
 * is flagged VLIB_BUFFER_IS_TRACED.  The MAC addresses are copied from
 * eth as they stand when this is called (i.e. after the swap).
 */
static inline void
sample_v2_maybe_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
                       vlib_buffer_t * b, ethernet_header_t * eth,
                       u32 sw_if_index, u32 next)
{
  sample_trace_t *tr;

  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    return;

  tr = vlib_add_trace (vm, node, b, sizeof (*tr));
  tr->sw_if_index = sw_if_index;
  tr->next_index = next;
  clib_memcpy_fast (tr->new_src_mac, eth->src_address,
                    sizeof (tr->new_src_mac));
  clib_memcpy_fast (tr->new_dst_mac, eth->dst_address,
                    sizeof (tr->new_dst_mac));
}

/*
 * Node function, dual/single loop flavour with a vector shuffle.
 *
 * For every buffer in the frame: swap the ethernet src/dst MAC
 * addresses by shuffling the first 16 bytes of the packet, set the TX
 * interface to the RX interface, and enqueue the buffer to
 * SAMPLE_NEXT_INTERFACE_OUTPUT.  Counts the packets in
 * SAMPLE_ERROR_SWAPPED and returns the number of vectors in the frame.
 */
VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  /*
   * Vector shuffle mask to swap src, dst: bytes 0-5 and 6-11 trade
   * places, bytes 12-15 stay where they are.
   */
  u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  sample_next_t next_index = node->cached_next_index;
  u32 pkts_swapped = 0;
  u32 *to_next;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /*
       * Dual loop: handle two packets per iteration.  Needs four
       * packets in hand because from[2] and from[3] are prefetched.
       */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi_a, bi_b;
          vlib_buffer_t *buf_a, *buf_b;
          ethernet_header_t *eth_a, *eth_b;
          u8x16 *vec_a, *vec_b;
          u8x16 macs_a, macs_b;
          u32 rx_a, rx_b;
          u32 next_a = SAMPLE_NEXT_INTERFACE_OUTPUT;
          u32 next_b = SAMPLE_NEXT_INTERFACE_OUTPUT;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *ahead_a = vlib_get_buffer (vm, from[2]);
            vlib_buffer_t *ahead_b = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (ahead_a, LOAD);
            vlib_prefetch_buffer_header (ahead_b, LOAD);

            CLIB_PREFETCH (ahead_a->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (ahead_b->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue both buffers to the current next frame */
          bi_a = from[0];
          bi_b = from[1];
          to_next[0] = bi_a;
          to_next[1] = bi_b;
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          buf_a = vlib_get_buffer (vm, bi_a);
          buf_b = vlib_get_buffer (vm, bi_b);

          ASSERT (buf_a->current_data == 0);
          ASSERT (buf_b->current_data == 0);

          eth_a = vlib_buffer_get_current (buf_a);
          eth_b = vlib_buffer_get_current (buf_b);

          /* View the first 16 bytes of each packet as one vector */
          vec_a = (u8x16 *) eth_a;
          vec_b = (u8x16 *) eth_b;

          /* load, shuffle, store back */
          macs_a = vec_a[0];
          macs_b = vec_b[0];
          macs_a = u8x16_shuffle (macs_a, swapmac);
          macs_b = u8x16_shuffle (macs_b, swapmac);
          vec_a[0] = macs_a;
          vec_b[0] = macs_b;

          rx_a = vnet_buffer (buf_a)->sw_if_index[VLIB_RX];
          rx_b = vnet_buffer (buf_b)->sw_if_index[VLIB_RX];

          /* Send pkt back out the RX interface */
          vnet_buffer (buf_a)->sw_if_index[VLIB_TX] = rx_a;
          vnet_buffer (buf_b)->sw_if_index[VLIB_TX] = rx_b;

          pkts_swapped += 2;

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              sample_v2_maybe_trace (vm, node, buf_a, eth_a, rx_a, next_a);
              sample_v2_maybe_trace (vm, node, buf_b, eth_b, rx_b, next_b);
            }

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi_a, bi_b, next_a, next_b);
        }

      /* Single loop: whatever the dual loop left behind */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi;
          vlib_buffer_t *buf;
          ethernet_header_t *eth;
          u8x16 *vec;
          u8x16 macs;
          u32 rx;
          u32 next = SAMPLE_NEXT_INTERFACE_OUTPUT;

          /* speculatively enqueue the buffer to the current next frame */
          bi = from[0];
          to_next[0] = bi;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          buf = vlib_get_buffer (vm, bi);
          /*
           * Direct from the driver, we should be at offset 0
           * aka at &buf->data[0]
           */
          ASSERT (buf->current_data == 0);

          eth = vlib_buffer_get_current (buf);
          vec = (u8x16 *) eth;
          macs = vec[0];
          macs = u8x16_shuffle (macs, swapmac);
          vec[0] = macs;

          rx = vnet_buffer (buf)->sw_if_index[VLIB_RX];

          /* Send pkt back out the RX interface */
          vnet_buffer (buf)->sw_if_index[VLIB_TX] = rx;

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
            sample_v2_maybe_trace (vm, node, buf, eth, rx, next);

          pkts_swapped += 1;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi, next);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, sample_node.index,
                               SAMPLE_ERROR_SWAPPED, pkts_swapped);
  return frame->n_vectors;
}
#endif
457
458
459/*
460 * This version computes all of the buffer pointers in
461 * one motion, uses a quad/single loop model, and
462 * traces the entire frame in one motion.
463 *
464 * Node costs about 16 clocks/pkt at a vector size of 26
465 *
466 * Some compilation drama with u8x16_shuffle, so turned off by
467 * default.
468 */
469
#ifdef VERSION_3

#define u8x16_shuffle __builtin_shuffle
/*
 * Per-packet next-node indices handed to vlib_buffer_enqueue_to_next().
 * Every packet goes to SAMPLE_NEXT_INTERFACE_OUTPUT, aka graph arc 0,
 * so a shared all-zero table is enough and nothing is filled in per
 * packet.  This would normally be a stack local, but since it's a
 * constant...
 */
static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };

/*
 * Node function, quad/single loop flavour.
 *
 * Translates all buffer indices of the frame to buffer pointers up
 * front, swaps the ethernet src/dst MAC addresses of every packet with
 * a 16-byte vector shuffle, sets the TX interface to the RX interface,
 * enqueues the whole frame to arc 0 and finally adds trace records.
 * Counts the packets in SAMPLE_ERROR_SWAPPED and returns the number of
 * vectors in the frame.
 */
VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from;
  u32 pkts_swapped = 0;
  /*
   * Vector shuffle mask to swap src, dst: bytes 0-5 and 6-11 trade
   * places, bytes 12-15 stay where they are.
   */
  u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;

  /* Quad loop: four packets per iteration */
  while (n_left_from >= 4)
    {
      u8x16 src_dst0, src_dst1, src_dst2, src_dst3;

      /* Prefetch next iteration, if there is a full one left. */
      if (PREDICT_TRUE (n_left_from >= 8))
        {
          vlib_prefetch_buffer_header (b[4], STORE);
          vlib_prefetch_buffer_header (b[5], STORE);
          vlib_prefetch_buffer_header (b[6], STORE);
          vlib_prefetch_buffer_header (b[7], STORE);
          CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, STORE);
        }

      /* Load the first 16 bytes of each packet ... */
      src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
      src_dst1 = ((u8x16 *) vlib_buffer_get_current (b[1]))[0];
      src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
      src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];

      /* ... swap the two MAC addresses ... */
      src_dst0 = u8x16_shuffle (src_dst0, swapmac);
      src_dst1 = u8x16_shuffle (src_dst1, swapmac);
      src_dst2 = u8x16_shuffle (src_dst2, swapmac);
      src_dst3 = u8x16_shuffle (src_dst3, swapmac);

      /* ... and store the result back into the packet */
      ((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
      ((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
      ((u8x16 *) vlib_buffer_get_current (b[2]))[0] = src_dst2;
      ((u8x16 *) vlib_buffer_get_current (b[3]))[0] = src_dst3;

      /* Send pkts back out the RX interface */
      vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
        vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
        vnet_buffer (b[1])->sw_if_index[VLIB_RX];
      vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
        vnet_buffer (b[2])->sw_if_index[VLIB_RX];
      vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
        vnet_buffer (b[3])->sw_if_index[VLIB_RX];

      b += 4;
      n_left_from -= 4;
      pkts_swapped += 4;
    }

  /* Single loop: the 0-3 packets the quad loop left behind */
  while (n_left_from > 0)
    {
      u8x16 src_dst0;

      src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
      src_dst0 = u8x16_shuffle (src_dst0, swapmac);
      ((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
      vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
        vnet_buffer (b[0])->sw_if_index[VLIB_RX];

      b += 1;
      n_left_from -= 1;
      pkts_swapped += 1;
    }

  /* The cast only drops const to match the prototype; nexts is all zero */
  vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
                               frame->n_vectors);

  vlib_node_increment_counter (vm, sample_node.index,
                               SAMPLE_ERROR_SWAPPED, pkts_swapped);

  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      u32 i;

      /*
       * Visit every buffer of the frame: traced buffers are not
       * guaranteed to sit in front of the untraced ones, so stopping at
       * the first untraced buffer could drop trace records.
       */
      for (i = 0; i < frame->n_vectors; i++)
        {
          vlib_buffer_t *tb = bufs[i];
          ethernet_header_t *en;
          sample_trace_t *t;

          if (!(tb->flags & VLIB_BUFFER_IS_TRACED))
            continue;

          t = vlib_add_trace (vm, node, tb, sizeof (*t));
          t->sw_if_index = vnet_buffer (tb)->sw_if_index[VLIB_TX];
          t->next_index = SAMPLE_NEXT_INTERFACE_OUTPUT;
          en = vlib_buffer_get_current (tb);
          clib_memcpy_fast (t->new_src_mac, en->src_address,
                            sizeof (t->new_src_mac));
          clib_memcpy_fast (t->new_dst_mac, en->dst_address,
                            sizeof (t->new_dst_mac));
        }
    }
  return frame->n_vectors;
}
#endif
601
/*
 * This version computes all of the buffer pointers in
 * one motion, uses a fully pipelined loop model, and
 * traces the entire frame in one motion.
 *
 * It's performance-competitive with other coding paradigms,
 * and it's the simplest way to write performant vpp code
 */
610
611
#ifdef VERSION_4

#define u8x16_shuffle __builtin_shuffle

/*
 * Vector shuffle mask to swap src, dst: bytes 0-5 and 6-11 trade
 * places, bytes 12-15 stay where they are.
 */
static u8x16 swapmac =
  { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };

/*
 * Final stage in the pipeline, do the mac swap.
 *
 * Shuffles the first 16 bytes of the packet, sets the TX interface to
 * the RX interface and returns the next-node index for this buffer.
 */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
  u8x16 src_dst0;
  src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
  src_dst0 = u8x16_shuffle (src_dst0, swapmac);
  ((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
  vnet_buffer (b)->sw_if_index[VLIB_TX] =
    vnet_buffer (b)->sw_if_index[VLIB_RX];
  /* set next-index[] to 0 (SAMPLE_NEXT_INTERFACE_OUTPUT) for this buffer */
  return SAMPLE_NEXT_INTERFACE_OUTPUT;
}

/*
 * Add a couple of nil stages to increase the prefetch stride.
 * For any specific platform, the optimal prefetch stride may differ.
 */
static inline void
stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
}

static inline void
stage2 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
}

/* Configuration consumed by <vnet/pipeline.h>; must precede the include */
#define NSTAGES 4
#define STAGE_INLINE inline __attribute__((__always_inline__))

#define stage0 generic_stage0

#include <vnet/pipeline.h>

/*
 * Node function, pipelined flavour.
 *
 * Runs the frame through dispatch_pipeline(), counts the whole frame in
 * SAMPLE_ERROR_SWAPPED, then adds trace records.  Returns the number of
 * vectors in the frame.
 */
VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  dispatch_pipeline (vm, node, frame);

  vlib_node_increment_counter (vm, sample_node.index,
                               SAMPLE_ERROR_SWAPPED, frame->n_vectors);
  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      /*
       * Nothing in this file declares a buffer-pointer array for this
       * function, so translate the frame's buffer indices here.
       * NOTE(review): assumes dispatch_pipeline() leaves the frame's
       * index vector intact - confirm against vnet/pipeline.h.
       */
      vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
      u32 *from = vlib_frame_vector_args (frame);
      u32 i;

      vlib_get_buffers (vm, from, bufs, frame->n_vectors);

      /*
       * Visit every buffer of the frame: traced buffers are not
       * guaranteed to sit in front of the untraced ones, so stopping at
       * the first untraced buffer could drop trace records.
       */
      for (i = 0; i < frame->n_vectors; i++)
        {
          vlib_buffer_t *tb = bufs[i];
          ethernet_header_t *en;
          sample_trace_t *t;

          if (!(tb->flags & VLIB_BUFFER_IS_TRACED))
            continue;

          t = vlib_add_trace (vm, node, tb, sizeof (*t));
          t->sw_if_index = vnet_buffer (tb)->sw_if_index[VLIB_TX];
          t->next_index = SAMPLE_NEXT_INTERFACE_OUTPUT;
          en = vlib_buffer_get_current (tb);
          clib_memcpy_fast (t->new_src_mac, en->src_address,
                            sizeof (t->new_src_mac));
          clib_memcpy_fast (t->new_dst_mac, en->dst_address,
                            sizeof (t->new_dst_mac));
        }
    }
  return frame->n_vectors;
}
#endif
689
Dave Barach9594b562018-07-25 16:56:38 -0400690/* *INDENT-OFF* */
691VLIB_REGISTER_NODE (sample_node) =
692{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700693 .name = "sample",
694 .vector_size = sizeof (u32),
695 .format_trace = format_sample_trace,
696 .type = VLIB_NODE_TYPE_INTERNAL,
Dave Barach9594b562018-07-25 16:56:38 -0400697
Ed Warnickecb9cada2015-12-08 15:45:58 -0700698 .n_errors = ARRAY_LEN(sample_error_strings),
699 .error_strings = sample_error_strings,
700
701 .n_next_nodes = SAMPLE_N_NEXT,
702
703 /* edit / add dispositions here */
704 .next_nodes = {
Dave Barach9594b562018-07-25 16:56:38 -0400705 [SAMPLE_NEXT_INTERFACE_OUTPUT] = "interface-output",
Ed Warnickecb9cada2015-12-08 15:45:58 -0700706 },
707};
Dave Barach9594b562018-07-25 16:56:38 -0400708/* *INDENT-ON* */
709
Dave Barach9594b562018-07-25 16:56:38 -0400710/*
711 * fd.io coding-style-patch-verification: ON
712 *
713 * Local Variables:
714 * eval: (c-set-style "gnu")
715 * End:
716 */