/*
 * vnet/pipeline.h: software pipeline
 *
 * Copyright (c) 2012 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Usage example.
 *
 * #define NSTAGES 3     (or any value from 2 to 6)
 *
 * <Define the pipeline stages: stage0 .. stage(NSTAGES-2) and last_stage>
 *
 * #include <vnet/pipeline.h>
 *
 * static uword my_node_fn (vlib_main_t * vm,
 *                          vlib_node_runtime_t * node,
 *                          vlib_frame_t * frame)
 * {
 *   return dispatch_pipeline (vm, node, frame);
 * }
 *
 */

| #ifndef NSTAGES |
| #error files which #include <vnet/pipeline.h> must define NSTAGES |
| #endif |
| |
| #ifndef STAGE_INLINE |
| #define STAGE_INLINE inline |
| #endif |
| |
/*
 * A prefetch stride of 2 is quasi-equivalent to doubling the number
 * of stages with every other pipeline stage empty.
 */

| /* |
| * This is a typical first pipeline stage, which prefetches |
| * buffer metadata and the first line of pkt data. |
| * To use it: |
| * #define stage0 generic_stage0 |
| */ |
| static STAGE_INLINE void |
| generic_stage0 (vlib_main_t * vm, |
| vlib_node_runtime_t * node, u32 buffer_index) |
| { |
| /* generic default stage 0 here */ |
| vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); |
| vlib_prefetch_buffer_header (b, STORE); |
| CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE); |
| } |
| |
#if NSTAGES == 2

/*
 * dispatch_pipeline, NSTAGES == 2 variant.
 *
 * Runs every buffer index of `frame` through stage0 and then last_stage,
 * with last_stage lagging one index behind stage0.
 *
 * The frame is consumed in passes.  A pass covers
 * min (indices left, slots left in the current next frame) indices and has
 * three phases:
 *   fill   - stage0 only, nothing is retired yet;
 *   steady - stage0 on the newest index while the oldest one is retired;
 *   drain  - retire what is still in flight.
 *
 * Retiring an index writes it through to_next first, then calls last_stage
 * and hands its return value to vlib_validate_buffer_enqueue_x1 as next0.
 * That macro receives next_index, to_next and n_left_to_next by name and
 * presumably corrects the speculative write when next0 differs from
 * next_index - see its definition.
 *
 * NOTE(review): the refill test "(int) n_left_to_next < 0" can only be true
 * once the unsigned counter has been decremented past zero, i.e. after a
 * write through to_next with no slot reported free.  Confirm that this is
 * the intended condition.
 *
 * Returns frame->n_vectors.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 n_left_to_next, *to_next;
  u32 bi0, next0;
  int pos, n_pass, drain_end;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Number of indices handled by this pass. */
      n_pass = clib_min (n_left_from, n_left_to_next);
      drain_end = n_pass + (NSTAGES - 1);

      /* Fill. */
      for (pos = 0; pos < NSTAGES - 1 && pos != n_pass; pos++)
        {
          stage0 (vm, node, from[pos]);
        }

      /* Steady state. */
      while (pos < n_pass)
        {
          stage0 (vm, node, from[pos]);

          bi0 = from[pos - 1];
          *to_next++ = bi0;
          n_left_to_next -= 1;
          next0 = last_stage (vm, node, bi0);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
          n_left_from -= 1;
          if (n_left_from > 0 && (int) n_left_to_next < 0)
            vlib_get_next_frame (vm, node, next_index, to_next,
                                 n_left_to_next);
          pos++;
        }

      /* Drain. */
      while (pos < drain_end)
        {
          if (pos >= 1 && pos - 1 < n_pass)
            {
              bi0 = from[pos - 1];
              *to_next++ = bi0;
              n_left_to_next -= 1;
              next0 = last_stage (vm, node, bi0);
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
              n_left_from -= 1;
              if (n_left_from > 0 && (int) n_left_to_next < 0)
                vlib_get_next_frame (vm, node, next_index, to_next,
                                     n_left_to_next);
            }
          pos++;
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += n_pass;
    }

  return frame->n_vectors;
}
#endif /* NSTAGES == 2 */

#if NSTAGES == 3
/*
 * dispatch_pipeline, NSTAGES == 3 variant.
 *
 * Runs every buffer index of `frame` through stage0, stage1 and last_stage.
 * In one step stage0 works on index pos, stage1 on pos - 1 and last_stage
 * on pos - 2.
 *
 * The frame is consumed in passes.  A pass covers
 * min (indices left, slots left in the current next frame) indices and has
 * three phases:
 *   fill   - only the leading stages run, nothing is retired yet;
 *   steady - all stages run and one index is retired per step;
 *   drain  - trailing stages finish the indices still in flight.
 *
 * Retiring an index writes it through to_next first, then calls last_stage
 * and hands its return value to vlib_validate_buffer_enqueue_x1 as next0.
 * That macro receives next_index, to_next and n_left_to_next by name and
 * presumably corrects the speculative write when next0 differs from
 * next_index - see its definition.
 *
 * NOTE(review): the refill test "(int) n_left_to_next < 0" can only be true
 * once the unsigned counter has been decremented past zero, i.e. after a
 * write through to_next with no slot reported free.  Confirm that this is
 * the intended condition.
 *
 * Returns frame->n_vectors.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 n_left_to_next, *to_next;
  u32 bi0, next0;
  int pos, n_pass, drain_end;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Number of indices handled by this pass. */
      n_pass = clib_min (n_left_from, n_left_to_next);
      drain_end = n_pass + (NSTAGES - 1);

      /* Fill. */
      for (pos = 0; pos < NSTAGES - 1 && pos != n_pass; pos++)
        {
          stage0 (vm, node, from[pos]);
          if (pos >= 1)
            stage1 (vm, node, from[pos - 1]);
        }

      /* Steady state. */
      while (pos < n_pass)
        {
          stage0 (vm, node, from[pos]);
          stage1 (vm, node, from[pos - 1]);

          bi0 = from[pos - 2];
          *to_next++ = bi0;
          n_left_to_next -= 1;
          next0 = last_stage (vm, node, bi0);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
          n_left_from -= 1;
          if (n_left_from > 0 && (int) n_left_to_next < 0)
            vlib_get_next_frame (vm, node, next_index, to_next,
                                 n_left_to_next);
          pos++;
        }

      /* Drain. */
      while (pos < drain_end)
        {
          if (pos >= 1 && pos - 1 < n_pass)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2 && pos - 2 < n_pass)
            {
              bi0 = from[pos - 2];
              *to_next++ = bi0;
              n_left_to_next -= 1;
              next0 = last_stage (vm, node, bi0);
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
              n_left_from -= 1;
              if (n_left_from > 0 && (int) n_left_to_next < 0)
                vlib_get_next_frame (vm, node, next_index, to_next,
                                     n_left_to_next);
            }
          pos++;
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += n_pass;
    }

  return frame->n_vectors;
}
#endif /* NSTAGES == 3 */

#if NSTAGES == 4
/*
 * dispatch_pipeline, NSTAGES == 4 variant.
 *
 * Runs every buffer index of `frame` through stage0, stage1, stage2 and
 * last_stage.  In one step stage0 works on index pos, stage1 on pos - 1,
 * stage2 on pos - 2 and last_stage on pos - 3.
 *
 * The frame is consumed in passes.  A pass covers
 * min (indices left, slots left in the current next frame) indices and has
 * three phases:
 *   fill   - only the leading stages run, nothing is retired yet;
 *   steady - all stages run and one index is retired per step;
 *   drain  - trailing stages finish the indices still in flight.
 *
 * Retiring an index writes it through to_next first, then calls last_stage
 * and hands its return value to vlib_validate_buffer_enqueue_x1 as next0.
 * That macro receives next_index, to_next and n_left_to_next by name and
 * presumably corrects the speculative write when next0 differs from
 * next_index - see its definition.
 *
 * NOTE(review): the refill test "(int) n_left_to_next < 0" can only be true
 * once the unsigned counter has been decremented past zero, i.e. after a
 * write through to_next with no slot reported free.  Confirm that this is
 * the intended condition.
 *
 * Returns frame->n_vectors.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 n_left_to_next, *to_next;
  u32 bi0, next0;
  int pos, n_pass, drain_end;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Number of indices handled by this pass. */
      n_pass = clib_min (n_left_from, n_left_to_next);
      drain_end = n_pass + (NSTAGES - 1);

      /* Fill. */
      for (pos = 0; pos < NSTAGES - 1 && pos != n_pass; pos++)
        {
          stage0 (vm, node, from[pos]);
          if (pos >= 1)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2)
            stage2 (vm, node, from[pos - 2]);
        }

      /* Steady state. */
      while (pos < n_pass)
        {
          stage0 (vm, node, from[pos]);
          stage1 (vm, node, from[pos - 1]);
          stage2 (vm, node, from[pos - 2]);

          bi0 = from[pos - 3];
          *to_next++ = bi0;
          n_left_to_next -= 1;
          next0 = last_stage (vm, node, bi0);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
          n_left_from -= 1;
          if (n_left_from > 0 && (int) n_left_to_next < 0)
            vlib_get_next_frame (vm, node, next_index, to_next,
                                 n_left_to_next);
          pos++;
        }

      /* Drain. */
      while (pos < drain_end)
        {
          if (pos >= 1 && pos - 1 < n_pass)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2 && pos - 2 < n_pass)
            stage2 (vm, node, from[pos - 2]);
          if (pos >= 3 && pos - 3 < n_pass)
            {
              bi0 = from[pos - 3];
              *to_next++ = bi0;
              n_left_to_next -= 1;
              next0 = last_stage (vm, node, bi0);
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
              n_left_from -= 1;
              if (n_left_from > 0 && (int) n_left_to_next < 0)
                vlib_get_next_frame (vm, node, next_index, to_next,
                                     n_left_to_next);
            }
          pos++;
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += n_pass;
    }

  return frame->n_vectors;
}
#endif /* NSTAGES == 4 */


#if NSTAGES == 5
/*
 * dispatch_pipeline, NSTAGES == 5 variant.
 *
 * Runs every buffer index of `frame` through stage0 .. stage3 and
 * last_stage.  In one step stage0 works on index pos, stage1 on pos - 1,
 * stage2 on pos - 2, stage3 on pos - 3 and last_stage on pos - 4.
 *
 * The frame is consumed in passes.  A pass covers
 * min (indices left, slots left in the current next frame) indices and has
 * three phases:
 *   fill   - only the leading stages run, nothing is retired yet;
 *   steady - all stages run and one index is retired per step;
 *   drain  - trailing stages finish the indices still in flight.
 *
 * Retiring an index writes it through to_next first, then calls last_stage
 * and hands its return value to vlib_validate_buffer_enqueue_x1 as next0.
 * That macro receives next_index, to_next and n_left_to_next by name and
 * presumably corrects the speculative write when next0 differs from
 * next_index - see its definition.
 *
 * NOTE(review): the refill test "(int) n_left_to_next < 0" can only be true
 * once the unsigned counter has been decremented past zero, i.e. after a
 * write through to_next with no slot reported free.  Confirm that this is
 * the intended condition.
 *
 * Returns frame->n_vectors.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 n_left_to_next, *to_next;
  u32 bi0, next0;
  int pos, n_pass, drain_end;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Number of indices handled by this pass. */
      n_pass = clib_min (n_left_from, n_left_to_next);
      drain_end = n_pass + (NSTAGES - 1);

      /* Fill. */
      for (pos = 0; pos < NSTAGES - 1 && pos != n_pass; pos++)
        {
          stage0 (vm, node, from[pos]);
          if (pos >= 1)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2)
            stage2 (vm, node, from[pos - 2]);
          if (pos >= 3)
            stage3 (vm, node, from[pos - 3]);
        }

      /* Steady state. */
      while (pos < n_pass)
        {
          stage0 (vm, node, from[pos]);
          stage1 (vm, node, from[pos - 1]);
          stage2 (vm, node, from[pos - 2]);
          stage3 (vm, node, from[pos - 3]);

          bi0 = from[pos - 4];
          *to_next++ = bi0;
          n_left_to_next -= 1;
          next0 = last_stage (vm, node, bi0);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
          n_left_from -= 1;
          if (n_left_from > 0 && (int) n_left_to_next < 0)
            vlib_get_next_frame (vm, node, next_index, to_next,
                                 n_left_to_next);
          pos++;
        }

      /* Drain. */
      while (pos < drain_end)
        {
          if (pos >= 1 && pos - 1 < n_pass)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2 && pos - 2 < n_pass)
            stage2 (vm, node, from[pos - 2]);
          if (pos >= 3 && pos - 3 < n_pass)
            stage3 (vm, node, from[pos - 3]);
          if (pos >= 4 && pos - 4 < n_pass)
            {
              bi0 = from[pos - 4];
              *to_next++ = bi0;
              n_left_to_next -= 1;
              next0 = last_stage (vm, node, bi0);
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
              n_left_from -= 1;
              if (n_left_from > 0 && (int) n_left_to_next < 0)
                vlib_get_next_frame (vm, node, next_index, to_next,
                                     n_left_to_next);
            }
          pos++;
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += n_pass;
    }

  return frame->n_vectors;
}
#endif /* NSTAGES == 5 */

#if NSTAGES == 6
/*
 * dispatch_pipeline, NSTAGES == 6 variant.
 *
 * Runs every buffer index of `frame` through stage0 .. stage4 and
 * last_stage.  In one step stage0 works on index pos, stage1 on pos - 1,
 * stage2 on pos - 2, stage3 on pos - 3, stage4 on pos - 4 and last_stage
 * on pos - 5.
 *
 * The frame is consumed in passes.  A pass covers
 * min (indices left, slots left in the current next frame) indices and has
 * three phases:
 *   fill   - only the leading stages run, nothing is retired yet;
 *   steady - all stages run and one index is retired per step;
 *   drain  - trailing stages finish the indices still in flight.
 *
 * Retiring an index writes it through to_next first, then calls last_stage
 * and hands its return value to vlib_validate_buffer_enqueue_x1 as next0.
 * That macro receives next_index, to_next and n_left_to_next by name and
 * presumably corrects the speculative write when next0 differs from
 * next_index - see its definition.
 *
 * NOTE(review): the refill test "(int) n_left_to_next < 0" can only be true
 * once the unsigned counter has been decremented past zero, i.e. after a
 * write through to_next with no slot reported free.  Confirm that this is
 * the intended condition.
 *
 * Returns frame->n_vectors.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 n_left_to_next, *to_next;
  u32 bi0, next0;
  int pos, n_pass, drain_end;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Number of indices handled by this pass. */
      n_pass = clib_min (n_left_from, n_left_to_next);
      drain_end = n_pass + (NSTAGES - 1);

      /* Fill. */
      for (pos = 0; pos < NSTAGES - 1 && pos != n_pass; pos++)
        {
          stage0 (vm, node, from[pos]);
          if (pos >= 1)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2)
            stage2 (vm, node, from[pos - 2]);
          if (pos >= 3)
            stage3 (vm, node, from[pos - 3]);
          if (pos >= 4)
            stage4 (vm, node, from[pos - 4]);
        }

      /* Steady state. */
      while (pos < n_pass)
        {
          stage0 (vm, node, from[pos]);
          stage1 (vm, node, from[pos - 1]);
          stage2 (vm, node, from[pos - 2]);
          stage3 (vm, node, from[pos - 3]);
          stage4 (vm, node, from[pos - 4]);

          bi0 = from[pos - 5];
          *to_next++ = bi0;
          n_left_to_next -= 1;
          next0 = last_stage (vm, node, bi0);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
          n_left_from -= 1;
          if (n_left_from > 0 && (int) n_left_to_next < 0)
            vlib_get_next_frame (vm, node, next_index, to_next,
                                 n_left_to_next);
          pos++;
        }

      /* Drain. */
      while (pos < drain_end)
        {
          if (pos >= 1 && pos - 1 < n_pass)
            stage1 (vm, node, from[pos - 1]);
          if (pos >= 2 && pos - 2 < n_pass)
            stage2 (vm, node, from[pos - 2]);
          if (pos >= 3 && pos - 3 < n_pass)
            stage3 (vm, node, from[pos - 3]);
          if (pos >= 4 && pos - 4 < n_pass)
            stage4 (vm, node, from[pos - 4]);
          if (pos >= 5 && pos - 5 < n_pass)
            {
              bi0 = from[pos - 5];
              *to_next++ = bi0;
              n_left_to_next -= 1;
              next0 = last_stage (vm, node, bi0);
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
              n_left_from -= 1;
              if (n_left_from > 0 && (int) n_left_to_next < 0)
                vlib_get_next_frame (vm, node, next_index, to_next,
                                     n_left_to_next);
            }
          pos++;
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += n_pass;
    }

  return frame->n_vectors;
}
#endif /* NSTAGES == 6 */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */