Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 Cisco and/or its affiliates. |
| 3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | * you may not use this file except in compliance with the License. |
| 5 | * You may obtain a copy of the License at: |
| 6 | * |
| 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | * |
| 9 | * Unless required by applicable law or agreed to in writing, software |
| 10 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | * See the License for the specific language governing permissions and |
| 13 | * limitations under the License. |
| 14 | */ |
| 15 | |
| 16 | #error do not #include this file! |
| 17 | |
| 18 | /** \file |
| 19 | |
| 20 | Cooperative multi-tasking thread support. |
| 21 | |
| 22 | Vlib provides a lightweight cooperative multi-tasking thread |
| 23 | model. Context switching costs a setjmp/longjmp pair. It's not |
| 24 | unreasonable to put vlib threads to sleep for 10us. |
| 25 | |
| 26 | The graph node scheduler invokes these processes in much the same |
| 27 | way as traditional vector-processing run-to-completion graph |
| 28 | nodes; plus-or-minus a setjmp/longjmp pair required to switch |
| 29 | stacks. Simply set the vlib_node_registration_t type field to |
| 30 | VLIB_NODE_TYPE_PROCESS. Process is a misnomer; these are threads. |
| 31 | |
| 32 | As of this writing, the default stack size is 1<<15; |
| 33 | 32kb. Initialize the node registration's |
| 34 | process_log2_n_stack_bytes member as needed. The graph node |
| 35 | dispatcher makes some effort to detect stack overrun. We map a |
| 36 | no-access page below each thread stack. |
| 37 | |
| 38 | Process node dispatch functions are expected to be while(1) { } |
| 39 | loops which suspend when not otherwise occupied, and which must |
| 40 | not run for unreasonably long periods of time. Unreasonably long |
| 41 | is an application-dependent concept. Over the years, we have |
| 42 | constructed frame-size sensitive control-plane nodes which will |
| 43 | use a much higher fraction of the available CPU bandwidth when the |
| 44 | frame size is low. Classic example: modifying forwarding |
| 45 | tables. So long as the table-builder leaves the forwarding tables |
| 46 | in a valid state, one can suspend the table builder to avoid |
| 47 | dropping packets as a result of control-plane activity. |
| 48 | |
| 49 | Process nodes can suspend for fixed amounts of time, or until another |
| 50 | entity signals an event, or both. See the example below. |
| 51 | |
| 52 | When running in VLIB process context, one must pay strict attention to |
| 53 | loop invariant issues. If one walks a data structure and calls a |
| 54 | function which may suspend, one had best know by construction that it |
| 55 | cannot change. Often, it's best to simply make a snapshot copy of a |
| 56 | data structure, walk the copy at leisure, then free the copy. |
| 57 | |
| 58 | Here's an example: |
| 59 | |
Chris Luke | d4024f5 | 2016-09-06 09:32:36 -0400 | [diff] [blame] | 60 | <code><pre> |
| 61 | \#define EXAMPLE_POLL_PERIOD 10.0 |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 62 | |
| 63 | static uword |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 64 | example_process (vlib_main_t * vm, vlib_node_runtime_t * rt, |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 65 | vlib_frame_t * f) |
| 66 | { |
| 67 | f64 poll_time_remaining; |
| 68 | uword event_type, *event_data = 0; |
| 69 | |
| 70 | poll_time_remaining = EXAMPLE_POLL_PERIOD; |
| 71 | while (1) |
| 72 | { |
| 73 | int i; |
| 74 | |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 75 | // Sleep until next periodic call due, |
| 76 | // or until we receive event(s) |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 77 | // |
| 78 | poll_time_remaining = |
| 79 | vlib_process_wait_for_event_or_clock (vm, poll_time_remaining); |
| 80 | |
| 81 | event_type = vlib_process_get_events (vm, &event_data); |
| 82 | switch (event_type) |
| 83 | { |
| 84 | case ~0: // no events => timeout |
| 85 | break; |
| 86 | |
| 87 | case EVENT1: |
| 88 | for (i = 0; i < vec_len (event_data); i++) |
| 89 | handle_event1 (vm, event_data[i]); |
| 90 | break; |
| 91 | |
| 92 | case EVENT2: |
| 93 | for (i = 0; i < vec_len (event_data); i++) |
| 94 | handle_event2 (vm, event_data[i]); |
| 95 | break; |
| 96 | |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 97 | // ... and so forth for each event type |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 98 | |
| 99 | default: |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 100 | // This should never happen... |
| 101 | clib_warning ("BUG: unhandled event type %d", |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 102 | event_type); |
| 103 | break; |
| 104 | } |
| 105 | vec_reset_length (event_data); |
| 106 | |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 107 | // Timer expired, call periodic function |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 108 | if (vlib_process_suspend_time_is_zero (poll_time_remaining)) |
| 109 | { |
| 110 | example_periodic (vm); |
| 111 | poll_time_remaining = EXAMPLE_POLL_PERIOD; |
| 112 | } |
| 113 | } |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 114 | // NOTREACHED |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 115 | return 0; |
Damjan Marion | 607de1a | 2016-08-16 22:53:54 +0200 | [diff] [blame] | 116 | } |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 117 | |
| 118 | static VLIB_REGISTER_NODE (example_node) = { |
| 119 | .function = example_process, |
| 120 | .type = VLIB_NODE_TYPE_PROCESS, |
| 121 | .name = "example-process", |
| 122 | }; |
Chris Luke | d4024f5 | 2016-09-06 09:32:36 -0400 | [diff] [blame] | 123 | </pre></code> |
Dave Barach | 2ab470a | 2016-08-10 18:38:36 -0400 | [diff] [blame] | 124 | |
| 125 | In this example, the VLIB process node waits for an event to |
| 126 | occur, or for 10 seconds to elapse. The code demuxes on the event |
| 127 | type, calling the appropriate handler function. |
| 128 | |
| 129 | Each call to vlib_process_get_events returns a vector of |
| 130 | per-event-type data passed to successive vlib_process_signal_event |
| 131 | calls; vec_len (event_data) >= 1. It is an error to process only |
| 132 | event_data[0]. |
| 133 | |
| 134 | Resetting the event_data vector-length to 0 by calling |
| 135 | vec_reset_length (event_data) - instead of calling vec_free (...) |
| 136 | - means that the event scheme doesn't burn cycles continuously |
| 137 | allocating and freeing the event data vector. This is a common |
| 138 | coding pattern, well worth using when appropriate. |
| 139 | */ |
| 140 | |
| 141 | /* |
| 142 | * fd.io coding-style-patch-verification: ON |
| 143 | * |
| 144 | * Local Variables: |
| 145 | * eval: (c-set-style "gnu") |
| 146 | * End: |
| 147 | */ |