blob: 0c2984466362e0a69c4f356b57642ee217ffb645 [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * input.c: Unix file input
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vlib/vlib.h>
41#include <vlib/unix/unix.h>
42#include <signal.h>
Damjan Marionceab7882018-01-19 20:56:12 +010043#include <unistd.h>
Dave Barach5c20a012017-06-13 08:48:31 -040044#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070045
46/* FIXME autoconf */
47#define HAVE_LINUX_EPOLL
48
49#ifdef HAVE_LINUX_EPOLL
50
51#include <sys/epoll.h>
52
Dave Barach9b8ffd92016-07-08 08:13:45 -040053typedef struct
54{
Ed Warnickecb9cada2015-12-08 15:45:58 -070055 int epoll_fd;
Dave Barach9b8ffd92016-07-08 08:13:45 -040056 struct epoll_event *epoll_events;
Damjan Marionceab7882018-01-19 20:56:12 +010057 int n_epoll_fds;
Ed Warnickecb9cada2015-12-08 15:45:58 -070058
59 /* Statistics. */
60 u64 epoll_files_ready;
61 u64 epoll_waits;
62} linux_epoll_main_t;
63
Damjan Marionceab7882018-01-19 20:56:12 +010064static linux_epoll_main_t *linux_epoll_mains = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070065
66static void
Dave Barach59b25652017-09-10 15:04:27 -040067linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
Ed Warnickecb9cada2015-12-08 15:45:58 -070068{
Damjan Marion56dd5432017-09-08 19:52:02 +020069 clib_file_main_t *fm = &file_main;
Damjan Marionceab7882018-01-19 20:56:12 +010070 linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
71 f->polling_thread_index);
72 struct epoll_event e = { 0 };
73 int op, add_del = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070074
75 e.events = EPOLLIN;
76 if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
77 e.events |= EPOLLOUT;
Dave Barach9b8ffd92016-07-08 08:13:45 -040078 if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
79 e.events |= EPOLLET;
Damjan Marion56dd5432017-09-08 19:52:02 +020080 e.data.u32 = f - fm->file_pool;
Ed Warnickecb9cada2015-12-08 15:45:58 -070081
Dave Baracha1a093d2017-03-02 13:13:23 -050082 op = -1;
83
84 switch (update_type)
85 {
86 case UNIX_FILE_UPDATE_ADD:
87 op = EPOLL_CTL_ADD;
Damjan Marionceab7882018-01-19 20:56:12 +010088 add_del = 1;
Dave Baracha1a093d2017-03-02 13:13:23 -050089 break;
90
91 case UNIX_FILE_UPDATE_MODIFY:
92 op = EPOLL_CTL_MOD;
93 break;
94
95 case UNIX_FILE_UPDATE_DELETE:
96 op = EPOLL_CTL_DEL;
Damjan Marionceab7882018-01-19 20:56:12 +010097 add_del = -1;
Dave Baracha1a093d2017-03-02 13:13:23 -050098 break;
99
100 default:
101 clib_warning ("unknown update_type %d", update_type);
102 return;
103 }
104
Damjan Marionceab7882018-01-19 20:56:12 +0100105 /* worker threads open epoll fd only if needed */
106 if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
107 {
108 em->epoll_fd = epoll_create (1);
109 if (em->epoll_fd < 0)
110 {
111 clib_unix_warning ("epoll_create");
112 return;
113 }
114 em->n_epoll_fds = 0;
115 }
116
Dave Baracha1a093d2017-03-02 13:13:23 -0500117 if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
Damjan Marionceab7882018-01-19 20:56:12 +0100118 {
119 clib_unix_warning ("epoll_ctl");
120 return;
121 }
122
123 em->n_epoll_fds += add_del;
124
125 if (em->n_epoll_fds == 0)
126 {
127 close (em->epoll_fd);
128 em->epoll_fd = -1;
129 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700130}
131
Damjan Marionceab7882018-01-19 20:56:12 +0100132static_always_inline uword
133linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
134 vlib_frame_t * frame, u32 thread_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700135{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400136 unix_main_t *um = &unix_main;
Damjan Marion56dd5432017-09-08 19:52:02 +0200137 clib_file_main_t *fm = &file_main;
Damjan Marionceab7882018-01-19 20:56:12 +0100138 linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400139 struct epoll_event *e;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140 int n_fds_ready;
Damjan Marionceab7882018-01-19 20:56:12 +0100141 int is_main = (thread_index == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700142
143 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400144 vlib_node_main_t *nm = &vm->node_main;
Dave Barach5c20a012017-06-13 08:48:31 -0400145 u32 ticks_until_expiration;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700146 f64 timeout;
Dave Barach5c20a012017-06-13 08:48:31 -0400147 int timeout_ms = 0, max_timeout_ms = 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700148 f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
149
Dave Barach5c20a012017-06-13 08:48:31 -0400150 /* If we're not working very hard, decide how long to sleep */
Damjan Marionceab7882018-01-19 20:56:12 +0100151 if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
Dave Barach5c20a012017-06-13 08:48:31 -0400152 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700153 {
Dave Barach5c20a012017-06-13 08:48:31 -0400154 ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
155 ((TWT (tw_timer_wheel) *) nm->timing_wheel);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400156
Dave Barach5c20a012017-06-13 08:48:31 -0400157 /* Nothing on the fast wheel, sleep 10ms */
158 if (ticks_until_expiration == TW_SLOTS_PER_RING)
Damjan Marion18bc9072016-12-07 14:07:54 +0100159 {
Dave Barach5c20a012017-06-13 08:48:31 -0400160 timeout = 10e-3;
161 timeout_ms = max_timeout_ms;
Damjan Marion18bc9072016-12-07 14:07:54 +0100162 }
163 else
164 {
Dave Barach5c20a012017-06-13 08:48:31 -0400165 timeout = (f64) ticks_until_expiration *1e-5;
166 if (timeout < 1e-3)
167 timeout_ms = 0;
168 else
169 {
170 timeout_ms = timeout * 1e3;
171 /* Must be between 1 and 10 ms. */
172 timeout_ms = clib_max (1, timeout_ms);
173 timeout_ms = clib_min (max_timeout_ms, timeout_ms);
174 }
Damjan Marion18bc9072016-12-07 14:07:54 +0100175 }
Dave Barach5c20a012017-06-13 08:48:31 -0400176 node->input_main_loops_per_call = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177 }
Damjan Marionceab7882018-01-19 20:56:12 +0100178 else if (is_main == 0 && vector_rate < 2 &&
179 nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
180 {
181 timeout = 10e-3;
182 timeout_ms = max_timeout_ms;
183 node->input_main_loops_per_call = 0;
184 }
Dave Barach5c20a012017-06-13 08:48:31 -0400185 else /* busy */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700186 {
Dave Barach5c20a012017-06-13 08:48:31 -0400187 /* Don't come back for a respectable number of dispatch cycles */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700188 node->input_main_loops_per_call = 1024;
189 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700190
191 /* Allow any signal to wakeup our sleep. */
Damjan Marionceab7882018-01-19 20:56:12 +0100192 if (is_main || em->epoll_fd != -1)
193 {
194 static sigset_t unblock_all_signals;
195 n_fds_ready = epoll_pwait (em->epoll_fd,
196 em->epoll_events,
197 vec_len (em->epoll_events),
198 timeout_ms, &unblock_all_signals);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400199
Damjan Marionceab7882018-01-19 20:56:12 +0100200 /* This kludge is necessary to run over absurdly old kernels */
201 if (n_fds_ready < 0 && errno == ENOSYS)
202 {
203 n_fds_ready = epoll_wait (em->epoll_fd,
204 em->epoll_events,
205 vec_len (em->epoll_events), timeout_ms);
206 }
207 }
208 else
209 {
210 if (timeout_ms)
211 usleep (timeout_ms * 1000);
212 return 0;
213 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700214 }
215
216 if (n_fds_ready < 0)
217 {
218 if (unix_error_is_fatal (errno))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400219 vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700220
221 /* non fatal error (e.g. EINTR). */
222 return 0;
223 }
224
225 em->epoll_waits += 1;
226 em->epoll_files_ready += n_fds_ready;
227
228 for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
229 {
230 u32 i = e->data.u32;
Damjan Marion56dd5432017-09-08 19:52:02 +0200231 clib_file_t *f = pool_elt_at_index (fm->file_pool, i);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400232 clib_error_t *errors[4];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700233 int n_errors = 0;
234
Dave Barach9b8ffd92016-07-08 08:13:45 -0400235 if (PREDICT_TRUE (!(e->events & EPOLLERR)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700236 {
237 if (e->events & EPOLLIN)
238 {
239 errors[n_errors] = f->read_function (f);
Damjan Marionceab7882018-01-19 20:56:12 +0100240 f->read_events++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700241 n_errors += errors[n_errors] != 0;
242 }
243 if (e->events & EPOLLOUT)
244 {
245 errors[n_errors] = f->write_function (f);
Damjan Marionceab7882018-01-19 20:56:12 +0100246 f->write_events++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247 n_errors += errors[n_errors] != 0;
248 }
249 }
250 else
251 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400252 if (f->error_function)
253 {
254 errors[n_errors] = f->error_function (f);
Damjan Marionceab7882018-01-19 20:56:12 +0100255 f->error_events++;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400256 n_errors += errors[n_errors] != 0;
257 }
Ole Troan4b12b3c2016-01-27 23:37:58 +0200258 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400259 close (f->file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700260 }
261
262 ASSERT (n_errors < ARRAY_LEN (errors));
263 for (i = 0; i < n_errors; i++)
264 {
265 unix_save_error (um, errors[i]);
266 }
267 }
268
269 return 0;
270}
271
Damjan Marionceab7882018-01-19 20:56:12 +0100272static uword
273linux_epoll_input (vlib_main_t * vm,
274 vlib_node_runtime_t * node, vlib_frame_t * frame)
275{
276 u32 thread_index = vlib_get_thread_index ();
277
278 if (thread_index == 0)
279 return linux_epoll_input_inline (vm, node, frame, 0);
280 else
281 return linux_epoll_input_inline (vm, node, frame, thread_index);
282}
283
Dave Barach9b8ffd92016-07-08 08:13:45 -0400284/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700285VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
286 .function = linux_epoll_input,
287 .type = VLIB_NODE_TYPE_PRE_INPUT,
288 .name = "unix-epoll-input",
289};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400290/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700291
292clib_error_t *
293linux_epoll_input_init (vlib_main_t * vm)
294{
Damjan Marionceab7882018-01-19 20:56:12 +0100295 linux_epoll_main_t *em;
Damjan Marion56dd5432017-09-08 19:52:02 +0200296 clib_file_main_t *fm = &file_main;
Damjan Marionceab7882018-01-19 20:56:12 +0100297 vlib_thread_main_t *tm = vlib_get_thread_main ();
Dave Barach9b8ffd92016-07-08 08:13:45 -0400298
Ed Warnickecb9cada2015-12-08 15:45:58 -0700299
Damjan Marionceab7882018-01-19 20:56:12 +0100300 vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,
301 CLIB_CACHE_LINE_BYTES);
302
303 vec_foreach (em, linux_epoll_mains)
304 {
305 /* Allocate some events. */
306 vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
307
308 if (linux_epoll_mains == em)
309 {
310 em->epoll_fd = epoll_create (1);
311 if (em->epoll_fd < 0)
312 return clib_error_return_unix (0, "epoll_create");
313 }
314 else
315 em->epoll_fd = -1;
316 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700317
Damjan Marion56dd5432017-09-08 19:52:02 +0200318 fm->file_update = linux_epoll_file_update;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700319
320 return 0;
321}
322
323VLIB_INIT_FUNCTION (linux_epoll_input_init);
324
325#endif /* HAVE_LINUX_EPOLL */
326
327static clib_error_t *
328unix_input_init (vlib_main_t * vm)
329{
330 return vlib_call_init_function (vm, linux_epoll_input_init);
331}
332
333VLIB_INIT_FUNCTION (unix_input_init);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400334
335/*
336 * fd.io coding-style-patch-verification: ON
337 *
338 * Local Variables:
339 * eval: (c-set-style "gnu")
340 * End:
341 */