blob: ecd317913e9b5c319b078307fdfb6ad9b3848873 [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * input.c: Unix file input
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vlib/vlib.h>
41#include <vlib/unix/unix.h>
42#include <signal.h>
Dave Barach5c20a012017-06-13 08:48:31 -040043#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070044
45/* FIXME autoconf */
46#define HAVE_LINUX_EPOLL
47
48#ifdef HAVE_LINUX_EPOLL
49
50#include <sys/epoll.h>
51
Dave Barach9b8ffd92016-07-08 08:13:45 -040052typedef struct
53{
Ed Warnickecb9cada2015-12-08 15:45:58 -070054 int epoll_fd;
Dave Barach9b8ffd92016-07-08 08:13:45 -040055 struct epoll_event *epoll_events;
Ed Warnickecb9cada2015-12-08 15:45:58 -070056
57 /* Statistics. */
58 u64 epoll_files_ready;
59 u64 epoll_waits;
60} linux_epoll_main_t;
61
62static linux_epoll_main_t linux_epoll_main;
63
64static void
Damjan Marion56dd5432017-09-08 19:52:02 +020065linux_epoll_file_update (clib_file_t * f, unix_file_update_type_t update_type)
Ed Warnickecb9cada2015-12-08 15:45:58 -070066{
Damjan Marion56dd5432017-09-08 19:52:02 +020067 clib_file_main_t *fm = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -040068 linux_epoll_main_t *em = &linux_epoll_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070069 struct epoll_event e;
Dave Baracha1a093d2017-03-02 13:13:23 -050070 int op;
Ed Warnickecb9cada2015-12-08 15:45:58 -070071
72 memset (&e, 0, sizeof (e));
73
74 e.events = EPOLLIN;
75 if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
76 e.events |= EPOLLOUT;
Dave Barach9b8ffd92016-07-08 08:13:45 -040077 if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
78 e.events |= EPOLLET;
Damjan Marion56dd5432017-09-08 19:52:02 +020079 e.data.u32 = f - fm->file_pool;
Ed Warnickecb9cada2015-12-08 15:45:58 -070080
Dave Baracha1a093d2017-03-02 13:13:23 -050081 op = -1;
82
83 switch (update_type)
84 {
85 case UNIX_FILE_UPDATE_ADD:
86 op = EPOLL_CTL_ADD;
87 break;
88
89 case UNIX_FILE_UPDATE_MODIFY:
90 op = EPOLL_CTL_MOD;
91 break;
92
93 case UNIX_FILE_UPDATE_DELETE:
94 op = EPOLL_CTL_DEL;
95 break;
96
97 default:
98 clib_warning ("unknown update_type %d", update_type);
99 return;
100 }
101
102 if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
103 clib_unix_warning ("epoll_ctl");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700104}
105
106static uword
107linux_epoll_input (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400108 vlib_node_runtime_t * node, vlib_frame_t * frame)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700109{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400110 unix_main_t *um = &unix_main;
Damjan Marion56dd5432017-09-08 19:52:02 +0200111 clib_file_main_t *fm = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400112 linux_epoll_main_t *em = &linux_epoll_main;
113 struct epoll_event *e;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700114 int n_fds_ready;
115
116 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400117 vlib_node_main_t *nm = &vm->node_main;
Dave Barach5c20a012017-06-13 08:48:31 -0400118 u32 ticks_until_expiration;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700119 f64 timeout;
Dave Barach5c20a012017-06-13 08:48:31 -0400120 int timeout_ms = 0, max_timeout_ms = 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700121 f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
122
Dave Barach5c20a012017-06-13 08:48:31 -0400123 /* If we're not working very hard, decide how long to sleep */
124 if (vector_rate < 2 && vm->api_queue_nonempty == 0
125 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126 {
Dave Barach5c20a012017-06-13 08:48:31 -0400127 ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
128 ((TWT (tw_timer_wheel) *) nm->timing_wheel);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400129
Dave Barach5c20a012017-06-13 08:48:31 -0400130 /* Nothing on the fast wheel, sleep 10ms */
131 if (ticks_until_expiration == TW_SLOTS_PER_RING)
Damjan Marion18bc9072016-12-07 14:07:54 +0100132 {
Dave Barach5c20a012017-06-13 08:48:31 -0400133 timeout = 10e-3;
134 timeout_ms = max_timeout_ms;
Damjan Marion18bc9072016-12-07 14:07:54 +0100135 }
136 else
137 {
Dave Barach5c20a012017-06-13 08:48:31 -0400138 timeout = (f64) ticks_until_expiration *1e-5;
139 if (timeout < 1e-3)
140 timeout_ms = 0;
141 else
142 {
143 timeout_ms = timeout * 1e3;
144 /* Must be between 1 and 10 ms. */
145 timeout_ms = clib_max (1, timeout_ms);
146 timeout_ms = clib_min (max_timeout_ms, timeout_ms);
147 }
Damjan Marion18bc9072016-12-07 14:07:54 +0100148 }
Dave Barach5c20a012017-06-13 08:48:31 -0400149 node->input_main_loops_per_call = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700150 }
Dave Barach5c20a012017-06-13 08:48:31 -0400151 else /* busy */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700152 {
Dave Barach5c20a012017-06-13 08:48:31 -0400153 /* Don't come back for a respectable number of dispatch cycles */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700154 node->input_main_loops_per_call = 1024;
155 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700156
157 /* Allow any signal to wakeup our sleep. */
158 {
159 static sigset_t unblock_all_signals;
160 n_fds_ready = epoll_pwait (em->epoll_fd,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400161 em->epoll_events,
162 vec_len (em->epoll_events),
163 timeout_ms, &unblock_all_signals);
164
Ed Warnickecb9cada2015-12-08 15:45:58 -0700165 /* This kludge is necessary to run over absurdly old kernels */
166 if (n_fds_ready < 0 && errno == ENOSYS)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400167 {
168 n_fds_ready = epoll_wait (em->epoll_fd,
169 em->epoll_events,
170 vec_len (em->epoll_events), timeout_ms);
171 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700172 }
173 }
174
175 if (n_fds_ready < 0)
176 {
177 if (unix_error_is_fatal (errno))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400178 vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700179
180 /* non fatal error (e.g. EINTR). */
181 return 0;
182 }
183
184 em->epoll_waits += 1;
185 em->epoll_files_ready += n_fds_ready;
186
187 for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
188 {
189 u32 i = e->data.u32;
Damjan Marion56dd5432017-09-08 19:52:02 +0200190 clib_file_t *f = pool_elt_at_index (fm->file_pool, i);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400191 clib_error_t *errors[4];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700192 int n_errors = 0;
193
Dave Barach9b8ffd92016-07-08 08:13:45 -0400194 if (PREDICT_TRUE (!(e->events & EPOLLERR)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700195 {
196 if (e->events & EPOLLIN)
197 {
198 errors[n_errors] = f->read_function (f);
199 n_errors += errors[n_errors] != 0;
200 }
201 if (e->events & EPOLLOUT)
202 {
203 errors[n_errors] = f->write_function (f);
204 n_errors += errors[n_errors] != 0;
205 }
206 }
207 else
208 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400209 if (f->error_function)
210 {
211 errors[n_errors] = f->error_function (f);
212 n_errors += errors[n_errors] != 0;
213 }
Ole Troan4b12b3c2016-01-27 23:37:58 +0200214 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400215 close (f->file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700216 }
217
218 ASSERT (n_errors < ARRAY_LEN (errors));
219 for (i = 0; i < n_errors; i++)
220 {
221 unix_save_error (um, errors[i]);
222 }
223 }
224
225 return 0;
226}
227
Dave Barach9b8ffd92016-07-08 08:13:45 -0400228/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
230 .function = linux_epoll_input,
231 .type = VLIB_NODE_TYPE_PRE_INPUT,
232 .name = "unix-epoll-input",
233};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400234/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700235
236clib_error_t *
237linux_epoll_input_init (vlib_main_t * vm)
238{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400239 linux_epoll_main_t *em = &linux_epoll_main;
Damjan Marion56dd5432017-09-08 19:52:02 +0200240 clib_file_main_t *fm = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400241
Ed Warnickecb9cada2015-12-08 15:45:58 -0700242 /* Allocate some events. */
243 vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
244
245 em->epoll_fd = epoll_create (vec_len (em->epoll_events));
246 if (em->epoll_fd < 0)
247 return clib_error_return_unix (0, "epoll_create");
248
Damjan Marion56dd5432017-09-08 19:52:02 +0200249 fm->file_update = linux_epoll_file_update;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700250
251 return 0;
252}
253
254VLIB_INIT_FUNCTION (linux_epoll_input_init);
255
256#endif /* HAVE_LINUX_EPOLL */
257
258static clib_error_t *
259unix_input_init (vlib_main_t * vm)
260{
261 return vlib_call_init_function (vm, linux_epoll_input_init);
262}
263
264VLIB_INIT_FUNCTION (unix_input_init);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400265
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */