blob: 7b4183a4a700eb84870f3c1043c677ddfdddb17b [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * input.c: Unix file input
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vlib/vlib.h>
41#include <vlib/unix/unix.h>
42#include <signal.h>
43
44/* FIXME autoconf */
45#define HAVE_LINUX_EPOLL
46
47#ifdef HAVE_LINUX_EPOLL
48
49#include <sys/epoll.h>
50
Dave Barach9b8ffd92016-07-08 08:13:45 -040051typedef struct
52{
Ed Warnickecb9cada2015-12-08 15:45:58 -070053 int epoll_fd;
Dave Barach9b8ffd92016-07-08 08:13:45 -040054 struct epoll_event *epoll_events;
Ed Warnickecb9cada2015-12-08 15:45:58 -070055
56 /* Statistics. */
57 u64 epoll_files_ready;
58 u64 epoll_waits;
59} linux_epoll_main_t;
60
61static linux_epoll_main_t linux_epoll_main;
62
63static void
Dave Barach9b8ffd92016-07-08 08:13:45 -040064linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type)
Ed Warnickecb9cada2015-12-08 15:45:58 -070065{
Dave Barach9b8ffd92016-07-08 08:13:45 -040066 unix_main_t *um = &unix_main;
67 linux_epoll_main_t *em = &linux_epoll_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070068 struct epoll_event e;
Dave Baracha1a093d2017-03-02 13:13:23 -050069 int op;
Ed Warnickecb9cada2015-12-08 15:45:58 -070070
71 memset (&e, 0, sizeof (e));
72
73 e.events = EPOLLIN;
74 if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
75 e.events |= EPOLLOUT;
Dave Barach9b8ffd92016-07-08 08:13:45 -040076 if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
77 e.events |= EPOLLET;
Ed Warnickecb9cada2015-12-08 15:45:58 -070078 e.data.u32 = f - um->file_pool;
79
Dave Baracha1a093d2017-03-02 13:13:23 -050080 op = -1;
81
82 switch (update_type)
83 {
84 case UNIX_FILE_UPDATE_ADD:
85 op = EPOLL_CTL_ADD;
86 break;
87
88 case UNIX_FILE_UPDATE_MODIFY:
89 op = EPOLL_CTL_MOD;
90 break;
91
92 case UNIX_FILE_UPDATE_DELETE:
93 op = EPOLL_CTL_DEL;
94 break;
95
96 default:
97 clib_warning ("unknown update_type %d", update_type);
98 return;
99 }
100
101 if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
102 clib_unix_warning ("epoll_ctl");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700103}
104
105static uword
106linux_epoll_input (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400107 vlib_node_runtime_t * node, vlib_frame_t * frame)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700108{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400109 unix_main_t *um = &unix_main;
110 linux_epoll_main_t *em = &linux_epoll_main;
111 struct epoll_event *e;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700112 int n_fds_ready;
113
114 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400115 vlib_node_main_t *nm = &vm->node_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700116 u64 t = nm->cpu_time_next_process_ready;
117 f64 timeout;
118 int timeout_ms, max_timeout_ms = 10;
119 f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
120
121 if (t == ~0ULL)
122 {
123 timeout = 10e-3;
124 timeout_ms = max_timeout_ms;
125 }
126 else
127 {
128 timeout =
129 (((i64) t - (i64) clib_cpu_time_now ())
130 * vm->clib_time.seconds_per_clock)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400131 /* subtract off some slop time */ - 50e-6;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400132
Damjan Marion18bc9072016-12-07 14:07:54 +0100133 if (timeout < 1e3)
134 {
135 /* We have event happenning in less than 1 ms so
136 don't allow epoll to wait */
137 timeout_ms = 0;
138 }
139 else
140 {
141 timeout_ms = timeout * 1e3;
142
143 /* Must be between 1 and 10 ms. */
144 timeout_ms = clib_max (1, timeout_ms);
145 timeout_ms = clib_min (max_timeout_ms, timeout_ms);
146 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700147 }
148
149 /* If we still have input nodes polling (e.g. vnet packet generator)
150 don't sleep. */
151 if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0)
152 timeout_ms = 0;
153
Dave Barach9b8ffd92016-07-08 08:13:45 -0400154 /*
155 * When busy: don't wait & only epoll for input
156 * every 1024 times through main loop.
Dave Barachdae88b92016-04-19 09:38:35 -0400157 */
158 if (vector_rate > 1 || vm->api_queue_nonempty)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700159 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700160 timeout_ms = 0;
161 node->input_main_loops_per_call = 1024;
162 }
163 else
164 /* We're not busy; go to sleep for a while. */
165 node->input_main_loops_per_call = 0;
166
167 /* Allow any signal to wakeup our sleep. */
168 {
169 static sigset_t unblock_all_signals;
170 n_fds_ready = epoll_pwait (em->epoll_fd,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400171 em->epoll_events,
172 vec_len (em->epoll_events),
173 timeout_ms, &unblock_all_signals);
174
Ed Warnickecb9cada2015-12-08 15:45:58 -0700175 /* This kludge is necessary to run over absurdly old kernels */
176 if (n_fds_ready < 0 && errno == ENOSYS)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400177 {
178 n_fds_ready = epoll_wait (em->epoll_fd,
179 em->epoll_events,
180 vec_len (em->epoll_events), timeout_ms);
181 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700182 }
183 }
184
185 if (n_fds_ready < 0)
186 {
187 if (unix_error_is_fatal (errno))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400188 vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700189
190 /* non fatal error (e.g. EINTR). */
191 return 0;
192 }
193
194 em->epoll_waits += 1;
195 em->epoll_files_ready += n_fds_ready;
196
197 for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
198 {
199 u32 i = e->data.u32;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400200 unix_file_t *f = pool_elt_at_index (um->file_pool, i);
201 clib_error_t *errors[4];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700202 int n_errors = 0;
203
Dave Barach9b8ffd92016-07-08 08:13:45 -0400204 if (PREDICT_TRUE (!(e->events & EPOLLERR)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700205 {
206 if (e->events & EPOLLIN)
207 {
208 errors[n_errors] = f->read_function (f);
209 n_errors += errors[n_errors] != 0;
210 }
211 if (e->events & EPOLLOUT)
212 {
213 errors[n_errors] = f->write_function (f);
214 n_errors += errors[n_errors] != 0;
215 }
216 }
217 else
218 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400219 if (f->error_function)
220 {
221 errors[n_errors] = f->error_function (f);
222 n_errors += errors[n_errors] != 0;
223 }
Ole Troan4b12b3c2016-01-27 23:37:58 +0200224 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400225 close (f->file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700226 }
227
228 ASSERT (n_errors < ARRAY_LEN (errors));
229 for (i = 0; i < n_errors; i++)
230 {
231 unix_save_error (um, errors[i]);
232 }
233 }
234
235 return 0;
236}
237
Dave Barach9b8ffd92016-07-08 08:13:45 -0400238/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700239VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
240 .function = linux_epoll_input,
241 .type = VLIB_NODE_TYPE_PRE_INPUT,
242 .name = "unix-epoll-input",
243};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400244/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700245
246clib_error_t *
247linux_epoll_input_init (vlib_main_t * vm)
248{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400249 linux_epoll_main_t *em = &linux_epoll_main;
250 unix_main_t *um = &unix_main;
251
Ed Warnickecb9cada2015-12-08 15:45:58 -0700252 /* Allocate some events. */
253 vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
254
255 em->epoll_fd = epoll_create (vec_len (em->epoll_events));
256 if (em->epoll_fd < 0)
257 return clib_error_return_unix (0, "epoll_create");
258
259 um->file_update = linux_epoll_file_update;
260
261 return 0;
262}
263
264VLIB_INIT_FUNCTION (linux_epoll_input_init);
265
266#endif /* HAVE_LINUX_EPOLL */
267
268static clib_error_t *
269unix_input_init (vlib_main_t * vm)
270{
271 return vlib_call_init_function (vm, linux_epoll_input_init);
272}
273
274VLIB_INIT_FUNCTION (unix_input_init);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400275
276/*
277 * fd.io coding-style-patch-verification: ON
278 *
279 * Local Variables:
280 * eval: (c-set-style "gnu")
281 * End:
282 */