vcl: basic support for epoll level-triggered evts
Type: feature
Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I2d5039cd35edd02ffa2439bcac119d0647234385
diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c
index 3538a09..45c208d 100644
--- a/src/vcl/vcl_private.c
+++ b/src/vcl/vcl_private.c
@@ -131,6 +131,8 @@
hash_free (wrk->session_index_by_vpp_handles);
vec_free (wrk->mq_events);
vec_free (wrk->mq_msg_vector);
+ vec_free (wrk->ep_level_evts);
+ vec_free (wrk->ep_level_evts_fl);
vcl_worker_free (wrk);
clib_spinlock_unlock (&vcm->workers_lock);
}
diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h
index 5b19f94..ab3ecab 100644
--- a/src/vcl/vcl_private.h
+++ b/src/vcl/vcl_private.h
@@ -253,6 +253,12 @@
/** Per worker buffer for receiving mq epoll events */
struct epoll_event *mq_events;
+ /** Vector of session indices recently notified of epoll level events */
+ u32 *ep_level_evts;
+
+ /** Storage for level-event session indices while new ones are processed */
+ u32 *ep_level_evts_fl;
+
/** Hash table for disconnect processing */
uword *session_index_by_vpp_handles;
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index d378f40..f96ceea 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -1445,6 +1445,18 @@
return vcl_session_handle (session);
}
+/**
+ * Remove every occurrence of a session index from the worker's vector of
+ * sessions tracked for level-triggered epoll events.
+ *
+ * @param wrk worker whose ep_level_evts vector is scrubbed
+ * @param sid session index to drop from the vector
+ */
+static void
+vcl_epoll_wait_clean_lt (vcl_worker_t *wrk, u32 sid)
+{
+  int idx = vec_len (wrk->ep_level_evts);
+
+  /* Walk from the tail towards the head, so every entry above idx has
+   * already been compared against sid by the time one is deleted. */
+  while (--idx >= 0)
+    {
+      if (wrk->ep_level_evts[idx] != sid)
+        continue;
+      vec_del1 (wrk->ep_level_evts, idx);
+    }
+}
+
int
vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * s,
vcl_session_handle_t sh, u8 do_disconnect)
@@ -1475,6 +1487,8 @@
VDBG (0, "session %u [0x%llx]: EPOLL_CTL_DEL vep_idx %u "
"failed! rv %d (%s)", s->session_index, s->vpp_handle,
s->vep.vep_sh, rv, vppcom_retval_str (rv));
+ if (PREDICT_FALSE (vec_len (wrk->ep_level_evts)))
+ vcl_epoll_wait_clean_lt (wrk, s->session_index);
}
if (!do_disconnect)
@@ -3063,6 +3077,10 @@
s = vcl_session_get (wrk, sid);
s->vep.ev.events = 0;
}
+ if (!(EPOLLET & session_events))
+ {
+ vec_add1 (wrk->ep_level_evts, sid);
+ }
*num_ev += 1;
}
}
@@ -3177,13 +3195,73 @@
return 0;
}
+/**
+ * Exchange the worker's two level-triggered session vectors: after the call
+ * ep_level_evts_fl points at what ep_level_evts held, and vice versa.
+ *
+ * @param wrk worker whose vectors are exchanged
+ */
+static void
+vcl_epoll_swap_lt_lists (vcl_worker_t *wrk)
+{
+  u32 *stashed = wrk->ep_level_evts_fl;
+
+  wrk->ep_level_evts_fl = wrk->ep_level_evts;
+  wrk->ep_level_evts = stashed;
+}
+
+/**
+ * Re-report level-triggered sessions that are still ready.
+ *
+ * Walks the session indices stored in wrk->ep_level_evts_fl. For each
+ * session that still exists and is readable and/or writable according to
+ * the event mask it registered, one entry is written to @a events and the
+ * session index is appended to wrk->ep_level_evts. Sessions that are no
+ * longer ready are dropped. If @a events fills up, the indices that were not
+ * examined are appended to wrk->ep_level_evts unchanged. On return
+ * wrk->ep_level_evts_fl has length zero.
+ *
+ * @param wrk worker owning the level-triggered session vectors
+ * @param events caller-supplied output array
+ * @param maxevents capacity of @a events
+ * @param n_evts in: number of entries of @a events already filled;
+ *               out: updated number of filled entries
+ */
+static void
+vcl_epoll_wait_handle_lt (vcl_worker_t *wrk, struct epoll_event *events,
+                          int maxevents, u32 *n_evts)
+{
+  u32 *sid, ready, *le = wrk->ep_level_evts_fl;
+  vcl_session_t *s;
+
+  if (*n_evts >= maxevents)
+    {
+      /* No room left in events: carry every index over untouched */
+      vec_add (wrk->ep_level_evts, le, vec_len (le));
+      vec_reset_length (wrk->ep_level_evts_fl);
+      return;
+    }
+
+  vec_foreach (sid, le)
+    {
+      s = vcl_session_get (wrk, sid[0]);
+      if (!s)
+        continue;
+
+      /* Collect the ready bits locally before touching the output slot */
+      ready = 0;
+      if ((s->vep.ev.events & EPOLLIN) && vcl_session_read_ready (s))
+        ready |= EPOLLIN;
+      if ((s->vep.ev.events & EPOLLOUT) && vcl_session_write_ready (s))
+        ready |= EPOLLOUT;
+      if (!ready)
+        continue;
+
+      /* Assign rather than OR into the slot: it is caller memory that this
+       * function never initializes, so OR-ing could report stale bits. */
+      events[*n_evts].events = ready;
+      events[*n_evts].data.u64 = s->vep.ev.data.u64;
+      *n_evts += 1;
+      vec_add1 (wrk->ep_level_evts, sid[0]);
+
+      if (*n_evts == maxevents)
+        {
+          /* Output is full: keep the unvisited tail of the list */
+          u32 pos = (sid - le) + 1;
+          vec_add (wrk->ep_level_evts, &le[pos], vec_len (le) - pos);
+          break;
+        }
+    }
+
+  vec_reset_length (wrk->ep_level_evts_fl);
+}
+
int
vppcom_epoll_wait (uint32_t vep_handle, struct epoll_event *events,
int maxevents, double wait_for_time)
{
vcl_worker_t *wrk = vcl_worker_get_current ();
vcl_session_t *vep_session;
- u32 n_evts = 0;
+ u32 n_evts = 0, do_lt = 0;
int i;
if (PREDICT_FALSE (maxevents <= 0))
@@ -3222,12 +3300,23 @@
if ((int) wait_for_time == -2)
return n_evts;
- if (vcm->cfg.use_mq_eventfd)
- return vppcom_epoll_wait_eventfd (wrk, events, maxevents, n_evts,
- wait_for_time);
+ if (PREDICT_FALSE (vec_len (wrk->ep_level_evts)))
+ {
+ vcl_epoll_swap_lt_lists (wrk);
+ do_lt = 1;
+ }
- return vppcom_epoll_wait_condvar (wrk, events, maxevents, n_evts,
- wait_for_time);
+ if (vcm->cfg.use_mq_eventfd)
+ n_evts = vppcom_epoll_wait_eventfd (wrk, events, maxevents, n_evts,
+ wait_for_time);
+ else
+ n_evts = vppcom_epoll_wait_condvar (wrk, events, maxevents, n_evts,
+ wait_for_time);
+
+ if (PREDICT_FALSE (do_lt))
+ vcl_epoll_wait_handle_lt (wrk, events, maxevents, &n_evts);
+
+ return n_evts;
}
int