session: app mq congestion detection
Detect mq congestion and handle it by queueing messages in a fifo and
postponing handling via rpcs. App workers with congested mqs cannot
accept nor connect additional sessions.
Type: feature
Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I401d971a1a53896758b88fc60f158cbc31e0c7cb
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index 5437760..6bbad04 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -33,6 +33,7 @@
app_wrk->wrk_map_index = ~0;
app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
clib_spinlock_init (&app_wrk->detached_seg_managers_lock);
+ clib_spinlock_init (&app_wrk->postponed_mq_msgs_lock);
APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index);
return app_wrk;
}
@@ -126,6 +127,7 @@
}
vec_free (app_wrk->detached_seg_managers);
clib_spinlock_free (&app_wrk->detached_seg_managers_lock);
+ clib_spinlock_free (&app_wrk->postponed_mq_msgs_lock);
if (CLIB_DEBUG)
clib_memset (app_wrk, 0xfe, sizeof (*app_wrk));
@@ -338,8 +340,10 @@
listener = listen_session_get_from_handle (s->listener_handle);
app_wrk = application_listener_select_worker (listener);
- s->app_wrk_index = app_wrk->wrk_index;
+ if (PREDICT_FALSE (app_wrk->mq_congested))
+ return -1;
+ s->app_wrk_index = app_wrk->wrk_index;
app = application_get (app_wrk->app_index);
if (app->cb_fns.fifo_tuning_callback)
s->flags |= SESSION_F_CUSTOM_FIFO_TUNING;
@@ -510,6 +514,9 @@
app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep,
session_handle_t *rsh)
{
+ if (PREDICT_FALSE (app_wrk->mq_congested))
+ return SESSION_E_REFUSED;
+
sep->app_wrk_index = app_wrk->wrk_index;
return session_open (sep, rsh);
@@ -614,12 +621,240 @@
return app_wrk->app_is_builtin;
}
+static int
+app_wrk_send_fd (app_worker_t *app_wrk, int fd)
+{
+ if (!appns_sapi_enabled ())
+ {
+ vl_api_registration_t *reg;
+ clib_error_t *error;
+
+ reg =
+ vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
+ if (!reg)
+ {
+ clib_warning ("no api registration for client: %u",
+ app_wrk->api_client_index);
+ return -1;
+ }
+
+ if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI)
+ return -1;
+
+ error = vl_api_send_fd_msg (reg, &fd, 1);
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
+
+ return 0;
+ }
+
+ app_sapi_msg_t smsg = { 0 };
+ app_namespace_t *app_ns;
+ clib_error_t *error;
+ application_t *app;
+ clib_socket_t *cs;
+ u32 cs_index;
+
+ app = application_get (app_wrk->app_index);
+ app_ns = app_namespace_get (app->ns_index);
+ cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
+ cs = appns_sapi_get_socket (app_ns, cs_index);
+ if (PREDICT_FALSE (!cs))
+ return -1;
+
+ /* There's no payload for the message only the type */
+ smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
+ error = clib_socket_sendmsg (cs, &smsg, sizeof (smsg), &fd, 1);
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+mq_try_lock_and_alloc_msg (svm_msg_q_t *mq, session_mq_rings_e ring,
+ svm_msg_q_msg_t *msg)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 5)
+ {
+ rv = svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring, SVM_Q_NOWAIT, msg);
+ if (!rv)
+ return 0;
+ /*
+ * Break the loop if mq is full, usually this is because the
+ * app has crashed or is hanging on somewhere.
+ */
+ if (rv != -1)
+ break;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+typedef union app_wrk_mq_rpc_args_
+{
+ struct
+ {
+ u32 thread_index;
+ u32 app_wrk_index;
+ };
+ uword as_uword;
+} app_wrk_mq_rpc_ags_t;
+
+static int
+app_wrk_handle_mq_postponed_msgs (void *arg)
+{
+ svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
+ app_wrk_postponed_msg_t *pm;
+ app_wrk_mq_rpc_ags_t args;
+ u32 max_msg, n_msg = 0;
+ app_worker_t *app_wrk;
+ session_event_t *evt;
+ svm_msg_q_t *mq;
+
+ args.as_uword = pointer_to_uword (arg);
+ app_wrk = app_worker_get_if_valid (args.app_wrk_index);
+ if (!app_wrk)
+ return 0;
+
+ mq = app_wrk->event_queue;
+
+ clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock);
+
+ max_msg = clib_min (32, clib_fifo_elts (app_wrk->postponed_mq_msgs));
+
+ while (n_msg < max_msg)
+ {
+ pm = clib_fifo_head (app_wrk->postponed_mq_msgs);
+ if (mq_try_lock_and_alloc_msg (mq, pm->ring, mq_msg))
+ break;
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ clib_memset (evt, 0, sizeof (*evt));
+ evt->event_type = pm->event_type;
+ clib_memcpy_fast (evt->data, pm->data, pm->len);
+
+ if (pm->fd != -1)
+ app_wrk_send_fd (app_wrk, pm->fd);
+
+ svm_msg_q_add_and_unlock (mq, mq_msg);
+
+ clib_fifo_advance_head (app_wrk->postponed_mq_msgs, 1);
+ n_msg += 1;
+ }
+
+ if (!clib_fifo_elts (app_wrk->postponed_mq_msgs))
+ {
+ app_wrk->mq_congested = 0;
+ }
+ else
+ {
+ session_send_rpc_evt_to_thread_force (
+ args.thread_index, app_wrk_handle_mq_postponed_msgs,
+ uword_to_pointer (args.as_uword, void *));
+ }
+
+ clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock);
+
+ return 0;
+}
+
+static void
+app_wrk_add_mq_postponed_msg (app_worker_t *app_wrk, session_mq_rings_e ring,
+ u8 evt_type, void *msg, u32 msg_len, int fd)
+{
+ app_wrk_postponed_msg_t *pm;
+
+ clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock);
+
+ app_wrk->mq_congested = 1;
+
+ clib_fifo_add2 (app_wrk->postponed_mq_msgs, pm);
+ clib_memcpy_fast (pm->data, msg, msg_len);
+ pm->event_type = evt_type;
+ pm->ring = ring;
+ pm->len = msg_len;
+ pm->fd = fd;
+
+ if (clib_fifo_elts (app_wrk->postponed_mq_msgs) == 1)
+ {
+ app_wrk_mq_rpc_ags_t args = { .thread_index = vlib_get_thread_index (),
+ .app_wrk_index = app_wrk->wrk_index };
+
+ session_send_rpc_evt_to_thread_force (
+ args.thread_index, app_wrk_handle_mq_postponed_msgs,
+ uword_to_pointer (args.as_uword, void *));
+ }
+
+ clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock);
+}
+
+always_inline void
+app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *evt;
+ int rv;
+
+ if (PREDICT_FALSE (app_wrk->mq_congested))
+ goto handle_congestion;
+
+ rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_CTRL_EVT_RING, mq_msg);
+ if (PREDICT_FALSE (rv))
+ goto handle_congestion;
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ clib_memset (evt, 0, sizeof (*evt));
+ evt->event_type = evt_type;
+ clib_memcpy_fast (evt->data, msg, msg_len);
+
+ if (fd != -1)
+ app_wrk_send_fd (app_wrk, fd);
+
+ svm_msg_q_add_and_unlock (mq, mq_msg);
+
+ return;
+
+handle_congestion:
+
+ app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_CTRL_EVT_RING, evt_type,
+ msg, msg_len, fd);
+}
+
+void
+app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, fd);
+}
+
+void
+app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1);
+}
+
static inline int
app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s)
{
+ svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
session_event_t *evt;
- svm_msg_q_msg_t msg;
svm_msg_q_t *mq;
+ u32 app_session;
+ int rv;
if (app_worker_application_is_builtin (app_wrk))
return app_worker_builtin_rx (app_wrk, s);
@@ -627,68 +862,72 @@
if (svm_fifo_has_event (s->rx_fifo))
return 0;
+ app_session = s->rx_fifo->shr->client_session_index;
mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
- {
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
- }
+ if (PREDICT_FALSE (app_wrk->mq_congested))
+ goto handle_congestion;
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
- {
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
- return -1;
- }
+ rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->session_index = s->rx_fifo->shr->client_session_index;
+ if (PREDICT_FALSE (rv))
+ goto handle_congestion;
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
evt->event_type = SESSION_IO_EVT_RX;
+ evt->session_index = app_session;
(void) svm_fifo_set_event (s->rx_fifo);
- svm_msg_q_add_and_unlock (mq, &msg);
+
+ svm_msg_q_add_and_unlock (mq, mq_msg);
return 0;
+
+handle_congestion:
+
+ app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING,
+ SESSION_IO_EVT_RX, &app_session,
+ sizeof (app_session), -1);
+ return -1;
}
static inline int
app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s)
{
- svm_msg_q_t *mq;
+ svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
session_event_t *evt;
- svm_msg_q_msg_t msg;
+ svm_msg_q_t *mq;
+ u32 app_session;
+ int rv;
if (app_worker_application_is_builtin (app_wrk))
return app_worker_builtin_tx (app_wrk, s);
+ app_session = s->tx_fifo->shr->client_session_index;
mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
- {
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
- }
+ if (PREDICT_FALSE (app_wrk->mq_congested))
+ goto handle_congestion;
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
- {
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
- return -1;
- }
+ rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
+ if (PREDICT_FALSE (rv))
+ goto handle_congestion;
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
evt->event_type = SESSION_IO_EVT_TX;
- evt->session_index = s->tx_fifo->shr->client_session_index;
+ evt->session_index = app_session;
- svm_msg_q_add_and_unlock (mq, &msg);
+ svm_msg_q_add_and_unlock (mq, mq_msg);
+
return 0;
+
+handle_congestion:
+
+ app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING,
+ SESSION_IO_EVT_TX, &app_session,
+ sizeof (app_session), -1);
+ return -1;
}
/* *INDENT-OFF* */
@@ -764,10 +1003,12 @@
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
u32 indent = 1;
- s = format (s, "%U wrk-index %u app-index %u map-index %u "
- "api-client-index %d\n", format_white_space, indent,
- app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index,
- app_wrk->api_client_index);
+ s = format (s,
+ "%U wrk-index %u app-index %u map-index %u "
+ "api-client-index %d mq-cong %u\n",
+ format_white_space, indent, app_wrk->wrk_index,
+ app_wrk->app_index, app_wrk->wrk_map_index,
+ app_wrk->api_client_index, app_wrk->mq_congested);
return s;
}