avf: improve timeout handling
Type: feature
Change-Id: I8fa1d691ef7bc51ae5c44c344195207ce7d0a2e7
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index 1ddae66..a8e8b96 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -23,7 +23,13 @@
#include <vlib/log.h>
#define AVF_AQ_ENQ_SUSPEND_TIME 50e-6
-#define AVF_AQ_ENQ_MAX_WAIT_TIME 50e-3
+#define AVF_AQ_ENQ_MAX_WAIT_TIME 250e-3
+
+#define AVF_RESET_SUSPEND_TIME 20e-3
+#define AVF_RESET_MAX_WAIT_TIME 1
+
+#define AVF_SEND_TO_PF_SUSPEND_TIME 10e-3
+#define AVF_SEND_TO_PF_MAX_WAIT_TIME 1
#define AVF_RXD_STATUS(x) (1ULL << x)
#define AVF_RXD_STATUS_DD AVF_RXD_STATUS(0)
diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c
index efa4079..b7bfac3 100644
--- a/src/plugins/avf/device.c
+++ b/src/plugins/avf/device.c
@@ -115,7 +115,7 @@
{
clib_error_t *err = 0;
avf_aq_desc_t *d, dc;
- f64 t0, wait_time, suspend_time = AVF_AQ_ENQ_SUSPEND_TIME;
+ f64 t0, suspend_time = AVF_AQ_ENQ_SUSPEND_TIME;
d = &ad->atq[ad->atq_next_slot];
clib_memcpy_fast (d, dt, sizeof (avf_aq_desc_t));
@@ -144,12 +144,13 @@
t0 = vlib_time_now (vm);
retry:
vlib_process_suspend (vm, suspend_time);
- wait_time = vlib_time_now (vm) - t0;
if (((d->flags & AVF_AQ_F_DD) == 0) || ((d->flags & AVF_AQ_F_CMP) == 0))
{
- if (wait_time > AVF_AQ_ENQ_MAX_WAIT_TIME)
+ f64 t = vlib_time_now (vm) - t0;
+ if (t > AVF_AQ_ENQ_MAX_WAIT_TIME)
{
+ avf_log_err (ad, "aq_desc_enq failed (timeout %.3fs)", t);
err = clib_error_return (0, "adminq enqueue timeout [opcode 0x%x]",
d->opcode);
goto done;
@@ -387,8 +388,7 @@
clib_error_t *err;
avf_aq_desc_t *d, dt = {.opcode = 0x801,.v_opcode = op };
u32 head;
- int n_retry = 5;
-
+ f64 t0, suspend_time = AVF_SEND_TO_PF_SUSPEND_TIME;
/* suppress interrupt in the next adminq receive slot
as we are going to wait for response
@@ -399,14 +399,20 @@
if ((err = avf_aq_desc_enq (vm, ad, &dt, in, in_len)))
return err;
+ t0 = vlib_time_now (vm);
retry:
head = avf_get_u32 (ad->bar0, AVF_ARQH);
if (ad->arq_next_slot == head)
{
- if (--n_retry == 0)
- return clib_error_return (0, "timeout");
- vlib_process_suspend (vm, 10e-3);
+ f64 t = vlib_time_now (vm) - t0;
+ if (t > AVF_SEND_TO_PF_MAX_WAIT_TIME)
+ {
+ avf_log_err (ad, "send_to_pf failed (timeout %.3fs)", t);
+ return clib_error_return (0, "timeout");
+ }
+ vlib_process_suspend (vm, suspend_time);
+ suspend_time *= 2;
goto retry;
}
@@ -425,7 +431,9 @@
clib_memcpy_fast (e, buf, sizeof (virtchnl_pf_event_t));
avf_arq_slot_init (ad, ad->arq_next_slot);
ad->arq_next_slot++;
- n_retry = 5;
+ /* reset timer */
+ t0 = vlib_time_now (vm);
+ suspend_time = AVF_SEND_TO_PF_SUSPEND_TIME;
goto retry;
}
@@ -778,7 +786,7 @@
avf_aq_desc_t d = { 0 };
clib_error_t *error;
u32 rstat;
- int n_retry = 20;
+ f64 t0, t = 0, suspend_time = AVF_RESET_SUSPEND_TIME;
avf_log_debug (ad, "reset");
@@ -787,19 +795,26 @@
if ((error = avf_aq_desc_enq (vm, ad, &d, 0, 0)))
return error;
+ t0 = vlib_time_now (vm);
retry:
- vlib_process_suspend (vm, 10e-3);
+ vlib_process_suspend (vm, suspend_time);
+
rstat = avf_get_u32 (ad->bar0, AVFGEN_RSTAT);
if (rstat == 2 || rstat == 3)
- return 0;
-
- if (--n_retry == 0)
{
- avf_log_err (ad, "reset failed");
+ avf_log_debug (ad, "reset completed in %.3fs", vlib_time_now (vm) - t0);
+ return 0;
+ }
+
+ t = vlib_time_now (vm) - t0;
+ if (t > AVF_RESET_MAX_WAIT_TIME)
+ {
+ avf_log_err (ad, "reset failed (timeout %.3fs)", t);
return clib_error_return (0, "reset failed (timeout)");
}
+ suspend_time *= 2;
goto retry;
}
@@ -809,7 +824,7 @@
virtchnl_vf_res_request_t res_req = { 0 };
clib_error_t *error;
u32 rstat;
- int n_retry = 20;
+ f64 t0, t, suspend_time = AVF_RESET_SUSPEND_TIME;
res_req.num_queue_pairs = num_queue_pairs;
@@ -829,16 +844,23 @@
res_req.num_queue_pairs);
}
+ t0 = vlib_time_now (vm);
retry:
- vlib_process_suspend (vm, 10e-3);
+ vlib_process_suspend (vm, suspend_time);
+ t = vlib_time_now (vm) - t0;
+
rstat = avf_get_u32 (ad->bar0, AVFGEN_RSTAT);
if ((rstat == VIRTCHNL_VFR_COMPLETED) || (rstat == VIRTCHNL_VFR_VFACTIVE))
goto done;
- if (--n_retry == 0)
- return clib_error_return (0, "reset failed (timeout)");
+ if (t > AVF_RESET_MAX_WAIT_TIME)
+ {
+ avf_log_err (ad, "request queues failed (timeout %.3f seconds)", t);
+ return clib_error_return (0, "request queues failed (timeout)");
+ }
+ suspend_time *= 2;
goto retry;
done: