avf: improve timeout handling

Type: feature

Change-Id: I8fa1d691ef7bc51ae5c44c344195207ce7d0a2e7
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index 1ddae66..a8e8b96 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -23,7 +23,13 @@
 #include <vlib/log.h>
 
 #define AVF_AQ_ENQ_SUSPEND_TIME		50e-6
-#define AVF_AQ_ENQ_MAX_WAIT_TIME	50e-3
+#define AVF_AQ_ENQ_MAX_WAIT_TIME	250e-3
+
+#define AVF_RESET_SUSPEND_TIME		20e-3
+#define AVF_RESET_MAX_WAIT_TIME		1
+
+#define AVF_SEND_TO_PF_SUSPEND_TIME	10e-3
+#define AVF_SEND_TO_PF_MAX_WAIT_TIME	1
 
 #define AVF_RXD_STATUS(x)		(1ULL << x)
 #define AVF_RXD_STATUS_DD		AVF_RXD_STATUS(0)
diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c
index efa4079..b7bfac3 100644
--- a/src/plugins/avf/device.c
+++ b/src/plugins/avf/device.c
@@ -115,7 +115,7 @@
 {
   clib_error_t *err = 0;
   avf_aq_desc_t *d, dc;
-  f64 t0, wait_time, suspend_time = AVF_AQ_ENQ_SUSPEND_TIME;
+  f64 t0, suspend_time = AVF_AQ_ENQ_SUSPEND_TIME;
 
   d = &ad->atq[ad->atq_next_slot];
   clib_memcpy_fast (d, dt, sizeof (avf_aq_desc_t));
@@ -144,12 +144,13 @@
   t0 = vlib_time_now (vm);
 retry:
   vlib_process_suspend (vm, suspend_time);
-  wait_time = vlib_time_now (vm) - t0;
 
   if (((d->flags & AVF_AQ_F_DD) == 0) || ((d->flags & AVF_AQ_F_CMP) == 0))
     {
-      if (wait_time > AVF_AQ_ENQ_MAX_WAIT_TIME)
+      f64 t = vlib_time_now (vm) - t0;
+      if (t > AVF_AQ_ENQ_MAX_WAIT_TIME)
 	{
+	  avf_log_err (ad, "aq_desc_enq failed (timeout %.3fs)", t);
 	  err = clib_error_return (0, "adminq enqueue timeout [opcode 0x%x]",
 				   d->opcode);
 	  goto done;
@@ -387,8 +388,7 @@
   clib_error_t *err;
   avf_aq_desc_t *d, dt = {.opcode = 0x801,.v_opcode = op };
   u32 head;
-  int n_retry = 5;
-
+  f64 t0, suspend_time = AVF_SEND_TO_PF_SUSPEND_TIME;
 
   /* suppress interrupt in the next adminq receive slot
      as we are going to wait for response
@@ -399,14 +399,20 @@
   if ((err = avf_aq_desc_enq (vm, ad, &dt, in, in_len)))
     return err;
 
+  t0 = vlib_time_now (vm);
 retry:
   head = avf_get_u32 (ad->bar0, AVF_ARQH);
 
   if (ad->arq_next_slot == head)
     {
-      if (--n_retry == 0)
-	return clib_error_return (0, "timeout");
-      vlib_process_suspend (vm, 10e-3);
+      f64 t = vlib_time_now (vm) - t0;
+      if (t > AVF_SEND_TO_PF_MAX_WAIT_TIME)
+	{
+	  avf_log_err (ad, "send_to_pf failed (timeout %.3fs)", t);
+	  return clib_error_return (0, "timeout");
+	}
+      vlib_process_suspend (vm, suspend_time);
+      suspend_time *= 2;
       goto retry;
     }
 
@@ -425,7 +431,9 @@
       clib_memcpy_fast (e, buf, sizeof (virtchnl_pf_event_t));
       avf_arq_slot_init (ad, ad->arq_next_slot);
       ad->arq_next_slot++;
-      n_retry = 5;
+      /* reset timer */
+      t0 = vlib_time_now (vm);
+      suspend_time = AVF_SEND_TO_PF_SUSPEND_TIME;
       goto retry;
     }
 
@@ -778,7 +786,7 @@
   avf_aq_desc_t d = { 0 };
   clib_error_t *error;
   u32 rstat;
-  int n_retry = 20;
+  f64 t0, t = 0, suspend_time = AVF_RESET_SUSPEND_TIME;
 
   avf_log_debug (ad, "reset");
 
@@ -787,19 +795,26 @@
   if ((error = avf_aq_desc_enq (vm, ad, &d, 0, 0)))
     return error;
 
+  t0 = vlib_time_now (vm);
 retry:
-  vlib_process_suspend (vm, 10e-3);
+  vlib_process_suspend (vm, suspend_time);
+
   rstat = avf_get_u32 (ad->bar0, AVFGEN_RSTAT);
 
   if (rstat == 2 || rstat == 3)
-    return 0;
-
-  if (--n_retry == 0)
     {
-      avf_log_err (ad, "reset failed");
+      avf_log_debug (ad, "reset completed in %.3fs", t);
+      return 0;
+    }
+
+  t = vlib_time_now (vm) - t0;
+  if (t > AVF_RESET_MAX_WAIT_TIME)
+    {
+      avf_log_err (ad, "reset failed (timeout %.3fs)", t);
       return clib_error_return (0, "reset failed (timeout)");
     }
 
+  suspend_time *= 2;
   goto retry;
 }
 
@@ -809,7 +824,7 @@
   virtchnl_vf_res_request_t res_req = { 0 };
   clib_error_t *error;
   u32 rstat;
-  int n_retry = 20;
+  f64 t0, t, suspend_time = AVF_RESET_SUSPEND_TIME;
 
   res_req.num_queue_pairs = num_queue_pairs;
 
@@ -829,16 +844,23 @@
 				res_req.num_queue_pairs);
     }
 
+  t0 = vlib_time_now (vm);
 retry:
-  vlib_process_suspend (vm, 10e-3);
+  vlib_process_suspend (vm, suspend_time);
+  t = vlib_time_now (vm) - t0;
+
   rstat = avf_get_u32 (ad->bar0, AVFGEN_RSTAT);
 
   if ((rstat == VIRTCHNL_VFR_COMPLETED) || (rstat == VIRTCHNL_VFR_VFACTIVE))
     goto done;
 
-  if (--n_retry == 0)
-    return clib_error_return (0, "reset failed (timeout)");
+  if (t > AVF_RESET_MAX_WAIT_TIME)
+    {
+      avf_log_err (ad, "request queues failed (timeout %.3f seconds)", t);
+      return clib_error_return (0, "request queues failed (timeout)");
+    }
 
+  suspend_time *= 2;
   goto retry;
 
 done: