tcp: improve timestamp rtt accuracy

- switch to using vlib_time_now as the reference clock for timestamps
- use us precision ticks for tcp but keep using ms precision for
timestamps. As a result, srtt, rttvar and rto are now measured in us
instead of ms. MRTT samples derived from timestamps are converted from
ms to us (so they still have only ms granularity), while high precision
samples are used directly with us precision, i.e., they are no longer
converted to ms precision samples.

Type: improvement

Change-Id: Ibda559575d9b4fdc85b0985264f7c865ff367e34
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index c30a693..938a863 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -681,7 +681,7 @@
   tc->snd_una = tc->iss;
   tc->snd_nxt = tc->iss + 1;
   tc->snd_una_max = tc->snd_nxt;
-  tc->srtt = 100;		/* 100 ms */
+  tc->srtt = 0.1 * THZ;		/* 100 ms */
 
   if (!tcp_cfg.csum_offload)
     tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD;
@@ -1361,11 +1361,6 @@
     pool_init_fixed (tm->half_open_connections,
 		     tcp_cfg.preallocated_half_open_connections);
 
-  /* Initialize clocks per tick for TCP timestamp. Used to compute
-   * monotonically increasing timestamps. */
-  tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
-    / TCP_TSTAMP_RESOLUTION;
-
   if (num_threads > 1)
     {
       clib_spinlock_init (&tm->half_open_lock);
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 91783a6..bc6e353 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -91,7 +91,7 @@
   /** convenience pointer to this thread's vlib main */
   vlib_main_t *vm;
 
-  /** worker time */
+  /** Time measured in @ref TCP_TSTP_TICK used for time stamps */
   u32 time_now;
 
   /* Max timers to be handled per dispatch loop */
@@ -209,8 +209,6 @@
   /* Pool of listeners. */
   tcp_connection_t *listener_pool;
 
-  f64 tstamp_ticks_per_clock;
-
   /** vlib buffer size */
   u32 bytes_per_buffer;
 
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index b76b404..6030440 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -205,10 +205,11 @@
 	      tc->rcv_opts.tsecr, tc->tsecr_last_ack,
 	      tcp_time_now () - tc->tsval_recent_age);
   s = format (s, " snd_mss %u\n", tc->snd_mss);
-  s = format (s, " rto %u rto_boff %u srtt %u us %.3f rttvar %u rtt_ts %.4f",
-	      tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar,
-	      tc->rtt_ts);
-  s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss);
+  s = format (s, " rto %u rto_boff %u srtt %.1f us %.3f rttvar %.1f",
+	      tc->rto / 1000, tc->rto_boff, tc->srtt / 1000.0,
+	      tc->mrtt_us * 1e3, tc->rttvar / 1000.0);
+  s = format (s, " rtt_ts %.4f rtt_seq %u\n", tc->rtt_ts,
+	      tc->rtt_seq - tc->iss);
   s = format (s, " next_node %u opaque 0x%x fib_index %u\n",
 	      tc->next_node_index, tc->next_node_opaque, tc->c_fib_index);
   s = format (s, " cong:   %U", format_tcp_congestion, tc);
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index c4b155a..cb00ca4 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -218,9 +218,7 @@
 always_inline u32
 tcp_set_time_now (tcp_worker_ctx_t * wrk)
 {
-  tcp_main_t *tm = &tcp_main;
-  wrk->time_now = (u64) (clib_cpu_time_now () * tm->tstamp_ticks_per_clock);
-  return wrk->time_now;
+  return wrk->time_now = (u64) (vlib_time_now (wrk->vm) * TCP_TSTP_HZ);
 }
 
 always_inline tcp_connection_t *
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 519219e..5fa7bf2 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -424,31 +424,26 @@
 /**
  * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298
  *
- * Note that although the original article, srtt and rttvar are scaled
+ * Note that although in the original article srtt and rttvar are scaled
  * to minimize round-off errors, here we don't. Instead, we rely on
  * better precision time measurements.
+ *
+ * A known limitation of the algorithm is that a drop in rtt results in an
+ * rttvar increase and a bigger RTO.
+ *
+ * mrtt must be provided in @ref TCP_TICK multiples, i.e., in us. Note that
+ * timestamps are measured as ms ticks so they must be converted before
+ * calling this function.
  */
 static void
 tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
 {
   int err, diff;
 
-  if (tc->srtt != 0)
-    {
-      err = mrtt - tc->srtt;
-
-      /* XXX Drop in RTT results in RTTVAR increase and bigger RTO.
-       * The increase should be bound */
-      tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
-      diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
-      tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
-    }
-  else
-    {
-      /* First measurement. */
-      tc->srtt = mrtt;
-      tc->rttvar = mrtt >> 1;
-    }
+  err = mrtt - tc->srtt;
+  tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
+  diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
+  tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
 }
 
 static inline void
@@ -506,8 +501,8 @@
    * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
   else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
     {
-      u32 now = tcp_tstamp (tc);
-      mrtt = clib_max (now - tc->rcv_opts.tsecr, 1);
+      mrtt = clib_max (tcp_tstamp (tc) - tc->rcv_opts.tsecr, 1);
+      mrtt *= TCP_TSTP_TO_HZ;
     }
 
 estimate_rtt:
@@ -543,8 +538,8 @@
     }
   else
     {
-      mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr;
-      mrtt = clib_max (mrtt, 1);
+      mrtt = tcp_tstamp (tc) - tc->rcv_opts.tsecr;
+      mrtt = clib_max (mrtt, 1) * TCP_TSTP_TO_HZ;
       /* Due to retransmits we don't know the initial mrtt */
       if (tc->rto_boff && mrtt > 1 * THZ)
 	mrtt = 1 * THZ;
@@ -552,7 +547,11 @@
     }
 
   if (mrtt > 0 && mrtt < TCP_RTT_MAX)
-    tcp_estimate_rtt (tc, mrtt);
+    {
+      /* First measurement as per RFC 6298 */
+      tc->srtt = mrtt;
+      tc->rttvar = mrtt >> 1;
+    }
   tcp_update_rto (tc);
 }
 
diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h
index 3cf4e9e..d7bcac5 100644
--- a/src/vnet/tcp/tcp_types.h
+++ b/src/vnet/tcp/tcp_types.h
@@ -22,10 +22,14 @@
 #include <vnet/session/transport.h>
 #include <vppinfra/tw_timer_16t_2w_512sl.h>
 
-#define TCP_TICK 0.001			/**< TCP tick period (s) */
-#define THZ (u32) (1/TCP_TICK)		/**< TCP tick frequency */
-#define TCP_TSTAMP_RESOLUTION TCP_TICK	/**< Time stamp resolution */
-#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */
+#define TCP_TICK 0.000001			/**< TCP tick period (s) */
+#define THZ (u32) (1/TCP_TICK)			/**< TCP tick frequency */
+
+#define TCP_TSTP_TICK 0.001			/**< Timestamp tick (s) */
+#define TCP_TSTP_HZ (u32) (1/TCP_TSTP_TICK)	/**< Timestamp freq */
+#define TCP_PAWS_IDLE (24 * 86400 * TCP_TSTP_HZ)/**< 24 days */
+#define TCP_TSTP_TO_HZ (u32) (TCP_TSTP_TICK * THZ)
+
 #define TCP_FIB_RECHECK_PERIOD	1 * THZ	/**< Recheck every 1s */
 #define TCP_MAX_OPTION_SPACE 40
 #define TCP_CC_DATA_SZ 24
@@ -355,7 +359,7 @@
   /* RTT and RTO */
   u32 rto;		/**< Retransmission timeout */
   u32 rto_boff;		/**< Index for RTO backoff */
-  u32 srtt;		/**< Smoothed RTT */
+  u32 srtt;		/**< Smoothed RTT measured in @ref TCP_TICK */
   u32 rttvar;		/**< Smoothed mean RTT difference. Approximates variance */
   u32 rtt_seq;		/**< Sequence number for tracked ACK */
   f64 rtt_ts;		/**< Timestamp for tracked ACK */