dns: handle multiple replies for single requests

The world is a mess. A single DNS request may yield multiple, subtly
different responses; all with the same DNS protocol-level ID.

Last response wins in terms of what ends up in the cache.

First response wins in terms of the response sent to the client. Hard
to do otherwise since we have no clue that more than one answer will
be forthcoming.

Type: fix
Signed-off-by: Dave Barach <dave@barachs.net>
Change-Id: I3175a40eb1fea237048d16b852a430f5ab51eaef
diff --git a/MAINTAINERS b/MAINTAINERS
index 5b7ca88..7e4a9d0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -245,6 +245,11 @@
 M:	Neale Ranns <nranns@cisco.com>
 F:	src/vnet/dhcp/
 
+VNET DNS
+I:	dns
+M:	Dave Barach <dave@barachs.net>
+F:	src/vnet/dns/
+
 VNET TLS and TLS engine plugins
 I:	tls
 M:	Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/dns/dns.c b/src/vnet/dns/dns.c
index 3285bed..eac9545 100644
--- a/src/vnet/dns/dns.c
+++ b/src/vnet/dns/dns.c
@@ -248,7 +248,8 @@
   fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
   if (fib_index == (u32) ~ 0)
     {
-      clib_warning ("no fib table");
+      if (0)
+	clib_warning ("no fib table");
       return;
     }
 
@@ -257,7 +258,8 @@
   /* Couldn't find route to destination. Bail out. */
   if (fei == FIB_NODE_INDEX_INVALID)
     {
-      clib_warning ("no route to DNS server");
+      if (0)
+	clib_warning ("no route to DNS server");
       return;
     }
 
@@ -265,9 +267,10 @@
 
   if (sw_if_index == ~0)
     {
-      clib_warning
-	("route to %U exists, fei %d, get_resolving_interface returned"
-	 " ~0", format_ip4_address, &prefix.fp_addr, fei);
+      if (0)
+	clib_warning
+	  ("route to %U exists, fei %d, get_resolving_interface returned"
+	   " ~0", format_ip4_address, &prefix.fp_addr, fei);
       return;
     }
 
@@ -365,7 +368,8 @@
   fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
   if (fib_index == (u32) ~ 0)
     {
-      clib_warning ("no fib table");
+      if (0)
+	clib_warning ("no fib table");
       return;
     }
 
@@ -1061,6 +1065,7 @@
 	goto found_last_request;
       }
   clib_warning ("pool elt %d supposedly pending, but not found...", ep_index);
+  return -1;
 
 found_last_request:
 
@@ -2206,46 +2211,51 @@
       return s;
     }
 
-  /* *INDENT-OFF* */
-  pool_foreach (ep, dm->entries,
-  ({
-    if (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID)
-      {
-        ASSERT (ep->dns_response);
-        if (ep->flags & DNS_CACHE_ENTRY_FLAG_STATIC)
-          ss = "[S] ";
-        else
-          ss = "    ";
+  s = format (s, "DNS cache contains %d entries\n", pool_elts (dm->entries));
 
-        if (verbose < 2 && ep->flags & DNS_CACHE_ENTRY_FLAG_CNAME)
-          s = format (s, "%s%s -> %s", ss, ep->name, ep->cname);
-        else
-          s = format (s, "%s%s -> %U", ss, ep->name,
-                      format_dns_reply,
-                      ep->dns_response,
-                      verbose);
-        if (!(ep->flags & DNS_CACHE_ENTRY_FLAG_STATIC))
+  if (verbose > 0)
+    {
+      /* *INDENT-OFF* */
+      pool_foreach (ep, dm->entries,
+      ({
+        if (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID)
           {
-            f64 time_left = ep->expiration_time - now;
-            if (time_left > 0.0)
-              s = format (s, "  TTL left %.1f", time_left);
+            ASSERT (ep->dns_response);
+            if (ep->flags & DNS_CACHE_ENTRY_FLAG_STATIC)
+              ss = "[S] ";
             else
-              s = format (s, "  EXPIRED");
+              ss = "    ";
 
-            if (verbose > 2)
-              s = format (s, "    %d client notifications pending\n",
-                          vec_len(ep->pending_requests));
+            if (verbose < 2 && ep->flags & DNS_CACHE_ENTRY_FLAG_CNAME)
+              s = format (s, "%s%s -> %s", ss, ep->name, ep->cname);
+            else
+              s = format (s, "%s%s -> %U", ss, ep->name,
+                          format_dns_reply,
+                          ep->dns_response,
+                          verbose);
+            if (!(ep->flags & DNS_CACHE_ENTRY_FLAG_STATIC))
+              {
+                f64 time_left = ep->expiration_time - now;
+                if (time_left > 0.0)
+                  s = format (s, "  TTL left %.1f", time_left);
+                else
+                  s = format (s, "  EXPIRED");
+
+                if (verbose > 2)
+                  s = format (s, "    %d client notifications pending\n",
+                              vec_len(ep->pending_requests));
+              }
           }
-      }
-    else
-      {
-        ASSERT (ep->dns_request);
-        s = format (s, "[P] %U", format_dns_reply, ep->dns_request,
-                    verbose);
-      }
-    vec_add1 (s, '\n');
-  }));
-  /* *INDENT-ON* */
+        else
+          {
+            ASSERT (ep->dns_request);
+            s = format (s, "[P] %U", format_dns_reply, ep->dns_request,
+                        verbose);
+          }
+        vec_add1 (s, '\n');
+      }));
+      /* *INDENT-ON* */
+    }
 
   dns_cache_unlock (dm);
 
@@ -2764,6 +2774,7 @@
   dns_rr_t *rr;
   u8 *rrptr;
   int is_fail = 0;
+  int is_recycle = (b0 != 0);
 
   ASSERT (ep && ep->dns_response);
 
@@ -2804,6 +2815,16 @@
 	return;
       b0 = vlib_get_buffer (vm, bi);
     }
+  else
+    {
+      /* Use the buffer we were handed. Reinitialize it... */
+      vlib_buffer_t bt = { };
+      /* push/pop the reference count */
+      u8 save_ref_count = b0->ref_count;
+      vlib_buffer_copy_template (b0, &bt);
+      b0->ref_count = save_ref_count;
+      bi = vlib_get_buffer_index (vm, b0);
+    }
 
   if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
     vlib_buffer_free_one (vm, b0->next_buffer);
@@ -2817,9 +2838,6 @@
    */
   b0->flags |= (VNET_BUFFER_F_LOCALLY_ORIGINATED
 		| VLIB_BUFFER_TOTAL_LENGTH_VALID);
-  b0->current_data = 0;
-  b0->current_length = 0;
-  b0->total_length_not_including_first_buffer = 0;
   vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;	/* "local0" */
   vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;	/* default VRF for now */
 
@@ -2971,12 +2989,18 @@
   udp->checksum = 0;
   vec_free (reply);
 
-  /* Ship it to ip4_lookup */
-  f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
-  to_next = vlib_frame_vector_args (f);
-  to_next[0] = bi;
-  f->n_vectors = 1;
-  vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+  /*
+   * Ship pkts made out of whole cloth to ip4_lookup
+   * Caller will ship recycled dns reply packets to ip4_lookup
+   */
+  if (is_recycle == 0)
+    {
+      f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+      to_next = vlib_frame_vector_args (f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+      vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+    }
 }
 
 /*
diff --git a/src/vnet/dns/dns.h b/src/vnet/dns/dns.h
index e6944de..6d09010 100644
--- a/src/vnet/dns/dns.h
+++ b/src/vnet/dns/dns.h
@@ -150,7 +150,9 @@
 _(PROCESSED, "DNS reply pkts processed")                \
 _(NO_ELT, "No DNS pool element")                        \
 _(FORMAT_ERROR, "DNS format errors")                    \
-_(TEST_DROP, "DNS reply pkt dropped for test purposes")
+_(TEST_DROP, "DNS reply pkt dropped for test purposes") \
+_(MULTIPLE_REPLY, "DNS multiple reply packets")	        \
+_(NO_UNRESOLVED_ENTRY, "No unresolved entry for pkt")
 
 typedef enum
 {
diff --git a/src/vnet/dns/resolver_process.c b/src/vnet/dns/resolver_process.c
index 220a490..4ed7c79 100644
--- a/src/vnet/dns/resolver_process.c
+++ b/src/vnet/dns/resolver_process.c
@@ -59,6 +59,8 @@
   u16 flags;
   u16 rcode;
   int i;
+  int entry_was_valid;
+  int remove_count;
   int rv = 0;
 
   d = (dns_header_t *) reply;
@@ -72,6 +74,8 @@
   if (pool_is_free_index (dm->entries, pool_index))
     {
       vec_free (reply);
+      if (0)
+	clib_warning ("pool index %d is free", pool_index);
       vlib_node_increment_counter (vm, dns46_reply_node.index,
 				   DNS46_REPLY_ERROR_NO_ELT, 1);
       dns_cache_unlock (dm);
@@ -149,8 +153,29 @@
 reply:
   /* Save the response */
   ep->dns_response = reply;
-  /* Pick some sensible default. */
+
+  /*
+   * Pick a sensible default cache entry expiration time.
+   * We don't play the 10-second timeout game.
+   */
   ep->expiration_time = now + 600.0;
+
+  if (0)
+    clib_warning ("resolving '%s', was %s valid",
+		  ep->name, (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID) ?
+		  "already" : "not");
+  /*
+   * The world is a mess. A single DNS request sent to e.g. 8.8.8.8
+   * may yield multiple, subtly different responses - all with the same
+   * DNS protocol-level ID.
+   *
+   * Last response wins in terms of what ends up in the cache.
+   * First response wins in terms of the response sent to the client.
+   */
+
+  /* Strong hint that we may not find a pending resolution entry */
+  entry_was_valid = (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID) ? 1 : 0;
+
   if (vec_len (ep->dns_response))
     ep->flags |= DNS_CACHE_ENTRY_FLAG_VALID;
 
@@ -218,17 +243,27 @@
     }
   vec_free (ep->pending_requests);
 
+  remove_count = 0;
   for (i = 0; i < vec_len (dm->unresolved_entries); i++)
     {
       if (dm->unresolved_entries[i] == pool_index)
 	{
 	  vec_delete (dm->unresolved_entries, 1, i);
-	  goto found;
+	  remove_count++;
+	  i--;
 	}
     }
-  clib_warning ("pool index %d AWOL from unresolved vector", pool_index);
+  /* See multiple response comment above... */
+  if (remove_count == 0)
+    {
+      u32 error_code = entry_was_valid ? DNS46_REPLY_ERROR_MULTIPLE_REPLY :
+	DNS46_REPLY_ERROR_NO_UNRESOLVED_ENTRY;
 
-found:
+      vlib_node_increment_counter (vm, dns46_reply_node.index, error_code, 1);
+      dns_cache_unlock (dm);
+      return;
+    }
+
   /* Deal with bogus names, server issues, etc. */
   switch (rcode)
     {
@@ -240,13 +275,13 @@
     case DNS_RCODE_NOT_IMPLEMENTED:
     case DNS_RCODE_REFUSED:
       if (ep->server_af == 0)
-	clib_warning ("name server %U backfire",
+	clib_warning ("name server %U can't resolve '%s'",
 		      format_ip4_address,
-		      dm->ip4_name_servers + ep->server_rotor);
+		      dm->ip4_name_servers + ep->server_rotor, ep->name);
       else
-	clib_warning ("name server %U backfire",
+	clib_warning ("name server %U can't resolve '%s'",
 		      format_ip6_address,
-		      dm->ip6_name_servers + ep->server_rotor);
+		      dm->ip6_name_servers + ep->server_rotor, ep->name);
       /* FALLTHROUGH */
     case DNS_RCODE_NAME_ERROR:
     case DNS_RCODE_FORMAT_ERROR:
@@ -255,6 +290,7 @@
       break;
     }
 
+
   dns_cache_unlock (dm);
   return;
 }