Moved origin/cp-main to upstream v2.90 version

The following commands, run from within the dnsmasq directory, were used
to move to the upstream v2.90 version:
  1. git remote add upstream http://thekelleys.org.uk/git/dnsmasq.git
  2. git remote -v show
  3. git fetch upstream
  4. git diff origin/cp-main v2.90 > ../dnsmasq_to_v2.90.gitdiff
  5. patch -p1 < ../dnsmasq_to_v2.90.gitdiff
  6. git add . && git commit -m "Moved origin/cp-main to upstream v2.90 version"
  7. git diff v2.90   (verification step: the output should be empty)

Change-Id: I167f369cc3c625e7d291b296950fe98aa8f7d513
diff --git a/src/forward.c b/src/forward.c
index 7093e82..32f37e4 100644
--- a/src/forward.c
+++ b/src/forward.c
@@ -1,4 +1,4 @@
-/* dnsmasq is Copyright (c) 2000-2021 Simon Kelley
+/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -16,12 +16,18 @@
 
 #include "dnsmasq.h"
 
-static struct frec *lookup_frec(unsigned short id, int fd, void *hash);
-static struct frec *lookup_frec_by_query(void *hash, unsigned int flags);
+static struct frec *get_new_frec(time_t now, struct server *serv, int force);
+static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp);
+static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask);
+#ifdef HAVE_DNSSEC
+static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header);
+#endif
 
 static unsigned short get_id(void);
 static void free_frec(struct frec *f);
-static void query_full(time_t now);
+static void query_full(time_t now, char *domain);
+
+static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status);
 
 /* Send a UDP packet with its source address set as "source" 
    unless nowild is true, when we just send it with the kernel default */
@@ -106,146 +112,6 @@
   return 1;
 }
           
-static unsigned int search_servers(time_t now, union all_addr **addrpp, unsigned int qtype,
-				   char *qdomain, int *type, char **domain, int *norebind)
-			      
-{
-  /* If the query ends in the domain in one of our servers, set
-     domain to point to that name. We find the largest match to allow both
-     domain.org and sub.domain.org to exist. */
-  
-  unsigned int namelen = strlen(qdomain);
-  unsigned int matchlen = 0;
-  struct server *serv;
-  unsigned int flags = 0;
-  static union all_addr zero;
-  
-  for (serv = daemon->servers; serv; serv=serv->next)
-    if (qtype == F_DNSSECOK && !(serv->flags & SERV_DO_DNSSEC))
-      continue;
-    /* domain matches take priority over NODOTS matches */
-    else if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
-      {
-	unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6; 
-	*type = SERV_FOR_NODOTS;
-	if ((serv->flags & SERV_NO_REBIND) && norebind)
-	  *norebind = 1;
-	else if (serv->flags & SERV_NO_ADDR)
-	  flags = F_NXDOMAIN;
-	else if (serv->flags & SERV_LITERAL_ADDRESS)
-	  { 
-	    /* literal address = '#' -> return all-zero address for IPv4 and IPv6 */
-	    if ((serv->flags & SERV_USE_RESOLV) && (qtype & (F_IPV6 | F_IPV4)))
-	      {
-		memset(&zero, 0, sizeof(zero));
-		flags = qtype;
-		*addrpp = &zero;
-	      }
-	    else if (sflag & qtype)
-	      {
-		flags = sflag;
-		if (serv->addr.sa.sa_family == AF_INET) 
-		  *addrpp = (union all_addr *)&serv->addr.in.sin_addr;
-		else
-		  *addrpp = (union all_addr *)&serv->addr.in6.sin6_addr;
-	      }
-	    else if (!flags || (flags & F_NXDOMAIN))
-	      flags = F_NOERR;
-	  } 
-      }
-    else if (serv->flags & SERV_HAS_DOMAIN)
-      {
-	unsigned int domainlen = strlen(serv->domain);
-	char *matchstart = qdomain + namelen - domainlen;
-	if (namelen >= domainlen &&
-	    hostname_isequal(matchstart, serv->domain) &&
-	    (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
-	  {
-	    if ((serv->flags & SERV_NO_REBIND) && norebind)	
-	      *norebind = 1;
-	    else
-	      {
-		unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
-		/* implement priority rules for --address and --server for same domain.
-		   --address wins if the address is for the correct AF
-		   --server wins otherwise. */
-		if (domainlen != 0 && domainlen == matchlen)
-		  {
-		    if ((serv->flags & SERV_LITERAL_ADDRESS))
-		      {
-			if (!(sflag & qtype) && flags == 0)
-			  continue;
-		      }
-		    else
-		      {
-			if (flags & (F_IPV4 | F_IPV6))
-			  continue;
-		      }
-		  }
-		
-		if (domainlen >= matchlen)
-		  {
-		    *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND | SERV_DO_DNSSEC);
-		    *domain = serv->domain;
-		    matchlen = domainlen;
-		    if (serv->flags & SERV_NO_ADDR)
-		      flags = F_NXDOMAIN;
-		    else if (serv->flags & SERV_LITERAL_ADDRESS)
-		      {
-			 /* literal address = '#' -> return all-zero address for IPv4 and IPv6 */
-			if ((serv->flags & SERV_USE_RESOLV) && (qtype & (F_IPV6 | F_IPV4)))
-			  {			    
-			    memset(&zero, 0, sizeof(zero));
-			    flags = qtype;
-			    *addrpp = &zero;
-			  }
-			else if (sflag & qtype)
-			  {
-			    flags = sflag;
-			    if (serv->addr.sa.sa_family == AF_INET) 
-			      *addrpp = (union all_addr *)&serv->addr.in.sin_addr;
-			    else
-			      *addrpp = (union all_addr *)&serv->addr.in6.sin6_addr;
-			  }
-			else if (!flags || (flags & F_NXDOMAIN))
-			  flags = F_NOERR;
-		      }
-		    else
-		      flags = 0;
-		  } 
-	      }
-	  }
-      }
-  
-  if (flags == 0 && !(qtype & (F_QUERY | F_DNSSECOK)) && 
-      option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
-    /* don't forward A or AAAA queries for simple names, except the empty name */
-    flags = F_NOERR;
-  
-  if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
-    flags = F_NOERR;
-
-  if (flags)
-    {
-       if (flags == F_NXDOMAIN || flags == F_NOERR)
-	 log_query(flags | qtype | F_NEG | F_CONFIG | F_FORWARD, qdomain, NULL, NULL);
-       else
-	 {
-	   /* handle F_IPV4 and F_IPV6 set on ANY query to 0.0.0.0/:: domain. */
-	   if (flags & F_IPV4)
-	     log_query((flags | F_CONFIG | F_FORWARD) & ~F_IPV6, qdomain, *addrpp, NULL);
-	   if (flags & F_IPV6)
-	     log_query((flags | F_CONFIG | F_FORWARD) & ~F_IPV4, qdomain, *addrpp, NULL);
-	 }
-    }
-  else if ((*type) & SERV_USE_RESOLV)
-    {
-      *type = 0; /* use normal servers for this domain */
-      *domain = NULL;
-    }
-  return  flags;
-}
-
 #ifdef HAVE_CONNTRACK
 static void set_outgoing_mark(struct frec *forward, int fd)
 {
@@ -256,12 +122,20 @@
 }
 #endif
 
-static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg)
+static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg, unsigned short type)
 {
   if (addr->sa.sa_family == AF_INET)
-    log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg);
+    {
+      if (flags & F_SERVER)
+	type = ntohs(addr->in.sin_port);
+      log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg, type);
+    }
   else
-    log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg);
+    {
+      if (flags & F_SERVER)
+	type = ntohs(addr->in6.sin6_port);
+      log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg, type);
+    }
 }
 
 static void server_send(struct server *server, int fd,
@@ -272,46 +146,45 @@
 			   sa_len(&server->addr))));
 }
 
-#ifdef HAVE_DNSSEC
-static void server_send_log(struct server *server, int fd,
-			const void *header, size_t plen, int dumpflags,
-			unsigned int logflags, char *name, char *arg)
+static int domain_no_rebind(char *domain)
 {
-#ifdef HAVE_DUMPFILE
-	  dump_packet(dumpflags, (void *)header, (size_t)plen, NULL, &server->addr);
-#endif
-	  log_query_mysockaddr(logflags, name, &server->addr, arg);
-	  server_send(server, fd, header, plen, 0);
-}
-#endif
+  struct rebind_domain *rbd;
+  size_t tlen, dlen = strlen(domain);
+  char *dots = strchr(domain, '.');
 
-static int server_test_type(const struct server *server,
-			    const char *domain, int type, int extratype)
-{
-  return (type == (server->flags & (SERV_TYPE | extratype)) &&
-      (type != SERV_HAS_DOMAIN || hostname_isequal(domain, server->domain)) &&
-      !(server->flags & (SERV_LITERAL_ADDRESS | SERV_LOOP)));
+  /* Match whole labels only. Empty domain matches no dots (any single label) */
+  for (rbd = daemon->no_rebind; rbd; rbd = rbd->next)
+    {
+      if (dlen >= (tlen = strlen(rbd->domain)) &&
+	hostname_isequal(rbd->domain, &domain[dlen - tlen]) &&
+	(dlen == tlen || domain[dlen - tlen - 1] == '.'))
+      return 1;
+
+      if (tlen == 0 && !dots)
+	return 1;
+    }
+  
+  return 0;
 }
 
 static int forward_query(int udpfd, union mysockaddr *udpaddr,
 			 union all_addr *dst_addr, unsigned int dst_iface,
-			 struct dns_header *header, size_t plen, time_t now, 
-			 struct frec *forward, int ad_reqd, int do_bit)
+			 struct dns_header *header, size_t plen,  char *limit, time_t now, 
+			 struct frec *forward, int ad_reqd, int do_bit, int fast_retry)
 {
-  char *domain = NULL;
-  int type = SERV_DO_DNSSEC, norebind = 0;
-  union all_addr *addrp = NULL;
   unsigned int flags = 0;
   unsigned int fwd_flags = 0;
-  struct server *start = NULL;
+  int is_dnssec = forward && (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY));
+  struct server *master;
   void *hash = hash_questions(header, plen, daemon->namebuff);
-#ifdef HAVE_DNSSEC
-  int do_dnssec = 0;
-#endif
   unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
   unsigned char *oph = find_pseudoheader(header, plen, NULL, NULL, NULL, NULL);
-  int old_src = 0;
-  
+  int old_src = 0, old_reply = 0;
+  int first, last, start = 0;
+  int cacheable, forwarded = 0;
+  size_t edns0_len;
+  unsigned char *pheader;
+  int ede = EDE_UNSET;
   (void)do_bit;
   
   if (header->hb4 & HB4_CD)
@@ -325,10 +198,20 @@
     fwd_flags |= FREC_DO_QUESTION;
 #endif
   
-  /* Check for retry on existing query */
+  /* Check for retry on existing query.
+     FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below 
+     ensures that no frec created for internal DNSSEC query can be returned here.
+     
+     Similarly FREC_NO_CACHE is never set in flags, so a query which is
+     contigent on a particular source address EDNS0 option will never be matched. */
   if (forward)
-    old_src = 1;
-  else if ((forward = lookup_frec_by_query(hash, fwd_flags)))
+    {
+      old_src = 1;
+      old_reply = 1;
+    }
+  else if ((forward = lookup_frec_by_query(hash, fwd_flags,
+					   FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION |
+					   FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)))
     {
       struct frec_src *src;
       
@@ -338,7 +221,11 @@
 	  break;
       
       if (src)
-	old_src = 1;
+	{
+	  old_src = 1;
+	  /* If a query is retried, use the log_id for the retry when logging the answer. */
+	  src->log_id = daemon->log_id;
+	}
       else
 	{
 	  /* Existing query, but from new source, just add this 
@@ -356,8 +243,16 @@
 	  /* If we've been spammed with many duplicates, return REFUSED. */
 	  if (!daemon->free_frec_src)
 	    {
-	      query_full(now);
-	      goto frec_err;
+	      query_full(now, NULL);
+	      /* This is tricky; if we're blasted with the same query
+		 over and over, we'll end up taking this path each time
+		 and never resetting until the frec gets deleted by
+		 aging followed by the receipt of a different query. This
+		 is a bit of a DoS vuln. Avoid by explicitly deleting the
+		 frec once it expires. */
+	      if (difftime(now, forward->time) >= TIMEOUT)
+		free_frec(forward);
+	      goto reply;
 	    }
 	  
 	  src = daemon->free_frec_src;
@@ -379,313 +274,435 @@
 	}
     }
 
-  /* retry existing query */
-  if (forward)
+  /* new query */
+  if (!forward)
     {
+      /* If the query is malformed, we can't forward it because
+	 we can't get a reliable hash to recognise the answer. */
+      if (!hash)
+	{
+	  flags = 0;
+	  ede = EDE_INVALID_DATA;
+	  goto reply;
+	}
+      
+      if (lookup_domain(daemon->namebuff, gotname, &first, &last))
+	flags = is_local_answer(now, first, daemon->namebuff);
+      else
+	{
+	  /* no available server. */
+	  ede = EDE_NOT_READY;
+	  flags = 0;
+	}
+       
+      /* don't forward A or AAAA queries for simple names, except the empty name */
+      if (!flags &&
+	  option_bool(OPT_NODOTS_LOCAL) &&
+	  (gotname & (F_IPV4 | F_IPV6)) &&
+	  !strchr(daemon->namebuff, '.') &&
+	  strlen(daemon->namebuff) != 0)
+	flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
+      
+      /* Configured answer. */
+      if (flags || ede == EDE_NOT_READY)
+	goto reply;
+      
+      master = daemon->serverarray[first];
+      
+      if (!(forward = get_new_frec(now, master, 0)))
+	goto reply;
+      /* table full - flags == 0, return REFUSED */
+      
+      /* Keep copy of query if we're doing fast retry. */
+      if (daemon->fast_retry_time != 0)
+	{
+	  forward->stash = blockdata_alloc((char *)header, plen);
+	  forward->stash_len = plen;
+	}
+      
+      forward->frec_src.log_id = daemon->log_id;
+      forward->frec_src.source = *udpaddr;
+      forward->frec_src.orig_id = ntohs(header->id);
+      forward->frec_src.dest = *dst_addr;
+      forward->frec_src.iface = dst_iface;
+      forward->frec_src.next = NULL;
+      forward->frec_src.fd = udpfd;
+      forward->new_id = get_id();
+      memcpy(forward->hash, hash, HASH_SIZE);
+      forward->forwardall = 0;
+      forward->flags = fwd_flags;
+      if (domain_no_rebind(daemon->namebuff))
+	forward->flags |= FREC_NOREBIND;
+      if (header->hb4 & HB4_CD)
+	forward->flags |= FREC_CHECKING_DISABLED;
+      if (ad_reqd)
+	forward->flags |= FREC_AD_QUESTION;
+#ifdef HAVE_DNSSEC
+      forward->work_counter = daemon->limit[LIMIT_WORK];
+      forward->validate_counter = daemon->limit[LIMIT_CRYPTO]; 
+      if (do_bit)
+	forward->flags |= FREC_DO_QUESTION;
+#endif
+      
+      start = first;
+
+      if (option_bool(OPT_ALL_SERVERS))
+	forward->forwardall = 1;
+
+      if (!option_bool(OPT_ORDER))
+	{
+	  if (master->forwardcount++ > FORWARD_TEST ||
+	      difftime(now, master->forwardtime) > FORWARD_TIME ||
+	      master->last_server == -1)
+	    {
+	      master->forwardtime = now;
+	      master->forwardcount = 0;
+	      forward->forwardall = 1;
+	    }
+	  else
+	    start = master->last_server;
+	}
+    }
+  else
+    {
+#ifdef HAVE_DNSSEC
+      /* If we've already got an answer to this query, but we're awaiting keys for validation,
+	 there's no point retrying the query, retry the key query instead...... */
+      while (forward->blocking_query)
+	forward = forward->blocking_query;
+
+      if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
+	{
+	  int is_sign;
+	  unsigned char *pheader;
+	  
+	  /* log_id should match previous DNSSEC query. */
+	  daemon->log_display_id = forward->frec_src.log_id;
+	  
+	  blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
+	  plen = forward->stash_len;
+	  /* get query for logging. */
+	  extract_request(header, plen, daemon->namebuff, NULL);
+	  
+	  if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
+	    PUTSHORT(SAFE_PKTSZ, pheader);
+	  
+	  /* Find suitable servers: should never fail. */
+	  if (!filter_servers(forward->sentto->arrayposn, F_DNSSECOK, &first, &last))
+	    return 0;
+	  
+	  is_dnssec = 1;
+	  forward->forwardall = 1;
+	}
+      else
+#endif
+	{
+	  /* retry on existing query, from original source. Send to all available servers  */
+	  if (udpfd == -1 && !fast_retry)
+	    forward->sentto->failed_queries++;
+	  else
+	    forward->sentto->retrys++;
+	  
+	  if (!filter_servers(forward->sentto->arrayposn, F_SERVER, &first, &last))
+	    goto reply;
+	  
+	  master = daemon->serverarray[first];
+	  
+	  /* Forward to all available servers on retry of query from same host. */
+	  if (!option_bool(OPT_ORDER) && old_src && !fast_retry)
+	    forward->forwardall = 1;
+	  else
+	    {
+	      start = forward->sentto->arrayposn;
+	      
+	      if (option_bool(OPT_ORDER) && !fast_retry)
+		{
+		  /* In strict order mode, there must be a server later in the list
+		     left to send to, otherwise without the forwardall mechanism,
+		     code further on will cycle around the list forwever if they
+		     all return REFUSED. If at the last, give up.
+		     Note that we can get here EITHER because a client retried,
+		     or an upstream server returned REFUSED. The above only
+		     applied in the later case. For client retries,
+		     keep trying the last server.. */
+		  if (++start == last)
+		    {
+		      if (old_reply)
+			goto reply;
+		      else
+			start--;
+		    }
+		}
+	    }	  
+	}
+      
       /* If we didn't get an answer advertising a maximal packet in EDNS,
 	 fall back to 1280, which should work everywhere on IPv6.
 	 If that generates an answer, it will become the new default
 	 for this server */
       forward->flags |= FREC_TEST_PKTSZ;
-      
-#ifdef HAVE_DNSSEC
-      /* If we've already got an answer to this query, but we're awaiting keys for validation,
-	 there's no point retrying the query, retry the key query instead...... */
-      if (forward->blocking_query)
-	{
-	  int fd, is_sign;
-	  unsigned char *pheader;
-	  
-	  forward->flags &= ~FREC_TEST_PKTSZ;
-	  
-	  while (forward->blocking_query)
-	    forward = forward->blocking_query;
-	   
-	  blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
-	  plen = forward->stash_len;
-	  
-	  forward->flags |= FREC_TEST_PKTSZ;
-	  if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
-	    PUTSHORT(SAFE_PKTSZ, pheader);
-	  
-	  if ((fd = allocate_rfd(&forward->rfds, forward->sentto)) != -1)
-	    server_send_log(forward->sentto, fd, header, plen,
-			    DUMP_SEC_QUERY,
-			    F_NOEXTRA | F_DNSSEC, "retry", "dnssec");
+    }
 
-	  return 1;
-	}
-#endif
-
-      /* retry on existing query, from original source. Send to all available servers  */
-      domain = forward->sentto->domain;
-      forward->sentto->failed_queries++;
-      if (!option_bool(OPT_ORDER) && old_src)
-	{
-	  forward->forwardall = 1;
-	  daemon->last_server = NULL;
-	}
-      type = forward->sentto->flags & SERV_TYPE;
-#ifdef HAVE_DNSSEC
-      do_dnssec = forward->sentto->flags & SERV_DO_DNSSEC;
-#endif
-
-      if (!(start = forward->sentto->next))
-	start = daemon->servers; /* at end of list, recycle */
+  /* We may be resending a DNSSEC query here, for which the below processing is not necessary. */
+  if (!is_dnssec)
+    {
       header->id = htons(forward->new_id);
-    }
-  else 
-    {
-      /* new query */
-
-      if (gotname)
-	flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
       
-#ifdef HAVE_DNSSEC
-      do_dnssec = type & SERV_DO_DNSSEC;
-#endif
-      type &= ~SERV_DO_DNSSEC;      
+      plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable);
       
-      /* may be no servers available. */
-      if (daemon->servers && !flags)
-	forward = get_new_frec(now, NULL, NULL);
-      /* table full - flags == 0, return REFUSED */
-      
-      if (forward)
-	{
-	  forward->frec_src.source = *udpaddr;
-	  forward->frec_src.orig_id = ntohs(header->id);
-	  forward->frec_src.dest = *dst_addr;
-	  forward->frec_src.iface = dst_iface;
-	  forward->frec_src.next = NULL;
-	  forward->frec_src.fd = udpfd;
-	  forward->new_id = get_id();
-	  memcpy(forward->hash, hash, HASH_SIZE);
-	  forward->forwardall = 0;
-	  forward->flags = fwd_flags;
-	  if (norebind)
-	    forward->flags |= FREC_NOREBIND;
-	  if (header->hb4 & HB4_CD)
-	    forward->flags |= FREC_CHECKING_DISABLED;
-	  if (ad_reqd)
-	    forward->flags |= FREC_AD_QUESTION;
-#ifdef HAVE_DNSSEC
-	  forward->work_counter = DNSSEC_WORK;
-	  if (do_bit)
-	    forward->flags |= FREC_DO_QUESTION;
-#endif
-	  
-	  header->id = htons(forward->new_id);
-	  
-	  /* In strict_order mode, always try servers in the order 
-	     specified in resolv.conf, if a domain is given 
-	     always try all the available servers,
-	     otherwise, use the one last known to work. */
-	  
-	  if (type == 0)
-	    {
-	      if (option_bool(OPT_ORDER))
-		start = daemon->servers;
-	      else if (!(start = daemon->last_server) ||
-		       daemon->forwardcount++ > FORWARD_TEST ||
-		       difftime(now, daemon->forwardtime) > FORWARD_TIME)
-		{
-		  start = daemon->servers;
-		  forward->forwardall = 1;
-		  daemon->forwardcount = 0;
-		  daemon->forwardtime = now;
-		}
-	    }
-	  else
-	    {
-	      start = daemon->servers;
-	      if (!option_bool(OPT_ORDER))
-		forward->forwardall = 1;
-	    }
-	}
-    }
-
-  /* check for send errors here (no route to host) 
-     if we fail to send to all nameservers, send back an error
-     packet straight away (helps modem users when offline)  */
-  
-  if (!flags && forward)
-    {
-      struct server *firstsentto = start;
-      int subnet, cacheable, forwarded = 0;
-      size_t edns0_len;
-      unsigned char *pheader;
-      
-      /* If a query is retried, use the log_id for the retry when logging the answer. */
-      forward->frec_src.log_id = daemon->log_id;
-      
-       if(!option_bool(OPT_EDNS_RESTRICT) || (option_bool(OPT_EDNS_RESTRICT) && !domain))
-      plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &subnet, &cacheable);
-      
-      if (subnet)
-	forward->flags |= FREC_HAS_SUBNET;
-
       if (!cacheable)
 	forward->flags |= FREC_NO_CACHE;
-
+      
 #ifdef HAVE_DNSSEC
-      if (option_bool(OPT_DNSSEC_VALID) && do_dnssec)
+      if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
 	{
 	  plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
-	 	      
+	  
 	  /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
 	     this allows it to select auth servers when one is returning bad data. */
 	  if (option_bool(OPT_DNSSEC_DEBUG))
 	    header->hb4 |= HB4_CD;
-
+	  
 	}
 #endif
-
+      
       if (find_pseudoheader(header, plen, &edns0_len, &pheader, NULL, NULL))
 	{
 	  /* If there wasn't a PH before, and there is now, we added it. */
 	  if (!oph)
 	    forward->flags |= FREC_ADDED_PHEADER;
-
+	  
 	  /* If we're sending an EDNS0 with any options, we can't recreate the query from a reply. */
 	  if (edns0_len > 11)
 	    forward->flags |= FREC_HAS_EXTRADATA;
-
+	  
 	  /* Reduce udp size on retransmits. */
 	  if (forward->flags & FREC_TEST_PKTSZ)
 	    PUTSHORT(SAFE_PKTSZ, pheader);
 	}
+    }
+  
+  if (forward->forwardall)
+    start = first;
+
+  forwarded = 0;
+  
+  /* check for send errors here (no route to host) 
+     if we fail to send to all nameservers, send back an error
+     packet straight away (helps modem users when offline)  */
+
+  while (1)
+    { 
+      int fd;
+      struct server *srv = daemon->serverarray[start];
       
-      while (1)
-	{ 
-	  int fd;
-
-	  /* only send to servers dealing with our domain.
-	     domain may be NULL, in which case server->domain 
-	     must be NULL also. */
+      if ((fd = allocate_rfd(&forward->rfds, srv)) != -1)
+	{
 	  
-	  if (server_test_type(start, domain, type, 0) &&
-	      ((fd = allocate_rfd(&forward->rfds, start)) != -1))
-	    {
-	      
 #ifdef HAVE_CONNTRACK
-	      /* Copy connection mark of incoming query to outgoing connection. */
-	      if (option_bool(OPT_CONNTRACK))
-		set_outgoing_mark(forward, fd);
+	  /* Copy connection mark of incoming query to outgoing connection. */
+	  if (option_bool(OPT_CONNTRACK))
+	    set_outgoing_mark(forward, fd);
+#endif
+	  
+#ifdef HAVE_DNSSEC
+	  if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
+	    {
+	      /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
+		 packet size to 512. But that won't provide space for the RRSIGS in many cases.
+		 The RRSIGS will be stripped out before the answer goes back, so the packet should
+		 shrink again. So, if we added a do-bit, bump the udp packet size to the value
+		 known to be OK for this server. We check returned size after stripping and set
+		 the truncated bit if it's still too big. */		  
+	      unsigned char *pheader;
+	      int is_sign;
+	      if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
+		PUTSHORT(srv->edns_pktsz, pheader);
+	    }
+#endif
+	  
+	  if (retry_send(sendto(fd, (char *)header, plen, 0,
+				&srv->addr.sa,
+				sa_len(&srv->addr))))
+	    continue;
+	  
+	  if (errno == 0)
+	    {
+#ifdef HAVE_DUMPFILE
+	      dump_packet_udp(DUMP_UP_QUERY, (void *)header, plen, NULL, &srv->addr, fd);
 #endif
 	      
-#ifdef HAVE_DNSSEC
-	      if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
+	      /* Keep info in case we want to re-send this packet */
+	      daemon->srv_save = srv;
+	      daemon->packet_len = plen;
+	      daemon->fd_save = fd;
+	      
+	      if (!(forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)))
 		{
-		  /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
-		     packet size to 512. But that won't provide space for the RRSIGS in many cases.
-		     The RRSIGS will be stripped out before the answer goes back, so the packet should
-		     shrink again. So, if we added a do-bit, bump the udp packet size to the value
-		     known to be OK for this server. We check returned size after stripping and set
-		     the truncated bit if it's still too big. */		  
-		  unsigned char *pheader;
-		  int is_sign;
-		  if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
-		    PUTSHORT(start->edns_pktsz, pheader);
-		}
-#endif
-
-	      if (udpaddr &&
-		  (start->addr.in.sin_addr.s_addr != udpaddr->in.sin_addr.s_addr) &&
-		  (retry_send(sendto(fd, (char *)header, plen, 0,
-				    &start->addr.sa,
-				    sa_len(&start->addr)))))
-		continue;
-	    
-	      if (errno == 0)
-		{
-#ifdef HAVE_DUMPFILE
-		  dump_packet(DUMP_UP_QUERY, (void *)header, plen, NULL, &start->addr);
-#endif
-		  
-		  /* Keep info in case we want to re-send this packet */
-		  daemon->srv_save = start;
-		  daemon->packet_len = plen;
-		  daemon->fd_save = fd;
-		  
 		  if (!gotname)
 		    strcpy(daemon->namebuff, "query");
 		  log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
-				       &start->addr, NULL);
-		  start->queries++;
-		  forwarded = 1;
-		  forward->sentto = start;
-		  if (!forward->forwardall) 
-		    break;
-		  forward->forwardall++;
+				       &srv->addr, NULL, 0);
 		}
+#ifdef HAVE_DNSSEC
+	      else
+		log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->namebuff, &srv->addr,
+				     (forward->flags & FREC_DNSKEY_QUERY) ? "dnssec-retry[DNSKEY]" : "dnssec-retry[DS]", 0);
+#endif
+
+	      srv->queries++;
+	      forwarded = 1;
+	      forward->sentto = srv;
+	      if (!forward->forwardall) 
+		break;
+	      forward->forwardall++;
 	    }
-	  
-	  if (!(start = start->next))
- 	    start = daemon->servers;
-	  
-	  if (start == firstsentto)
-	    break;
 	}
       
-      if (forwarded)
-	return 1;
-      
-      /* could not send on, prepare to return */ 
-      header->id = htons(forward->frec_src.orig_id);
-      free_frec(forward); /* cancel */
-    }	  
+      if (++start == last)
+	break;
+    }
   
-  /* could not send on, return empty answer or address if known for whole domain */
- frec_err:
+  if (forwarded || is_dnssec)
+    {
+      forward->forward_timestamp = dnsmasq_milliseconds();
+      return 1;
+    }
+  
+  /* could not send on, prepare to return */ 
+  header->id = htons(forward->frec_src.orig_id);
+  free_frec(forward); /* cancel */
+  ede = EDE_NETERR;
+  
+ reply:
   if (udpfd != -1)
     {
-      plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
+      if (!(plen = make_local_answer(flags, gotname, plen, header, daemon->namebuff, limit, first, last, ede)))
+	return 0;
+      
       if (oph)
-	plen = add_pseudoheader(header, plen, ((unsigned char *) header) + PACKETSZ, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
+	{
+	  u16 swap = htons((u16)ede);
+
+	  if (ede != EDE_UNSET)
+	    plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
+	  else
+	    plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
+	}
+      
+#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
+      if (option_bool(OPT_CMARK_ALST_EN))
+	{
+	  unsigned int mark;
+	  int have_mark = get_incoming_mark(udpaddr, dst_addr, /* istcp: */ 0, &mark);
+	  if (have_mark && ((u32)mark & daemon->allowlist_mask))
+	    report_addresses(header, plen, mark);
+	}
+#endif
+      
       send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
     }
-
+	  
   return 0;
 }
 
+/* Check if any frecs need to do a retry, and action that if so. 
+   Return time in milliseconds until he next retry will be required,
+   or -1 if none. */
+int fast_retry(time_t now)
+{
+  struct frec *f;
+  int ret = -1;
+  
+  if (daemon->fast_retry_time != 0)
+    {
+      u32 millis = dnsmasq_milliseconds();
+      
+      for (f = daemon->frec_list; f; f = f->next)
+	if (f->sentto && f->stash && difftime(now, f->time) < daemon->fast_retry_timeout)
+	  {
+#ifdef HAVE_DNSSEC
+	    if (f->blocking_query)
+	      continue;
+#endif
+	    /* t is milliseconds since last query sent. */ 
+	    int to_run, t = (int)(millis - f->forward_timestamp);
+	    
+	    if (t < f->forward_delay)
+	      to_run = f->forward_delay - t;
+	    else
+	      {
+		unsigned char *udpsz;
+		unsigned short udp_size =  PACKETSZ; /* default if no EDNS0 */
+		struct dns_header *header = (struct dns_header *)daemon->packet;
+		
+		/* packet buffer overwritten */
+		daemon->srv_save = NULL;
+		
+		blockdata_retrieve(f->stash, f->stash_len, (void *)header);
+		
+		/* UDP size already set in saved query. */
+		if (find_pseudoheader(header, f->stash_len, NULL, &udpsz, NULL, NULL))
+		  GETSHORT(udp_size, udpsz);
+		
+		daemon->log_display_id = f->frec_src.log_id;
+		
+		forward_query(-1, NULL, NULL, 0, header, f->stash_len, ((char *) header) + udp_size, now, f,
+			      f->flags & FREC_AD_QUESTION, f->flags & FREC_DO_QUESTION, 1);
+
+		to_run = f->forward_delay = 2 * f->forward_delay;
+	      }
+
+	    if (ret == -1 || ret > to_run)
+	      ret = to_run;
+	  }
+      
+    }
+  return ret;
+}
+
+static struct ipsets *domain_find_sets(struct ipsets *setlist, const char *domain) {
+  /* Similar algorithm to search_servers. */
+  struct ipsets *ipset_pos, *ret = NULL;
+  unsigned int namelen = strlen(domain);
+  unsigned int matchlen = 0;
+  for (ipset_pos = setlist; ipset_pos; ipset_pos = ipset_pos->next) 
+    {
+      unsigned int domainlen = strlen(ipset_pos->domain);
+      const char *matchstart = domain + namelen - domainlen;
+      if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
+          (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
+          domainlen >= matchlen) 
+        {
+          matchlen = domainlen;
+          ret = ipset_pos;
+        }
+    }
+
+  return ret;
+}
+
 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind, 
 			    int no_cache, int cache_secure, int bogusanswer, int ad_reqd, int do_bit, int added_pheader, 
-			    int check_subnet, union mysockaddr *query_source)
+			    union mysockaddr *query_source, unsigned char *limit, int ede)
 {
   unsigned char *pheader, *sizep;
-  char **sets = 0;
-  int munged = 0, is_sign;
+  struct ipsets *ipsets = NULL, *nftsets = NULL;
+  int is_sign;
   unsigned int rcode = RCODE(header);
   size_t plen; 
-  
+    
   (void)ad_reqd;
   (void)do_bit;
-  (void)bogusanswer;
-
+ 
 #ifdef HAVE_IPSET
-  if (extract_request(header, n, daemon->namebuff, NULL))
-    {
-	  if (header && (ntohs(header->arcount)))
-	    n = process_appid_catid(header, n);
-      /* Similar algorithm to search_servers. */
-      struct ipsets *ipset_pos;
-      unsigned int namelen = strlen(daemon->namebuff);
-      unsigned int matchlen = 0;
-      if (daemon->ipsets)
-        for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next) 
-	{
-	  unsigned int domainlen = strlen(ipset_pos->domain);
-	  char *matchstart = daemon->namebuff + namelen - domainlen;
-	  if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
-	      (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
-	      domainlen >= matchlen) 
-	    {
-	      matchlen = domainlen;
-	      sets = ipset_pos->sets;
-	    }
-	}
-    }
+  if (daemon->ipsets && extract_request(header, n, daemon->namebuff, NULL))
+    ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
+#endif
+
+#ifdef HAVE_NFTSET
+  if (daemon->nftsets && extract_request(header, n, daemon->namebuff, NULL))
+    nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
 #endif
 
   if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
@@ -693,7 +710,7 @@
       /* Get extended RCODE. */
       rcode |= sizep[2] << 4;
 
-      if (check_subnet && !check_source(header, plen, pheader, query_source))
+      if (option_bool(OPT_CLIENT_SUBNET) && !check_source(header, plen, pheader, query_source))
 	{
 	  my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
 	  return 0;
@@ -704,16 +721,15 @@
 	  if (added_pheader)
 	    {
 	      /* client didn't send EDNS0, we added one, strip it off before returning answer. */
-	      n = rrfilter(header, n, 0);
+	      rrfilter(header, &n, RRFILTER_EDNS0);
 	      pheader = NULL;
 	    }
 	  else
 	    {
-	      unsigned short udpsz;
-
 	      /* If upstream is advertising a larger UDP packet size
 		 than we allow, trim it so that we don't get overlarge
 		 requests for the client. We can't do this for signed packets. */
+	      unsigned short udpsz;
 	      GETSHORT(udpsz, sizep);
 	      if (udpsz > daemon->edns_pktsz)
 		{
@@ -740,7 +756,9 @@
   /* RFC 4035 sect 4.6 para 3 */
   if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
      header->hb4 &= ~HB4_AD;
-  
+
+  header->hb4 |= HB4_RA; /* recursion if available */
+
   if (OPCODE(header) != QUERY)
     return resize_packet(header, n, pheader, plen);
 
@@ -748,7 +766,8 @@
     {
       union all_addr a;
       a.log.rcode = rcode;
-      log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL);
+      a.log.ede = ede;
+      log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL, 0);
       
       return resize_packet(header, n, pheader, plen);
     }
@@ -763,79 +782,331 @@
 	server->flags |= SERV_WARNED_RECURSIVE;
     }  
 
-  if (daemon->bogus_addr && rcode != NXDOMAIN &&
-      check_for_bogus_wildcard(header, n, daemon->namebuff, now))
+  if (header->hb3 & HB3_TC)
     {
-      munged = 1;
-      SET_RCODE(header, NXDOMAIN);
-      header->hb3 &= ~HB3_AA;
-      cache_secure = 0;
+      log_query(F_UPSTREAM, NULL, NULL, "truncated", 0);
+      header->ancount = htons(0);
+      header->nscount = htons(0);
+      header->arcount = htons(0);
     }
-  else 
+
+  if  (!(header->hb3 & HB3_TC) && (!bogusanswer || (header->hb4 & HB4_CD)))
     {
-      int doctored = 0;
-      
-      if (rcode == NXDOMAIN && 
-	  extract_request(header, n, daemon->namebuff, NULL) &&
-	  check_for_local_domain(daemon->namebuff, now))
+      if (rcode == NXDOMAIN && extract_request(header, n, daemon->namebuff, NULL) &&
+	  (check_for_local_domain(daemon->namebuff, now) || lookup_domain(daemon->namebuff, F_CONFIG, NULL, NULL)))
 	{
 	  /* if we forwarded a query for a locally known name (because it was for 
 	     an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
 	     since we know that the domain exists, even if upstream doesn't */
-	  munged = 1;
 	  header->hb3 |= HB3_AA;
 	  SET_RCODE(header, NOERROR);
 	  cache_secure = 0;
 	}
       
-      if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
-	{
-	  my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
-	  munged = 1;
-	  cache_secure = 0;
-	}
-
-      if (doctored)
+      if (daemon->doctors && do_doctor(header, n, daemon->namebuff))
 	cache_secure = 0;
+      
+      /* check_for_bogus_wildcard() does its own caching, so
+	 don't call extract_addresses() if it triggers. */
+      if (daemon->bogus_addr && rcode != NXDOMAIN &&
+	  check_for_bogus_wildcard(header, n, daemon->namebuff, now))
+	{
+	  header->ancount = htons(0);
+	  header->nscount = htons(0);
+	  header->arcount = htons(0);
+	  SET_RCODE(header, NXDOMAIN);
+	  header->hb3 &= ~HB3_AA;
+	  cache_secure = 0;
+	  ede = EDE_BLOCKED;
+	}
+      else
+	{
+	  int rc = extract_addresses(header, n, daemon->namebuff, now, ipsets, nftsets, is_sign, check_rebind, no_cache, cache_secure);
+
+	  if (rc != 0)
+	    {
+	      header->ancount = htons(0);
+	      header->nscount = htons(0);
+	      header->arcount = htons(0);
+	      cache_secure = 0;
+	    }
+	  
+	  if (rc == 1)
+	    {
+	      my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
+	      ede = EDE_BLOCKED;
+	    }
+
+	  if (rc == 2)
+	    {
+	      /* extract_addresses() found a malformed answer. */
+	      SET_RCODE(header, SERVFAIL);
+	      ede = EDE_OTHER;
+	    }
+	}
+      
+      if (RCODE(header) == NOERROR && rrfilter(header, &n, RRFILTER_CONF) > 0) 
+	ede = EDE_FILTERED;
     }
   
 #ifdef HAVE_DNSSEC
-  if (bogusanswer && !(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
-    {
-      /* Bogus reply, turn into SERVFAIL */
-      SET_RCODE(header, SERVFAIL);
-      munged = 1;
-    }
-
   if (option_bool(OPT_DNSSEC_VALID))
     {
-      header->hb4 &= ~HB4_AD;
-      
-      if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
+      if (bogusanswer)
+	{
+	  if (!(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
+	    {
+	      /* Bogus reply, turn into SERVFAIL */
+	      SET_RCODE(header, SERVFAIL);
+	      header->ancount = htons(0);
+	      header->nscount = htons(0);
+	      header->arcount = htons(0);
+	      ede = EDE_DNSSEC_BOGUS;
+	    }
+	}
+      else if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
 	header->hb4 |= HB4_AD;
       
       /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
       if (!do_bit)
-	n = rrfilter(header, n, 1);
+	rrfilter(header, &n, RRFILTER_DNSSEC);
     }
 #endif
-
-  /* do this after extract_addresses. Ensure NODATA reply and remove
-     nameserver info. */
   
-  if (munged)
+  /* the code above can elide sections of the packet. Find the new length here 
+     and put back pseudoheader if it was removed. */
+  n = resize_packet(header, n, pheader, plen);
+
+  if (pheader && ede != EDE_UNSET)
     {
-      header->ancount = htons(0);
-      header->nscount = htons(0);
-      header->arcount = htons(0);
-      header->hb3 &= ~HB3_TC;
+      u16 swap = htons((u16)ede);
+      n = add_pseudoheader(header, n, limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 1);
+    }
+
+  if (RCODE(header) == NXDOMAIN)
+    server->nxdomain_replies++;
+
+  return n;
+}
+
+#ifdef HAVE_DNSSEC
+static void dnssec_validate(struct frec *forward, struct dns_header *header,
+			    ssize_t plen, int status, time_t now)
+{
+  struct frec *orig;
+  int log_resource = 0;
+
+  daemon->log_display_id = forward->frec_src.log_id;
+  
+  /* We've had a reply already, which we're validating. Ignore this duplicate */
+  if (forward->blocking_query)
+    return;
+  
+  /* If all replies to a query are REFUSED, give up. */
+  if (RCODE(header) == REFUSED)
+    status = STAT_ABANDONED;
+  else if (header->hb3 & HB3_TC)
+    {
+      /* Truncated answer can't be validated.
+	 If this is an answer to a DNSSEC-generated query, we still
+	 need to get the client to retry over TCP, so return
+	 an answer with the TC bit set, even if the actual answer fits.
+      */
+      status = STAT_TRUNCATED;
+      if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
+	{
+	  unsigned char *p = (unsigned char *)(header+1);
+	  if  (extract_name(header, plen, &p, daemon->namebuff, 0, 4) == 1)
+	    log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", (forward->flags & FREC_DNSKEY_QUERY) ? T_DNSKEY : T_DS);
+	}
+    }
+
+  /* Find the original query that started it all.... */
+  for (orig = forward; orig->dependent; orig = orig->dependent);
+  
+  /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
+     would invite infinite loops, since the answers to DNSKEY and DS queries
+     will not be cached, so they'll be repeated. */
+  if (!STAT_ISEQUAL(status, STAT_BOGUS) && !STAT_ISEQUAL(status, STAT_TRUNCATED) && !STAT_ISEQUAL(status, STAT_ABANDONED))
+    {
+      if (forward->flags & FREC_DNSKEY_QUERY)
+	status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter);
+      else if (forward->flags & FREC_DS_QUERY)
+	status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter);
+      else
+	status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class, 
+				       !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
+				       NULL, NULL, NULL, &orig->validate_counter);
+    }
+
+  if (STAT_ISEQUAL(status, STAT_ABANDONED))
+    log_resource = 1;
+  
+  /* Can't validate, as we're missing key data. Put this
+     answer aside, whilst we get that. */     
+  if (STAT_ISEQUAL(status, STAT_NEED_DS) || STAT_ISEQUAL(status, STAT_NEED_KEY))
+    {
+      struct frec *new = NULL;
+      struct blockdata *stash;
+      
+      /* Now save reply pending receipt of key data */
+      if ((stash = blockdata_alloc((char *)header, plen)))
+	{
+	  /* validate routines leave name of required record in daemon->keyname */
+	  unsigned int flags = STAT_ISEQUAL(status, STAT_NEED_KEY) ? FREC_DNSKEY_QUERY : FREC_DS_QUERY;
+
+	  if ((new = lookup_frec_dnssec(daemon->keyname, forward->class, flags, header)))
+	    {
+	      /* This is tricky; it detects loops in the dependency
+		 graph for DNSSEC validation, say validating A requires DS B
+		 and validating DS B requires DNSKEY C and validating DNSKEY C requires DS B.
+		 This should never happen in correctly signed records, but it's
+		 likely the case that sufficiently broken ones can cause our validation
+		 code requests to exhibit cycles. The result is that the ->blocking_query list
+		 can form a cycle, and under certain circumstances that can lock us in 
+		 an infinite loop. Here we transform the situation into ABANDONED. */
+	      struct frec *f;
+	      for (f = new; f; f = f->blocking_query)
+		if (f == forward)
+		  break;
+
+	      if (!f)
+		{
+		  forward->next_dependent = new->dependent;
+		  new->dependent = forward;
+		  /* Make consistent, only replace query copy with unvalidated answer
+		     when we set ->blocking_query. */
+		  if (forward->stash)
+		    blockdata_free(forward->stash);
+		  forward->blocking_query = new;
+		  forward->stash_len = plen;
+		  forward->stash = stash;
+		  return;
+		}
+	    }
+	  else if (orig->work_counter-- == 0)
+	    {
+	      my_syslog(LOG_WARNING, _("limit exceeded: per-query subqueries"));
+	      log_resource = 1;
+	    }
+	  else
+	    {
+	      struct server *server;
+	      void *hash;
+	      size_t nn;
+	      int serverind, fd;
+	      struct randfd_list *rfds = NULL;
+	      
+	      /* Make sure we don't expire and free the orig frec during the
+		 allocation of a new one: third arg of get_new_frec() does that. */
+	      if ((serverind = dnssec_server(forward->sentto, daemon->keyname, NULL, NULL)) != -1 &&
+		  (server = daemon->serverarray[serverind]) &&
+		  (nn = dnssec_generate_query(header, ((unsigned char *) header) + server->edns_pktsz,
+					      daemon->keyname, forward->class,
+					      STAT_ISEQUAL(status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz)) && 
+		  (hash = hash_questions(header, nn, daemon->namebuff)) &&
+		  (fd = allocate_rfd(&rfds, server)) != -1 &&
+		  (new = get_new_frec(now, server, 1)))
+		{
+		  struct frec *next = new->next;
+		  
+		  *new = *forward; /* copy everything, then overwrite */
+		  new->next = next;
+		  new->blocking_query = NULL;
+		  
+		  new->frec_src.log_id = daemon->log_display_id = ++daemon->log_id;
+		  new->sentto = server;
+		  new->rfds = rfds;
+		  new->frec_src.next = NULL;
+		  new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
+		  new->flags |= flags;
+		  new->forwardall = 0;
+		  
+		  forward->next_dependent = NULL;
+		  new->dependent = forward; /* to find query awaiting new one. */
+		  
+		  /* Make consistent, only replace query copy with unvalidated answer
+		     when we set ->blocking_query. */
+		  forward->blocking_query = new; 
+		  if (forward->stash)
+		    blockdata_free(forward->stash);
+		  forward->stash_len = plen;
+		  forward->stash = stash;
+		  
+		  memcpy(new->hash, hash, HASH_SIZE);
+		  new->new_id = get_id();
+		  header->id = htons(new->new_id);
+		  /* Save query for retransmission and de-dup */
+		  new->stash = blockdata_alloc((char *)header, nn);
+		  new->stash_len = nn;
+		  if (daemon->fast_retry_time != 0)
+		    new->forward_timestamp = dnsmasq_milliseconds();
+		  
+		  /* Don't resend this. */
+		  daemon->srv_save = NULL;
+		  
+#ifdef HAVE_CONNTRACK
+		  if (option_bool(OPT_CONNTRACK))
+		    set_outgoing_mark(orig, fd);
+#endif
+		  
+		  server_send(server, fd, header, nn, 0);
+		  server->queries++;
+#ifdef HAVE_DUMPFILE
+		  dump_packet_udp(DUMP_SEC_QUERY, (void *)header, (size_t)nn, NULL, &server->addr, fd);
+#endif
+		  log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->keyname, &server->addr,
+				       STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
+		  return;
+		}
+	      
+	      free_rfds(&rfds); /* error unwind */
+	    }
+	  
+	  blockdata_free(stash); /* don't leak this on failure. */
+	}
+
+      /* sending DNSSEC query failed or loop detected. */
+      status = STAT_ABANDONED;
+    }
+
+  if (log_resource)
+    {
+      /* Log the actual validation that made us barf. */
+      unsigned char *p = (unsigned char *)(header+1);
+      if  (extract_name(header, plen, &p, daemon->namebuff, 0, 4) == 1)
+	my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."),
+		  daemon->namebuff[0] ? daemon->namebuff : ".");
     }
   
-  /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
-     sections of the packet. Find the new length here and put back pseudoheader
-     if it was removed. */
-  return resize_packet(header, n, pheader, plen);
+#ifdef HAVE_DUMPFILE
+  if (STAT_ISEQUAL(status, STAT_BOGUS) || STAT_ISEQUAL(status, STAT_ABANDONED))
+    dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
+		    header, (size_t)plen, &forward->sentto->addr, NULL, -daemon->port);
+#endif
+  
+  /* Validated original answer, all done. */
+  if (!forward->dependent)
+    return_reply(now, forward, header, plen, status);
+  else
+    {
+      /* validated subsidiary query/queries, (and cached result)
+	 pop that and return to the previous query/queries we were working on. */
+      struct frec *prev, *nxt = forward->dependent;
+
+      free_frec(forward);
+      
+      while ((prev = nxt))
+	{
+	  /* ->next_dependent will have changed after return from recursive call below. */
+	  nxt = prev->next_dependent;
+	  prev->blocking_query = NULL; /* already gone */
+	  blockdata_retrieve(prev->stash, prev->stash_len, (void *)header);
+	  dnssec_validate(prev, header, prev->stash_len, status, now);
+	}
+    }
 }
+#endif
 
 /* sets new last_server */
 void reply_query(int fd, time_t now)
@@ -847,10 +1118,10 @@
   struct frec *forward;
   socklen_t addrlen = sizeof(serveraddr);
   ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
-  size_t nn;
   struct server *server;
   void *hash;
-
+  int first, last, c;
+    
   /* packet buffer overwritten */
   daemon->srv_save = NULL;
 
@@ -862,39 +1133,47 @@
 
   if (n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR))
     return;
+
+  hash = hash_questions(header, n, daemon->namebuff);
   
-  /* spoof check: answer must come from known server, */
-  for (server = daemon->servers; server; server = server->next)
-    if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
-	sockaddr_isequal(&server->addr, &serveraddr))
+  if (!(forward = lookup_frec(ntohs(header->id), fd, hash, &first, &last)))
+    return;
+  
+  /* spoof check: answer must come from known server, also
+     we may have sent the same query to multiple servers from
+     the same local socket, and would like to know which one has answered. */
+  for (c = first; c != last; c++)
+    if (sockaddr_isequal(&daemon->serverarray[c]->addr, &serveraddr))
       break;
   
-  if (!server)
+  if (c == last)
     return;
 
+  server = daemon->serverarray[c];
+
+  if (RCODE(header) != REFUSED)
+    daemon->serverarray[first]->last_server = c;
+  else if (daemon->serverarray[first]->last_server == c)
+    daemon->serverarray[first]->last_server = -1;
+
   /* If sufficient time has elapsed, try and expand UDP buffer size again. */
   if (difftime(now, server->pktsz_reduced) > UDP_TEST_TIME)
     server->edns_pktsz = daemon->edns_pktsz;
 
-  hash = hash_questions(header, n, daemon->namebuff);
-  
-  if (!(forward = lookup_frec(ntohs(header->id), fd, hash)))
-    return;
-  
-#ifdef HAVE_DUMPFILE
-  dump_packet((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
-	      (void *)header, n, &serveraddr, NULL);
-#endif
-
   /* log_query gets called indirectly all over the place, so 
      pass these in global variables - sorry. */
   daemon->log_display_id = forward->frec_src.log_id;
   daemon->log_source_addr = &forward->frec_src.source;
   
+#ifdef HAVE_DUMPFILE
+  dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
+		  (void *)header, n, &serveraddr, NULL, fd);
+#endif
+
   if (daemon->ignore_addr && RCODE(header) == NOERROR &&
       check_for_ignored_address(header, n))
     return;
-
+  
   /* Note: if we send extra options in the EDNS0 header, we can't recreate
      the query from the reply. */
   if ((RCODE(header) == REFUSED || RCODE(header) == SERVFAIL) &&
@@ -902,384 +1181,305 @@
       !(forward->flags & FREC_HAS_EXTRADATA))
     /* for broken servers, attempt to send to another one. */
     {
-      unsigned char *pheader;
+      unsigned char *pheader, *udpsz;
+      unsigned short udp_size =  PACKETSZ; /* default if no EDNS0 */
       size_t plen;
       int is_sign;
-
+      size_t nn = 0;
+      
 #ifdef HAVE_DNSSEC
+      /* The query MAY have got a good answer, and be awaiting
+	 the results of further queries, in which case
+	 The Stash contains something else and we don't need to retry anyway. */
+      if (forward->blocking_query)
+	return;
+      
       if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
 	{
-	  struct server *start;
-	  
+	  /* DNSSEC queries have a copy of the original query stashed. */
 	  blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
-	  plen = forward->stash_len;
-
-	  forward->forwardall = 2; /* only retry once */
-	  start = forward->sentto;
-
-	  /* for non-domain specific servers, see if we can find another to try. */
-	  if ((forward->sentto->flags & SERV_TYPE) == 0)
-	    while (1)
-	      {
-		if (!(start = start->next))
-		  start = daemon->servers;
-		if (start == forward->sentto)
-		  break;
-		
-		if ((start->flags & SERV_TYPE) == 0 &&
-		    (start->flags & SERV_DO_DNSSEC))
-		  break;
-	      }
-	    
-	  
-	  if ((fd = allocate_rfd(&forward->rfds, start)) != -1)
-	    server_send_log(start, fd, header, plen,
-			    DUMP_SEC_QUERY,
-			    F_NOEXTRA | F_DNSSEC, "retry", "dnssec");
-	  return;
+	  nn = forward->stash_len;
+	  udp_size = daemon->edns_pktsz;
 	}
+      else
 #endif
-      
-      /* In strict order mode, there must be a server later in the chain
-	 left to send to, otherwise without the forwardall mechanism,
-	 code further on will cycle around the list forwever if they
-	 all return REFUSED. Note that server is always non-NULL before 
-	 this executes. */
-      if (option_bool(OPT_ORDER))
-	for (server = forward->sentto->next; server; server = server->next)
-	  if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR | SERV_LOOP)))
-	    break;
-
-      /* recreate query from reply */
-      pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign, NULL);
-      if (!is_sign && server)
 	{
-	  header->ancount = htons(0);
-	  header->nscount = htons(0);
-	  header->arcount = htons(0);
-	  if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
+	  /* in fast retry mode, we have a copy of the query. */
+	  if (daemon->fast_retry_time != 0 && forward->stash)
 	    {
+	      blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
+	      nn = forward->stash_len;
+	      /* UDP size already set in saved query. */
+	      if (find_pseudoheader(header, (size_t)n, NULL, &udpsz, NULL, NULL))
+		GETSHORT(udp_size, udpsz);
+	    }
+	  else
+	    {
+	      /* recreate query from reply */
+	      if ((pheader = find_pseudoheader(header, (size_t)n, &plen, &udpsz, &is_sign, NULL)))
+		GETSHORT(udp_size, udpsz);
+	      
+	      /* If the client provides an EDNS0 UDP size, use that to limit our reply.
+		 (bounded by the maximum configured). If no EDNS0, then it
+		 defaults to 512 */
+	      if (udp_size > daemon->edns_pktsz)
+		udp_size = daemon->edns_pktsz;
+	      else if (udp_size < PACKETSZ)
+		udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
+	      
+	      header->ancount = htons(0);
+	      header->nscount = htons(0);
+	      header->arcount = htons(0);
 	      header->hb3 &= ~(HB3_QR | HB3_AA | HB3_TC);
 	      header->hb4 &= ~(HB4_RA | HB4_RCODE | HB4_CD | HB4_AD);
 	      if (forward->flags & FREC_CHECKING_DISABLED)
 		header->hb4 |= HB4_CD;
 	      if (forward->flags & FREC_AD_QUESTION)
 		header->hb4 |= HB4_AD;
-	      if (forward->flags & FREC_DO_QUESTION)
+
+	      if (!is_sign &&
+		  (nn = resize_packet(header, (size_t)n, pheader, plen)) &&
+		  (forward->flags & FREC_DO_QUESTION))
 		add_do_bit(header, nn,  (unsigned char *)pheader + plen);
-	      forward_query(-1, NULL, NULL, 0, header, nn, now, forward, forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION);
-	      return;
 	    }
 	}
-    }   
-   
-  server = forward->sentto;
-  if ((forward->sentto->flags & SERV_TYPE) == 0)
-    {
-      if (RCODE(header) == REFUSED)
-	server = NULL;
-      else
+      
+      if (nn)
 	{
-	  struct server *last_server;
-	  
-	  /* find good server by address if possible, otherwise assume the last one we sent to */ 
-	  for (last_server = daemon->servers; last_server; last_server = last_server->next)
-	    if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
-		sockaddr_isequal(&last_server->addr, &serveraddr))
-	      {
-		server = last_server;
-		break;
-	      }
-	} 
-      if (!option_bool(OPT_ALL_SERVERS))
-	daemon->last_server = server;
-    }
- 
-  /* We tried resending to this server with a smaller maximum size and got an answer.
-     Make that permanent. To avoid reduxing the packet size for a single dropped packet,
-     only do this when we get a truncated answer, or one larger than the safe size. */
-  if (forward->sentto->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) && 
-      ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
-    {
-      forward->sentto->edns_pktsz = SAFE_PKTSZ;
-      forward->sentto->pktsz_reduced = now;
-      (void)prettyprint_addr(&forward->sentto->addr, daemon->addrbuff);
-      my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
+	  forward_query(-1, NULL, NULL, 0, header, nn, ((char *) header) + udp_size, now, forward,
+			forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 0);
+	  return;
+	}
     }
 
-    
   /* If the answer is an error, keep the forward record in place in case
      we get a good reply from another server. Kill it when we've
      had replies from all to avoid filling the forwarding table when
      everything is broken */
-  if (forward->forwardall == 0 || --forward->forwardall == 1 || RCODE(header) != REFUSED)
+
+  /* decrement count of replies received if we sent to more than one server. */
+  if (forward->forwardall && (--forward->forwardall > 1) && RCODE(header) == REFUSED)
+    return;
+
+  /* We tried resending to this server with a smaller maximum size and got an answer.
+     Make that permanent. To avoid reducing the packet size for a single dropped packet,
+     only do this when we get a truncated answer, or one larger than the safe size. */
+  if (server->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) && 
+      ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
     {
-      int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
-      
-      if (option_bool(OPT_NO_REBIND))
-	check_rebind = !(forward->flags & FREC_NOREBIND);
-      
-      /*   Don't cache replies where DNSSEC validation was turned off, either
-	   the upstream server told us so, or the original query specified it.  */
-      if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
-	no_cache_dnssec = 1;
-      
+      server->edns_pktsz = SAFE_PKTSZ;
+      server->pktsz_reduced = now;
+      (void)prettyprint_addr(&server->addr, daemon->addrbuff);
+      my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
+    }
+
+  forward->sentto = server;
+
+  /* We have a good answer, and will now validate it or return it. 
+     It may be some time before the validation completes, but we don't need
+     any more answers, so close the socket(s) on which we were expecting
+     answers, to conserve file descriptors, and to save work reading and
+     discarding answers for other upstreams. */
+  free_rfds(&forward->rfds);
+
+  /* calculate modified moving average of server latency */
+  if (server->query_latency == 0)
+    server->mma_latency = (dnsmasq_milliseconds() - forward->forward_timestamp) * 128; /* init */
+  else
+    server->mma_latency += dnsmasq_milliseconds() - forward->forward_timestamp - server->query_latency;
+  /* denominator controls how many queries we average over. */
+  server->query_latency = server->mma_latency/128;
+  
+  
 #ifdef HAVE_DNSSEC
-      if ((forward->sentto->flags & SERV_DO_DNSSEC) && 
-	  option_bool(OPT_DNSSEC_VALID) && !(forward->flags & FREC_CHECKING_DISABLED))
+  if ((forward->sentto->flags & SERV_DO_DNSSEC) && 
+      option_bool(OPT_DNSSEC_VALID) &&
+      !(forward->flags & FREC_CHECKING_DISABLED))
+    dnssec_validate(forward, header, n, STAT_OK, now);
+  else
+#endif
+    return_reply(now, forward, header, n, STAT_OK); 
+}
+
+static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
+{
+  int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
+  size_t nn;
+  int ede = EDE_UNSET;
+
+  (void)status;
+
+  daemon->log_display_id = forward->frec_src.log_id;
+  daemon->log_source_addr = &forward->frec_src.source;
+  
+  /* Don't cache replies where DNSSEC validation was turned off, either
+     the upstream server told us so, or the original query specified it.  */
+  if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
+    no_cache_dnssec = 1;
+
+#ifdef HAVE_DNSSEC
+  if (!STAT_ISEQUAL(status, STAT_OK))
+    {
+      /* status is STAT_OK when validation not turned on. */
+      no_cache_dnssec = 0;
+      
+      if (STAT_ISEQUAL(status, STAT_TRUNCATED))
 	{
-	  int status = 0;
+	  header->hb3 |= HB3_TC;
+	  log_query(F_SECSTAT, "result", NULL, "TRUNCATED", 0);
+	}
+      else
+	{
+	  char *result, *domain = "result";
+	  union all_addr a;
 
-	  /* We've had a reply already, which we're validating. Ignore this duplicate */
-	  if (forward->blocking_query)
-	    return;
-	  
-	   /* Truncated answer can't be validated.
-	      If this is an answer to a DNSSEC-generated query, we still
-	      need to get the client to retry over TCP, so return
-	      an answer with the TC bit set, even if the actual answer fits.
-	   */
-	  if (header->hb3 & HB3_TC)
-	    status = STAT_TRUNCATED;
-	  
-	  while (1)
+	  a.log.ede = ede = errflags_to_ede(status);
+
+	  if (STAT_ISEQUAL(status, STAT_ABANDONED))
 	    {
-	      /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
-		 would invite infinite loops, since the answers to DNSKEY and DS queries
-		 will not be cached, so they'll be repeated. */
-	      if (status != STAT_BOGUS && status != STAT_TRUNCATED && status != STAT_ABANDONED)
-		{
-		  if (forward->flags & FREC_DNSKEY_QUERY)
-		    status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
-		  else if (forward->flags & FREC_DS_QUERY)
-		    status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
-		  else
-		    status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class, 
-						   !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
-						   NULL, NULL, NULL);
-#ifdef HAVE_DUMPFILE
-		  if (status == STAT_BOGUS)
-		    dump_packet((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
-				header, (size_t)n, &serveraddr, NULL);
-#endif
-		}
-	      
-	      /* Can't validate, as we're missing key data. Put this
-		 answer aside, whilst we get that. */     
-	      if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
-		{
-		  struct frec *new, *orig;
-		  
-		  /* Free any saved query */
-		  if (forward->stash)
-		    blockdata_free(forward->stash);
-		  
-		  /* Now save reply pending receipt of key data */
-		  if (!(forward->stash = blockdata_alloc((char *)header, n)))
-		    return;
-		  forward->stash_len = n;
-		  
-		  /* Find the original query that started it all.... */
-		  for (orig = forward; orig->dependent; orig = orig->dependent);
-		  
-		  /* Make sure we don't expire and free the orig frec during the
-		     allocation of a new one. */
-		  if (--orig->work_counter == 0 || !(new = get_new_frec(now, NULL, orig)))
-		    status = STAT_ABANDONED;
-		  else
-		    {
-		      int querytype, fd, type = SERV_DO_DNSSEC;
-		      struct frec *next = new->next;
-		      char *domain;
-		      
-		      *new = *forward; /* copy everything, then overwrite */
-		      new->next = next;
-		      new->blocking_query = NULL;
-
-		      /* Find server to forward to. This will normally be the 
-			 same as for the original query, but may be another if
-			 servers for domains are involved. */		      
-		      if (search_servers(now, NULL, F_DNSSECOK, daemon->keyname, &type, &domain, NULL) == 0)
-			{
-			  struct server *start, *new_server = NULL;
-			  start = server = forward->sentto;
-			  
-			  while (1)
-			    {
-			      if (server_test_type(start, domain, type, SERV_DO_DNSSEC))
-				{
-				  new_server = start;
-				  if (server == start)
-				    {
-				      new_server = NULL;
-				      break;
-				    }
-				}
-			      
-			      if (!(start = start->next))
-				start = daemon->servers;
-			      if (start == server)
-				break;
-			    }
-			  
-			  if (new_server)
-			    server = new_server;
-			}
-		      
-		      new->sentto = server;
-		      new->rfds = NULL;
-		      new->frec_src.next = NULL;
-		      new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
-		      new->forwardall = 0;
-		      
-		      new->dependent = forward; /* to find query awaiting new one. */
-		      forward->blocking_query = new; /* for garbage cleaning */
-		      /* validate routines leave name of required record in daemon->keyname */
-		      if (status == STAT_NEED_KEY)
-			{
-			  new->flags |= FREC_DNSKEY_QUERY; 
-			  querytype = T_DNSKEY;
-			}
-		      else 
-			{
-			  new->flags |= FREC_DS_QUERY;
-			  querytype = T_DS;
-			}
-
-		      nn = dnssec_generate_query(header,((unsigned char *) header) + server->edns_pktsz,
-						 daemon->keyname, forward->class, querytype, server->edns_pktsz);
-
-		      memcpy(new->hash, hash_questions(header, nn, daemon->namebuff), HASH_SIZE);
-		      new->new_id = get_id();
-		      header->id = htons(new->new_id);
-		      /* Save query for retransmission */
-		      new->stash = blockdata_alloc((char *)header, nn);
-		      new->stash_len = nn;
-		      
-		      /* Don't resend this. */
-		      daemon->srv_save = NULL;
-		      
-		      if ((fd = allocate_rfd(&new->rfds, server)) != -1)
-			{
-#ifdef HAVE_CONNTRACK
-			  if (option_bool(OPT_CONNTRACK))
-			    set_outgoing_mark(orig, fd);
-#endif
-			  server_send_log(server, fd, header, nn, DUMP_SEC_QUERY,
-					  F_NOEXTRA | F_DNSSEC, daemon->keyname,
-					  querystr("dnssec-query", querytype));
-			  server->queries++;
-			}
-		    }		  
-		  return;
-		}
-	  
-	      /* Validated original answer, all done. */
-	      if (!forward->dependent)
-		break;
-	      
-	      /* validated subsidiary query, (and cached result)
-		 pop that and return to the previous query we were working on. */
-	      struct frec *prev = forward->dependent;
-	      free_frec(forward);
-	      forward = prev;
-	      forward->blocking_query = NULL; /* already gone */
-	      blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
-	      n = forward->stash_len;
+	      result = "ABANDONED";
+	      status = STAT_BOGUS;
 	    }
-	
-	  
-	  no_cache_dnssec = 0;
-	  
-	  if (status == STAT_TRUNCATED)
-	    header->hb3 |= HB3_TC;
 	  else
-	    {
-	      char *result, *domain = "result";
-	      
-	      if (status == STAT_ABANDONED)
-		{
-		  result = "ABANDONED";
-		  status = STAT_BOGUS;
-		}
-	      else
-		result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
-	      
-	      if (status == STAT_BOGUS && extract_request(header, n, daemon->namebuff, NULL))
-		domain = daemon->namebuff;
-	      
-	      log_query(F_SECSTAT, domain, NULL, result);
-	    }
+	    result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
 	  
-	  if (status == STAT_SECURE)
+	  if (STAT_ISEQUAL(status, STAT_SECURE))
 	    cache_secure = 1;
-	  else if (status == STAT_BOGUS)
+	  else if (STAT_ISEQUAL(status, STAT_BOGUS))
 	    {
 	      no_cache_dnssec = 1;
 	      bogusanswer = 1;
+	      
+	      if (extract_request(header, n, daemon->namebuff, NULL))
+		domain = daemon->namebuff;
 	    }
-	}
-
-#endif
-
-      /* restore CD bit to the value in the query */
-      if (forward->flags & FREC_CHECKING_DISABLED)
-	header->hb4 |= HB4_CD;
-      else
-	header->hb4 &= ~HB4_CD;
-
-      /* Never cache answers which are contingent on the source or MAC address EDSN0 option,
-	 since the cache is ignorant of such things. */
-      if (forward->flags & FREC_NO_CACHE)
-	no_cache_dnssec = 1;
       
-      if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer, 
-			      forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 
-			      forward->flags & FREC_ADDED_PHEADER, forward->flags & FREC_HAS_SUBNET, &forward->frec_src.source)))
-	{
-	  struct frec_src *src;
+	  log_query(F_SECSTAT, domain, &a, result, 0);
+	}
+    }
 
-	  header->id = htons(forward->frec_src.orig_id);
-	  header->hb4 |= HB4_RA; /* recursion if available */
+  if ((daemon->limit[LIMIT_CRYPTO] - forward->validate_counter) > (int)daemon->metrics[METRIC_CRYPTO_HWM])
+    daemon->metrics[METRIC_CRYPTO_HWM] = daemon->limit[LIMIT_CRYPTO] - forward->validate_counter;
+
+  if ((daemon->limit[LIMIT_WORK] - forward->work_counter) > (int)daemon->metrics[METRIC_WORK_HWM])
+    daemon->metrics[METRIC_WORK_HWM] = daemon->limit[LIMIT_WORK] - forward->work_counter;
+#endif
+  
+  if (option_bool(OPT_NO_REBIND))
+    check_rebind = !(forward->flags & FREC_NOREBIND);
+  
+  /* restore CD bit to the value in the query */
+  if (forward->flags & FREC_CHECKING_DISABLED)
+    header->hb4 |= HB4_CD;
+  else
+    header->hb4 &= ~HB4_CD;
+  
+  /* Never cache answers which are contingent on the source or MAC address EDSN0 option,
+     since the cache is ignorant of such things. */
+  if (forward->flags & FREC_NO_CACHE)
+    no_cache_dnssec = 1;
+  
+  if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer, 
+			  forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 
+			  forward->flags & FREC_ADDED_PHEADER, &forward->frec_src.source,
+			  ((unsigned char *)header) + daemon->edns_pktsz, ede)))
+    {
+      struct frec_src *src;
+      
+      header->id = htons(forward->frec_src.orig_id);
 #ifdef HAVE_DNSSEC
-	  /* We added an EDNSO header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
-	     greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
-             header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
-	  if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
+      /* We added an EDNSO header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
+	 greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
+	 header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
+      if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
+	{
+	  header->ancount = htons(0);
+	  header->nscount = htons(0);
+	  header->arcount = htons(0);
+	  header->hb3 |= HB3_TC;
+	  nn = resize_packet(header, nn, NULL, 0);
+	}
+#endif
+      
+      for (src = &forward->frec_src; src; src = src->next)
+	{
+	  header->id = htons(src->orig_id);
+	  
+#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
+	  if (option_bool(OPT_CMARK_ALST_EN))
 	    {
-	      header->ancount = htons(0);
-	      header->nscount = htons(0);
-	      header->arcount = htons(0);
-	      header->hb3 |= HB3_TC;
-	      nn = resize_packet(header, nn, NULL, 0);
+	      unsigned int mark;
+	      int have_mark = get_incoming_mark(&src->source, &src->dest, /* istcp: */ 0, &mark);
+	      if (have_mark && ((u32)mark & daemon->allowlist_mask))
+		report_addresses(header, nn, mark);
 	    }
 #endif
-
-	  for (src = &forward->frec_src; src; src = src->next)
+	  
+	  if (src->fd != -1)
 	    {
-	      header->id = htons(src->orig_id);
-	      
 #ifdef HAVE_DUMPFILE
-	      dump_packet(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source);
-#endif
-	      
+	      dump_packet_udp(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source, src->fd);
+#endif 
 	      send_from(src->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn, 
 			&src->source, &src->dest, src->iface);
-
+	      
 	      if (option_bool(OPT_EXTRALOG) && src != &forward->frec_src)
 		{
 		  daemon->log_display_id = src->log_id;
 		  daemon->log_source_addr = &src->source;
-		  log_query(F_UPSTREAM, "query", NULL, "duplicate");
+		  log_query(F_UPSTREAM, "query", NULL, "duplicate", 0);
 		}
 	    }
 	}
-
-      free_frec(forward); /* cancel */
     }
+
+  free_frec(forward); /* cancel */
 }
 
 
+#ifdef HAVE_CONNTRACK
+static int is_query_allowed_for_mark(u32 mark, const char *name)
+{
+  int is_allowable_name, did_validate_name = 0;
+  struct allowlist *allowlists;
+  char **patterns_pos;
+  
+  for (allowlists = daemon->allowlists; allowlists; allowlists = allowlists->next)
+    if (allowlists->mark == (mark & daemon->allowlist_mask & allowlists->mask))
+      for (patterns_pos = allowlists->patterns; *patterns_pos; patterns_pos++)
+	{
+	  if (!strcmp(*patterns_pos, "*"))
+	    return 1;
+	  if (!did_validate_name)
+	    {
+	      is_allowable_name = name ? is_valid_dns_name(name) : 0;
+	      did_validate_name = 1;
+	    }
+	  if (is_allowable_name && is_dns_name_matching_pattern(name, *patterns_pos))
+	    return 1;
+	}
+  return 0;
+}
+
+static size_t answer_disallowed(struct dns_header *header, size_t qlen, u32 mark, const char *name)
+{
+  unsigned char *p;
+  (void)name;
+  (void)mark;
+  
+#ifdef HAVE_UBUS
+  if (name)
+    ubus_event_bcast_connmark_allowlist_refused(mark, name);
+#endif
+  
+  setup_reply(header, /* flags: */ 0, EDE_BLOCKED);
+  
+  if (!(p = skip_questions(header, qlen)))
+    return 0;
+  return p - (unsigned char *)header;
+}
+#endif
+
 void receive_query(struct listener *listen, time_t now)
 {
   struct dns_header *header = (struct dns_header *)daemon->packet;
@@ -1291,6 +1491,11 @@
   size_t m;
   ssize_t n;
   int if_index = 0, auth_dns = 0, do_bit = 0, have_pseudoheader = 0;
+#ifdef HAVE_CONNTRACK
+  unsigned int mark = 0;
+  int have_mark = 0;
+  int is_single_query = 0, allowed = 1;
+#endif
 #ifdef HAVE_AUTH
   int local_auth = 0;
 #endif
@@ -1313,7 +1518,7 @@
   int family = listen->addr.sa.sa_family;
    /* Can always get recvd interface for IPv6 */
   int check_dst = !option_bool(OPT_NOWILD) || family == AF_INET6;
-
+  
   /* packet buffer overwritten */
   daemon->srv_save = NULL;
 
@@ -1399,7 +1604,8 @@
 	  static int warned = 0;
 	  if (!warned)
 	    {
-	      my_syslog(LOG_WARNING, _("Ignoring query from non-local network"));
+	      prettyprint_addr(&source_addr, daemon->addrbuff);
+	      my_syslog(LOG_WARNING, _("ignoring query from non-local network %s (logged only once)"), daemon->addrbuff);
 	      warned = 1;
 	    }
 	  return;
@@ -1517,7 +1723,12 @@
   daemon->log_source_addr = &source_addr;
 
 #ifdef HAVE_DUMPFILE
-  dump_packet(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL);
+  dump_packet_udp(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL, listen->fd);
+#endif
+  
+#ifdef HAVE_CONNTRACK
+  if (option_bool(OPT_CMARK_ALST_EN))
+    have_mark = get_incoming_mark(&source_addr, &dst_addr, /* istcp: */ 0, &mark);
 #endif
 	  
   if (extract_request(header, (size_t)n, daemon->namebuff, &type))
@@ -1525,10 +1736,12 @@
 #ifdef HAVE_AUTH
       struct auth_zone *zone;
 #endif
-      char *types = querystr(auth_dns ? "auth" : "query", type);
-
       log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
-			   &source_addr, types);
+			   &source_addr, auth_dns ? "auth" : "query", type);
+      
+#ifdef HAVE_CONNTRACK
+      is_single_query = 1;
+#endif
 
 #ifdef HAVE_AUTH
       /* find queries for zones we're authoritative for, and answer them directly */
@@ -1563,93 +1776,320 @@
 	
       /* If the client provides an EDNS0 UDP size, use that to limit our reply.
 	 (bounded by the maximum configured). If no EDNS0, then it
-	 defaults to 512 */
+	 defaults to 512. We write this value into the query packet too, so that
+	 if it's forwarded, we don't specify a maximum size greater than we can handle. */
       if (udp_size > daemon->edns_pktsz)
 	udp_size = daemon->edns_pktsz;
       else if (udp_size < PACKETSZ)
 	udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
-    }
 
+      pheader -= 6; /* ext_class */
+      PUTSHORT(udp_size, pheader); /* Bounding forwarded queries to maximum configured */
+    }
+  
+#ifdef HAVE_CONNTRACK
 #ifdef HAVE_AUTH
-  if (auth_dns)
+  if (!auth_dns || local_auth)
+#endif
+    if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
+      allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
+#endif
+  
+  if (0);
+#ifdef HAVE_CONNTRACK
+  else if (!allowed)
+    {
+      u16 swap = htons(EDE_BLOCKED);
+
+      m = answer_disallowed(header, (size_t)n, (u32)mark, is_single_query ? daemon->namebuff : NULL);
+      
+      if (have_pseudoheader && m != 0)
+	m = add_pseudoheader(header,  m,  ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
+			     EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
+      
+      if (m >= 1)
+	{
+#ifdef HAVE_DUMPFILE
+	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
+#endif
+	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
+		    (char *)header, m, &source_addr, &dst_addr, if_index);
+	  daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
+	}
+    }
+#endif
+#ifdef HAVE_AUTH
+  else if (auth_dns)
     {
       m = answer_auth(header, ((char *) header) + udp_size, (size_t)n, now, &source_addr, 
 		      local_auth, do_bit, have_pseudoheader);
       if (m >= 1)
 	{
+#ifdef HAVE_DUMPFILE
+	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
+#endif
+#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
+	  if (local_auth)
+	    if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
+	      report_addresses(header, m, mark);
+#endif
 	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 		    (char *)header, m, &source_addr, &dst_addr, if_index);
 	  daemon->metrics[METRIC_DNS_AUTH_ANSWERED]++;
 	}
     }
-  else
 #endif
+  else
     {
+      int stale, filtered;
       int ad_reqd = do_bit;
-       /* RFC 6840 5.7 */
+      int fd = listen->fd;
+      struct blockdata *saved_question = blockdata_alloc((char *) header, (size_t)n);
+      
+      /* RFC 6840 5.7 */
       if (header->hb4 & HB4_AD)
 	ad_reqd = 1;
 
       m = answer_request(header, ((char *) header) + udp_size, (size_t)n, 
-			 dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader);
+			 dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale, &filtered);
       
       if (m >= 1)
 	{
+	  if (have_pseudoheader)
+	    {
+	      int ede = EDE_UNSET;
+
+	      if (filtered)
+		ede = EDE_FILTERED;
+	      else if (stale)
+		ede = EDE_STALE;
+
+	      if (ede != EDE_UNSET)
+		{
+		  u16 swap = htons(ede);
+		  
+		  m = add_pseudoheader(header,  m,  ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
+				       EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
+		}
+	    }
+	  
+#ifdef HAVE_DUMPFILE
+	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
+#endif
+#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
+	  if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
+	    report_addresses(header, m, mark);
+#endif
 	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 		    (char *)header, m, &source_addr, &dst_addr, if_index);
 	  daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
+	  if (stale)
+	    daemon->metrics[METRIC_DNS_STALE_ANSWERED]++;
 	}
-      else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
-			     header, (size_t)n, now, NULL, ad_reqd, do_bit))
-	daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
-      else
-	daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
+      
+      if (stale)
+	{
+	  /* We answered with stale cache data, so forward the query anyway to
+	     refresh that. */
+	  m = 0;
+	  
+	  /* We've already answered the client, so don't send it the answer 
+	     when it comes back. */
+	  fd = -1;
+	}
+      
+      if (saved_question)
+	{
+	  if (m == 0)
+	    {
+	      blockdata_retrieve(saved_question, (size_t)n, header);
+	      
+	      if (forward_query(fd, &source_addr, &dst_addr, if_index,
+				header, (size_t)n,  ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
+		daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
+	      else
+		daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
+	    }
+	  
+	  blockdata_free(saved_question);
+	}
     }
 }
 
+/* Send query in packet, qsize to a server determined by first,last,start and
+   get the reply. return reply size. */
+static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet,  size_t qsize,
+			int have_mark, unsigned int mark, struct server **servp)
+{
+  int firstsendto = -1;
+  u16 *length = (u16 *)packet;
+  unsigned char *payload = &packet[2];
+  struct dns_header *header = (struct dns_header *)payload;
+  unsigned char c1, c2;
+  unsigned char hash[HASH_SIZE], *hashp;
+  unsigned int rsize;
+  
+  (void)mark;
+  (void)have_mark;
+
+  if (!(hashp = hash_questions(header, (unsigned int)qsize, daemon->namebuff)))
+    return 0;
+
+  memcpy(hash, hashp, HASH_SIZE);
+  
+  while (1) 
+    {
+      int data_sent = 0, timedout = 0;
+      struct server *serv;
+      
+      if (firstsendto == -1)
+	firstsendto = start;
+      else
+	{
+	  start++;
+	  
+	  if (start == last)
+	    start = first;
+	  
+	  if (start == firstsendto)
+	    break;
+	}
+      
+      serv = daemon->serverarray[start];
+      
+    retry:
+      *length = htons(qsize);
+      
+      if (serv->tcpfd == -1)
+	{
+	  if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
+	    continue;
+	  
+#ifdef HAVE_CONNTRACK
+	  /* Copy connection mark of incoming query to outgoing connection. */
+	  if (have_mark)
+	    setsockopt(serv->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
+#endif			  
+	  
+	  if ((!local_bind(serv->tcpfd,  &serv->source_addr, serv->interface, 0, 1)))
+	    {
+	      close(serv->tcpfd);
+	      serv->tcpfd = -1;
+	      continue;
+	    }
+
+#ifdef TCP_SYNCNT
+	  /* TCP connections by default take ages to time out. 
+	     At least on Linux, we can reduce that to only two attempts
+	     to get a reply. For DNS, that's more sensible. */
+	  mark = 2;
+	  setsockopt(serv->tcpfd, IPPROTO_TCP, TCP_SYNCNT, &mark, sizeof(unsigned int));
+#endif
+	  
+#ifdef MSG_FASTOPEN
+	  server_send(serv, serv->tcpfd, packet, qsize + sizeof(u16), MSG_FASTOPEN);
+	  
+	  if (errno == 0)
+	    data_sent = 1;
+	  else if (errno == ETIMEDOUT || errno == EHOSTUNREACH)
+	    timedout = 1;
+#endif
+	  
+	  /* If fastopen failed due to lack of reply, then there's no point in
+	     trying again in non-FASTOPEN mode. */
+	  if (timedout || (!data_sent && connect(serv->tcpfd, &serv->addr.sa, sa_len(&serv->addr)) == -1))
+	    {
+	      close(serv->tcpfd);
+	      serv->tcpfd = -1;
+	      continue;
+	    }
+	  
+	  daemon->serverarray[first]->last_server = start;
+	  serv->flags &= ~SERV_GOT_TCP;
+	}
+      
+      if ((!data_sent && !read_write(serv->tcpfd, packet, qsize + sizeof(u16), 0)) ||
+	  !read_write(serv->tcpfd, &c1, 1, 1) ||
+	  !read_write(serv->tcpfd, &c2, 1, 1) ||
+	  !read_write(serv->tcpfd, payload, (rsize = (c1 << 8) | c2), 1))
+	{
+	  close(serv->tcpfd);
+	  serv->tcpfd = -1;
+	  /* We get data then EOF, reopen connection to same server,
+	     else try next. This avoids DoS from a server which accepts
+	     connections and then closes them. */
+	  if (serv->flags & SERV_GOT_TCP)
+	    goto retry;
+	  else
+	    continue;
+	}
+
+      /* If the hash of the question section doesn't match the crc we sent, then
+	 someone might be attempting to insert bogus values into the cache by 
+	 sending replies containing questions and bogus answers. 
+	 Try another server, or give up */
+      if (!(hashp = hash_questions(header, rsize, daemon->namebuff)) || memcmp(hash, hashp, HASH_SIZE) != 0)
+	continue;
+      
+      serv->flags |= SERV_GOT_TCP;
+      
+      *servp = serv;
+      return rsize;
+    }
+
+  return 0;
+}
+		  
 #ifdef HAVE_DNSSEC
-/* Recurse up the key hierarchy */
+/* Recurse down the key hierarchy */
 static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, 
 			   int class, char *name, char *keyname, struct server *server, 
-			   int have_mark, unsigned int mark, int *keycount)
+			   int have_mark, unsigned int mark, int *keycount, int *validatecount)
 {
-  int new_status;
+  int first, last, start, new_status;
   unsigned char *packet = NULL;
-  unsigned char *payload = NULL;
   struct dns_header *new_header = NULL;
-  u16 *length = NULL;
- 
+  
   while (1)
     {
-      int type = SERV_DO_DNSSEC;
-      char *domain;
-      size_t m; 
-      unsigned char c1, c2;
-      struct server *firstsendto = NULL;
-      
+      size_t m;
+      int log_save;
+            
       /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
-      if (--(*keycount) == 0)
-	new_status = STAT_ABANDONED;
-      else if (status == STAT_NEED_KEY)
-	new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
-      else if (status == STAT_NEED_DS)
-	new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
+      if (STAT_ISEQUAL(status, STAT_NEED_KEY))
+	new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class, validatecount);
+      else if (STAT_ISEQUAL(status, STAT_NEED_DS))
+	new_status = dnssec_validate_ds(now, header, n, name, keyname, class, validatecount);
       else 
 	new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
 					   !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
-					   NULL, NULL, NULL);
+					   NULL, NULL, NULL, validatecount);
       
-      if (new_status != STAT_NEED_DS && new_status != STAT_NEED_KEY)
+      if (!STAT_ISEQUAL(new_status, STAT_NEED_DS) && !STAT_ISEQUAL(new_status, STAT_NEED_KEY) && !STAT_ISEQUAL(new_status, STAT_ABANDONED))
 	break;
-
+      
+      if ((*keycount)-- == 0)
+	{
+	  my_syslog(LOG_WARNING, _("limit exceeded: per-query subqueries"));
+	  new_status = STAT_ABANDONED;
+	}
+      
+      if (STAT_ISEQUAL(new_status, STAT_ABANDONED))
+	{
+	  /* Log the actual validation that made us barf. */
+	  unsigned char *p = (unsigned char *)(header+1);
+	  if  (extract_name(header, n, &p, daemon->namebuff, 0, 4) == 1)
+	    my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."),
+		      daemon->namebuff[0] ? daemon->namebuff : ".");
+	  break;
+	}
+      
       /* Can't validate because we need a key/DS whose name now in keyname.
 	 Make query for same, and recurse to validate */
       if (!packet)
 	{
 	  packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
-	  payload = &packet[2];
-	  new_header = (struct dns_header *)payload;
-	  length = (u16 *)packet;
+	  new_header = (struct dns_header *)&packet[2];
 	}
       
       if (!packet)
@@ -1657,106 +2097,29 @@
 	  new_status = STAT_ABANDONED;
 	  break;
 	}
-
-      m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, 
-				new_status == STAT_NEED_KEY ? T_DNSKEY : T_DS, server->edns_pktsz);
       
-      *length = htons(m);
-
-      /* Find server to forward to. This will normally be the 
-	 same as for the original query, but may be another if
-	 servers for domains are involved. */		      
-      if (search_servers(now, NULL, F_DNSSECOK, keyname, &type, &domain, NULL) != 0)
+      m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, 
+				STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);
+      
+      if ((start = dnssec_server(server, daemon->keyname, &first, &last)) == -1 ||
+	  (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
 	{
 	  new_status = STAT_ABANDONED;
 	  break;
 	}
-	
-      while (1)
-	{
-	  int data_sent = 0;
-	  
-	  if (!firstsendto)
-	    firstsendto = server;
-	  else
-	    {
-	      if (!(server = server->next))
-		server = daemon->servers;
-	      if (server == firstsendto)
-		{
-		  /* can't find server to accept our query. */
-		  new_status = STAT_ABANDONED;
-		  break;
-		}
-	    }
-	  
-	  if (!server_test_type(server, domain, type, SERV_DO_DNSSEC))
-	    continue;
 
-	retry:
-	  /* may need to make new connection. */
-	  if (server->tcpfd == -1)
-	    {
-	      if ((server->tcpfd = socket(server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
-		continue; /* No good, next server */
-	      
-#ifdef HAVE_CONNTRACK
-	      /* Copy connection mark of incoming query to outgoing connection. */
-	      if (have_mark)
-		setsockopt(server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
-#endif	
-	      
-	      if (!local_bind(server->tcpfd,  &server->source_addr, server->interface, 0, 1))
-		{
-		  close(server->tcpfd);
-		  server->tcpfd = -1;
-		  continue; /* No good, next server */
-		}
-	      
-#ifdef MSG_FASTOPEN
-	      server_send(server, server->tcpfd, packet, m + sizeof(u16), MSG_FASTOPEN);
-
-	      if (errno == 0)
-		data_sent = 1;
-#endif
-	      
-	      if (!data_sent && connect(server->tcpfd, &server->addr.sa, sa_len(&server->addr)) == -1)
-		{
-		  close(server->tcpfd);
-		  server->tcpfd = -1;
-		  continue; /* No good, next server */
-		}
-	      
-	      server->flags &= ~SERV_GOT_TCP;
-	    }
-	  
-	  if ((!data_sent && !read_write(server->tcpfd, packet, m + sizeof(u16), 0)) ||
-	      !read_write(server->tcpfd, &c1, 1, 1) ||
-	      !read_write(server->tcpfd, &c2, 1, 1) ||
-	      !read_write(server->tcpfd, payload, (c1 << 8) | c2, 1))
-	    {
-	      close(server->tcpfd);
-	      server->tcpfd = -1;
-	      /* We get data then EOF, reopen connection to same server,
-		 else try next. This avoids DoS from a server which accepts
-		 connections and then closes them. */
-	      if (server->flags & SERV_GOT_TCP)
-		goto retry;
-	      else
-		continue;
-	    }
-
-	  log_query_mysockaddr(F_NOEXTRA | F_DNSSEC, keyname, &server->addr,
-		      querystr("dnssec-query", new_status == STAT_NEED_KEY ? T_DNSKEY : T_DS));
-
-	  server->flags |= SERV_GOT_TCP;
-	  
-	  m = (c1 << 8) | c2;
-	  new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, have_mark, mark, keycount);
-	  break;
-	}
+      log_save = daemon->log_display_id;
+      daemon->log_display_id = ++daemon->log_id;
       
-      if (new_status != STAT_OK)
+      log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
+			   STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
+      
+      new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server,
+				   have_mark, mark, keycount, validatecount);
+      
+      daemon->log_display_id = log_save;
+      
+      if (!STAT_ISEQUAL(new_status, STAT_OK))
 	break;
     }
     
@@ -1775,24 +2138,28 @@
 unsigned char *tcp_request(int confd, time_t now,
 			   union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
 {
-  size_t size = 0;
-  int norebind = 0;
+  size_t size = 0, saved_size = 0;
+  int norebind;
+#ifdef HAVE_CONNTRACK
+  int is_single_query = 0, allowed = 1;
+#endif
 #ifdef HAVE_AUTH
   int local_auth = 0;
 #endif
   int checking_disabled, do_bit, added_pheader = 0, have_pseudoheader = 0;
-  int check_subnet, cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
+  int cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
   size_t m;
+  struct blockdata *saved_question = NULL;
   unsigned short qtype;
   unsigned int gotname;
-  unsigned char c1, c2;
   /* Max TCP packet + slop + size */
   unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
   unsigned char *payload = &packet[2];
+  unsigned char c1, c2;
   /* largest field in header is 16-bits, so this is still sufficiently aligned */
   struct dns_header *header = (struct dns_header *)payload;
   u16 *length = (u16 *)packet;
-  struct server *last_server;
+  struct server *serv;
   struct in_addr dst_addr_4;
   union mysockaddr peer_addr;
   socklen_t peer_len = sizeof(union mysockaddr);
@@ -1800,16 +2167,15 @@
   unsigned char *pheader;
   unsigned int mark = 0;
   int have_mark = 0;
-
-  (void)mark;
-  (void)have_mark;
-
-  if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
+  int first, last, filtered, stale, do_stale = 0;
+  unsigned int flags = 0;
+    
+  if (!packet || getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
     return packet;
 
 #ifdef HAVE_CONNTRACK
   /* Get connection mark of incoming query to set on outgoing connections. */
-  if (option_bool(OPT_CONNTRACK))
+  if (option_bool(OPT_CONNTRACK) || option_bool(OPT_CMARK_ALST_EN))
     {
       union all_addr local;
 		      
@@ -1847,20 +2213,27 @@
 	}
       if (!addr)
 	{
-	  my_syslog(LOG_WARNING, _("Ignoring query from non-local network"));
+	  prettyprint_addr(&peer_addr, daemon->addrbuff);
+	  my_syslog(LOG_WARNING, _("ignoring query from non-local network %s"), daemon->addrbuff);
 	  return packet;
 	}
     }
 
   while (1)
     {
-      if (query_count == TCP_MAX_QUERIES ||
-	  !packet ||
-	  !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
-	  !(size = c1 << 8 | c2) ||
-	  !read_write(confd, payload, size, 1))
-       	return packet; 
-  
+      int ede = EDE_UNSET;
+
+      if (!do_stale)
+	{
+	  if (query_count == TCP_MAX_QUERIES)
+	    break;
+	  
+	  if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
+	      !(size = c1 << 8 | c2) ||
+	      !read_write(confd, payload, size, 1))
+	    break;
+	}
+      
       if (size < (int)sizeof(struct dns_header))
 	continue;
 
@@ -1884,24 +2257,32 @@
 #ifdef HAVE_AUTH
 	  struct auth_zone *zone;
 #endif
-	  char *types = querystr(auth_dns ? "auth" : "query", qtype);
-	  
-	  log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
-			       &peer_addr, types);
-	  
-#ifdef HAVE_AUTH
-	  /* find queries for zones we're authoritative for, and answer them directly */
-	  if (!auth_dns && !option_bool(OPT_LOCALISE))
-	    for (zone = daemon->auth_zones; zone; zone = zone->next)
-	      if (in_zone(zone, daemon->namebuff, NULL))
-		{
-		  auth_dns = 1;
-		  local_auth = 1;
-		  break;
-		}
+
+#ifdef HAVE_CONNTRACK
+	  is_single_query = 1;
 #endif
+
+	  if (!do_stale)
+	    {
+	      log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
+				   &peer_addr, auth_dns ? "auth" : "query", qtype);
+	      
+#ifdef HAVE_AUTH
+	      /* find queries for zones we're authoritative for, and answer them directly */
+	      if (!auth_dns && !option_bool(OPT_LOCALISE))
+		for (zone = daemon->auth_zones; zone; zone = zone->next)
+		  if (in_zone(zone, daemon->namebuff, NULL))
+		    {
+		      auth_dns = 1;
+		      local_auth = 1;
+		      break;
+		    }
+#endif
+	    }
 	}
       
+      norebind = domain_no_rebind(daemon->namebuff);
+      
       if (local_addr->sa.sa_family == AF_INET)
 	dst_addr_4 = local_addr->in.sin_addr;
       else
@@ -1920,259 +2301,255 @@
 	  if (flags & 0x8000)
 	    do_bit = 1; /* do bit */ 
 	}
-
+      
+#ifdef HAVE_CONNTRACK
 #ifdef HAVE_AUTH
-      if (auth_dns)
+      if (!auth_dns || local_auth)
+#endif
+	if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
+	  allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
+#endif
+
+      if (0);
+#ifdef HAVE_CONNTRACK
+      else if (!allowed)
+	{
+	  u16 swap = htons(EDE_BLOCKED);
+
+	  m = answer_disallowed(header, size, (u32)mark, is_single_query ? daemon->namebuff : NULL);
+	  
+	  if (have_pseudoheader && m != 0)
+	    m = add_pseudoheader(header,  m, ((unsigned char *) header) + 65536, daemon->edns_pktsz,
+				 EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
+	}
+#endif
+#ifdef HAVE_AUTH
+      else if (auth_dns)
 	m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, 
 			local_auth, do_bit, have_pseudoheader);
-      else
 #endif
+      else
 	{
 	   int ad_reqd = do_bit;
 	   /* RFC 6840 5.7 */
 	   if (header->hb4 & HB4_AD)
 	     ad_reqd = 1;
-	   
-	   /* m > 0 if answered from cache */
-	   m = answer_request(header, ((char *) header) + 65536, (size_t)size, 
-			      dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader);
-	  
+
+	   if (do_stale)
+	     m = 0;
+	   else
+	     {
+	       if (saved_question)
+		 blockdata_free(saved_question);
+	       
+	       saved_question = blockdata_alloc((char *) header, (size_t)size);
+	       saved_size = size;
+	       
+	       /* m > 0 if answered from cache */
+	       m = answer_request(header, ((char *) header) + 65536, (size_t)size, 
+				  dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale, &filtered);
+	     }
 	  /* Do this by steam now we're not in the select() loop */
 	  check_log_writer(1); 
 	  
-	  if (m == 0)
+	  if (m == 0 && saved_question)
 	    {
-	      unsigned int flags = 0;
-	      union all_addr *addrp = NULL;
-	      int type = SERV_DO_DNSSEC;
-	      char *domain = NULL;
-	      unsigned char *oph = find_pseudoheader(header, size, NULL, NULL, NULL, NULL);
+	      struct server *master;
+	      int start;
 
-	      if (gotname)
-		flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
-
-       if(!option_bool(OPT_EDNS_RESTRICT) || (option_bool(OPT_EDNS_RESTRICT) && !domain))
-	      size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &check_subnet, &cacheable);
-
-#ifdef HAVE_DNSSEC
-	      if (option_bool(OPT_DNSSEC_VALID) && (type & SERV_DO_DNSSEC))
-		{
-		  size = add_do_bit(header, size, ((unsigned char *) header) + 65536);
-		  
-		  /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
-		     this allows it to select auth servers when one is returning bad data. */
-		  if (option_bool(OPT_DNSSEC_DEBUG))
-		    header->hb4 |= HB4_CD;
-		}
-#endif
-
-	      /* Check if we added a pheader on forwarding - may need to
-		 strip it from the reply. */
-	      if (!oph && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
-		added_pheader = 1;
-
-	      type &= ~SERV_DO_DNSSEC;
+	      blockdata_retrieve(saved_question, (size_t)saved_size, header);
+	      size = saved_size;
 	      
-	      if (type != 0  || option_bool(OPT_ORDER) || !daemon->last_server)
-		last_server = daemon->servers;
+	      if (lookup_domain(daemon->namebuff, gotname, &first, &last))
+		flags = is_local_answer(now, first, daemon->namebuff);
 	      else
-		last_server = daemon->last_server;
-	      
-	      if (!flags && last_server)
 		{
-		  struct server *firstsendto = NULL;
-		  unsigned char hash[HASH_SIZE];
-		  memcpy(hash, hash_questions(header, (unsigned int)size, daemon->namebuff), HASH_SIZE);
-
-		  /* Loop round available servers until we succeed in connecting to one.
-		     Note that this code subtly ensures that consecutive queries on this connection
-		     which can go to the same server, do so. */
-		  while (1) 
-		    {
-		      int data_sent = 0;
-
-		      if (!firstsendto)
-			firstsendto = last_server;
-		      else
-			{
-			  if (!(last_server = last_server->next))
-			    last_server = daemon->servers;
-			  
-			  if (last_server == firstsendto)
-			    break;
-			}
-		      
-		      /* server for wrong domain */
-		      if (!server_test_type(last_server, domain, type, 0))
-			continue;
-
-		    retry:
-		      *length = htons(size);
-
-		      if (last_server->tcpfd == -1)
-			{
-			  if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
-			    continue;
-			  
-#ifdef HAVE_CONNTRACK
-			  /* Copy connection mark of incoming query to outgoing connection. */
-			  if (have_mark)
-			    setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
-#endif			  
-		      
-			  if ((!local_bind(last_server->tcpfd,  &last_server->source_addr, last_server->interface, 0, 1)))
-			    {
-			      close(last_server->tcpfd);
-			      last_server->tcpfd = -1;
-			      continue;
-			    }
-			  
-#ifdef MSG_FASTOPEN
-			    server_send(last_server, last_server->tcpfd, packet, size + sizeof(u16), MSG_FASTOPEN);
-
-			    if (errno == 0)
-			      data_sent = 1;
-#endif
-			    
-			    if (!data_sent && connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1)
-			    {
-			      close(last_server->tcpfd);
-			      last_server->tcpfd = -1;
-			      continue;
-			    }
-			  
-			  last_server->flags &= ~SERV_GOT_TCP;
-			}
-		      
-		      /* get query name again for logging - may have been overwritten */
-		      if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
-			strcpy(daemon->namebuff, "query");
-		      
-		      if ((!data_sent && !read_write(last_server->tcpfd, packet, size + sizeof(u16), 0)) ||
-			  !read_write(last_server->tcpfd, &c1, 1, 1) ||
-			  !read_write(last_server->tcpfd, &c2, 1, 1) ||
-			  !read_write(last_server->tcpfd, payload, (c1 << 8) | c2, 1))
-			{
-			  close(last_server->tcpfd);
-			  last_server->tcpfd = -1;
-			  /* We get data then EOF, reopen connection to same server,
-			     else try next. This avoids DoS from a server which accepts
-			     connections and then closes them. */
-			  if (last_server->flags & SERV_GOT_TCP)
-			    goto retry;
-			  else
-			    continue;
-			}
-		      
-		      last_server->flags |= SERV_GOT_TCP;
-
-		      m = (c1 << 8) | c2;
-
-		      log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
-					   &last_server->addr, NULL);
-
+		  /* No configured servers */
+		  ede = EDE_NOT_READY;
+		  flags = 0;
+		}
+	      
+	      /* don't forward A or AAAA queries for simple names, except the empty name */
+	      if (!flags &&
+		  option_bool(OPT_NODOTS_LOCAL) &&
+		  (gotname & (F_IPV4 | F_IPV6)) &&
+		  !strchr(daemon->namebuff, '.') &&
+		  strlen(daemon->namebuff) != 0)
+		flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
+		
+	      if (!flags && ede != EDE_NOT_READY)
+		{
+		  master = daemon->serverarray[first];
+		  
+		  if (option_bool(OPT_ORDER) || master->last_server == -1)
+		    start = first;
+		  else
+		    start = master->last_server;
+		  
+		  size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &cacheable);
+		  
 #ifdef HAVE_DNSSEC
-		      if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (last_server->flags & SERV_DO_DNSSEC))
-			{
-			  int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
-			  int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname, 
-						       last_server, have_mark, mark, &keycount);
-			  char *result, *domain = "result";
-			  
-			  if (status == STAT_ABANDONED)
-			    {
-			      result = "ABANDONED";
-			      status = STAT_BOGUS;
-			    }
-			  else
-			    result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
-			  
-			  if (status == STAT_BOGUS && extract_request(header, m, daemon->namebuff, NULL))
-			    domain = daemon->namebuff;
-
-			  log_query(F_SECSTAT, domain, NULL, result);
-			  
-			  if (status == STAT_BOGUS)
-			    {
-			      no_cache_dnssec = 1;
-			      bogusanswer = 1;
-			    }
-
-			  if (status == STAT_SECURE)
-			    cache_secure = 1;
-			}
-#endif
-
-		      /* restore CD bit to the value in the query */
-		      if (checking_disabled)
+		  if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
+		    {
+		      size = add_do_bit(header, size, ((unsigned char *) header) + 65536);
+		      
+		      /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
+			 this allows it to select auth servers when one is returning bad data. */
+		      if (option_bool(OPT_DNSSEC_DEBUG))
 			header->hb4 |= HB4_CD;
-		      else
-			header->hb4 &= ~HB4_CD;
-		      
-		      /* There's no point in updating the cache, since this process will exit and
-			 lose the information after a few queries. We make this call for the alias and 
-			 bogus-nxdomain side-effects. */
-		      /* If the crc of the question section doesn't match the crc we sent, then
-			 someone might be attempting to insert bogus values into the cache by 
-			 sending replies containing questions and bogus answers. */
-		      if (memcmp(hash, hash_questions(header, (unsigned int)m, daemon->namebuff), HASH_SIZE) != 0)
-			{ 
-			  m = 0;
-			  break;
-			}
-
-		      /* Never cache answers which are contingent on the source or MAC address EDSN0 option,
-			 since the cache is ignorant of such things. */
-		      if (!cacheable)
-			no_cache_dnssec = 1;
-		      
-		      m = process_reply(header, now, last_server, (unsigned int)m, 
-					option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
-					ad_reqd, do_bit, added_pheader, check_subnet, &peer_addr); 
-		      
+		    }
+#endif
+		  
+		  /* Check if we added a pheader on forwarding - may need to
+		     strip it from the reply. */
+		  if (!have_pseudoheader && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
+		    added_pheader = 1;
+		  
+		  /* Loop round available servers until we succeed in connecting to one. */
+		  if ((m = tcp_talk(first, last, start, packet, size, have_mark, mark, &serv)) == 0)
+		    {
+		      ede = EDE_NETERR;
 		      break;
 		    }
+		  
+		  /* get query name again for logging - may have been overwritten */
+		  if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
+		    strcpy(daemon->namebuff, "query");
+		  log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff, &serv->addr, NULL, 0);
+		  
+#ifdef HAVE_DNSSEC
+		  if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (master->flags & SERV_DO_DNSSEC))
+		    {
+		      int keycount = daemon->limit[LIMIT_WORK]; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
+		      int validatecount = daemon->limit[LIMIT_CRYPTO]; 
+		      int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname, 
+						   serv, have_mark, mark, &keycount, &validatecount);
+		      char *result, *domain = "result";
+		      
+		      union all_addr a;
+		      a.log.ede = ede = errflags_to_ede(status);
+		      
+		      if (STAT_ISEQUAL(status, STAT_ABANDONED))
+			{
+			  result = "ABANDONED";
+			  status = STAT_BOGUS;
+			}
+		      else
+			result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
+		      
+		      if (STAT_ISEQUAL(status, STAT_SECURE))
+			cache_secure = 1;
+		      else if (STAT_ISEQUAL(status, STAT_BOGUS))
+			{
+			  no_cache_dnssec = 1;
+			  bogusanswer = 1;
+			  
+			  if (extract_request(header, m, daemon->namebuff, NULL))
+			    domain = daemon->namebuff;
+			}
+		      
+		      log_query(F_SECSTAT, domain, &a, result, 0);
+		    
+		      if ((daemon->limit[LIMIT_CRYPTO] - validatecount) > (int)daemon->metrics[METRIC_CRYPTO_HWM])
+			daemon->metrics[METRIC_CRYPTO_HWM] = daemon->limit[LIMIT_CRYPTO] - validatecount;
+
+		      if ((daemon->limit[LIMIT_WORK] - keycount) > (int)daemon->metrics[METRIC_WORK_HWM])
+			daemon->metrics[METRIC_WORK_HWM] = daemon->limit[LIMIT_WORK] - keycount;
+		    }
+#endif
+		  
+		  /* restore CD bit to the value in the query */
+		  if (checking_disabled)
+		    header->hb4 |= HB4_CD;
+		  else
+		    header->hb4 &= ~HB4_CD;
+		  
+		  /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
+		     since the cache is ignorant of such things. */
+		  if (!cacheable)
+		    no_cache_dnssec = 1;
+		  
+		  m = process_reply(header, now, serv, (unsigned int)m, 
+				    option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
+				    ad_reqd, do_bit, added_pheader, &peer_addr, ((unsigned char *)header) + 65536, ede); 
 		}
+	    }
+	}
 	
-	      /* In case of local answer or no connections made. */
-	      if (m == 0)
-		{
-		  m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
-		  if (have_pseudoheader)
-		    m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
-		}
+      if (do_stale)
+	break;
+    
+      /* In case of local answer or no connections made. */
+      if (m == 0)
+	{
+	  if (!(m = make_local_answer(flags, gotname, size, header, daemon->namebuff,
+				      ((char *) header) + 65536, first, last, ede)))
+	    break;
+	  
+	  if (have_pseudoheader)
+	    {
+	      u16 swap = htons((u16)ede);
+	      
+	      if (ede != EDE_UNSET)
+		m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
+	      else
+		m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
+	    }
+	}
+      else if (have_pseudoheader)
+	{
+	  ede = EDE_UNSET;
+	  
+	  if (filtered)
+	    ede = EDE_FILTERED;
+	  else if (stale)
+	    ede = EDE_STALE;
+	  
+	  if (ede != EDE_UNSET)
+	    {
+	      u16 swap = htons((u16)ede);
+	      
+	      m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 	    }
 	}
 	  
       check_log_writer(1);
       
       *length = htons(m);
-           
-      if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
-	return packet;
-    }
-}
-
-static struct frec *allocate_frec(time_t now)
-{
-  struct frec *f;
-  
-  if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
-    {
-      f->next = daemon->frec_list;
-      f->time = now;
-      f->sentto = NULL;
-      f->rfds = NULL;
-      f->flags = 0;
-#ifdef HAVE_DNSSEC
-      f->dependent = NULL;
-      f->blocking_query = NULL;
-      f->stash = NULL;
+      
+#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
+#ifdef HAVE_AUTH
+      if (!auth_dns || local_auth)
 #endif
-      daemon->frec_list = f;
+	if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
+	  report_addresses(header, m, mark);
+#endif
+      if (!read_write(confd, packet, m + sizeof(u16), 0))
+	break;
+      
+      /* If we answered with stale data, this process will now try and get fresh data into
+	 the cache and cannot therefore accept new queries. Close the incoming
+	 connection to signal that to the client. Then set do_stale and loop round
+	 once more to try and get fresh data, after which we exit. */
+      if (stale)
+	{
+	  shutdown(confd, SHUT_RDWR);
+	  close(confd);
+	  do_stale = 1;
+	}
     }
 
-  return f;
+  /* If we ran once to get fresh data, confd is already closed. */
+  if (!do_stale)
+    {
+      shutdown(confd, SHUT_RDWR);
+      close(confd);
+    }
+
+  if (saved_question)
+    blockdata_free(saved_question);
+  
+  return packet;
 }
 
 /* return a UDP socket bound to a random port, have to cope with straying into
@@ -2183,16 +2560,36 @@
 
   if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
     {
+      /* We need to set IPV6ONLY so we can use the same ports
+	 for IPv4 and IPv6, otherwise, in restricted port situations,
+	 we can end up with all our available ports in use for 
+	 one address family, and the other address family cannot be used. */
+      if (s->source_addr.sa.sa_family == AF_INET6)
+	{
+	  int opt = 1;
+
+	  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt)) == -1)
+	    {
+	      close(fd);
+	      return -1;
+	    }
+	}
+      
       if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
 	return fd;
 
-      if (s->interface[0] == 0)
-	(void)prettyprint_addr(&s->source_addr, daemon->namebuff);
-      else
-	strcpy(daemon->namebuff, s->interface);
-
-      my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
-		daemon->namebuff, strerror(errno));
+      /* don't log errors due to running out of available ports, we handle those. */
+      if (!sockaddr_isnull(&s->source_addr) || errno != EADDRINUSE)
+	{
+	  if (s->interface[0] == 0)
+	    (void)prettyprint_addr(&s->source_addr, daemon->addrbuff);
+	  else
+	    safe_strncpy(daemon->addrbuff, s->interface, ADDRSTRLEN);
+	  
+	  my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
+		    daemon->addrbuff, strerror(errno));
+	}
+	  
       close(fd);
     }
   
@@ -2222,39 +2619,93 @@
 {
   static int finger = 0;
   int i, j = 0;
-  struct randfd_list *rfl;
+  int ports_full = 0;
+  struct randfd_list **up, *rfl, *found, **found_link;
   struct randfd *rfd = NULL;
   int fd = 0;
+  int ports_avail = 0;
+  
+  /* We can't have more randomsocks for this AF available than ports in  our port range,
+     so check that here, to avoid trying and failing to bind every port
+     in local_bind(), called from random_sock(). The actual check is below when 
+     ports_avail != 0 */
+  if (daemon->max_port != 0)
+    {
+      ports_avail = daemon->max_port - daemon->min_port + 1;
+      if (ports_avail >= SMALL_PORT_RANGE)
+	ports_avail = 0;
+    }
   
   /* If server has a pre-allocated fd, use that. */
   if (serv->sfd)
     return serv->sfd->fd;
   
-  /* existing suitable random port socket linked to this transaction? */
-  for (rfl = *fdlp; rfl; rfl = rfl->next)
+  /* existing suitable random port socket linked to this transaction?
+     Find the last one in the list and count how many there are. */
+  for (found = NULL, found_link = NULL, i = 0, up = fdlp, rfl = *fdlp; rfl; up = &rfl->next, rfl = rfl->next)
     if (server_isequal(serv, rfl->rfd->serv))
-      return rfl->rfd->fd;
+      {
+	i++;
+	found = rfl;
+	found_link = up;
+      }
 
-  /* No. need new link. */
+  /* We have the maximum number for this query already. Promote
+     the last one on the list to the head, to circulate them,
+     and return it. */
+  if (found && i >= daemon->randport_limit)
+    {
+      *found_link = found->next;
+      found->next = *fdlp;
+      *fdlp = found;
+      return found->rfd->fd;
+    }
+
+  /* check for all available ports in use. */
+  if (ports_avail != 0)
+    {
+      int ports_inuse;
+
+      for (ports_inuse = 0, i = 0; i < daemon->numrrand; i++)
+	if (daemon->randomsocks[i].refcount != 0 &&
+	    daemon->randomsocks[i].serv->source_addr.sa.sa_family == serv->source_addr.sa.sa_family &&
+	    ++ports_inuse >= ports_avail)
+	  {
+	    ports_full = 1;
+	    break;
+	  }
+    }
+  
+  /* limit the number of sockets we have open to avoid starvation of 
+     (eg) TFTP. Once we have a reasonable number, randomness should be OK */
+  if (!ports_full)
+    for (i = 0; i < daemon->numrrand; i++)
+      if (daemon->randomsocks[i].refcount == 0)
+	{
+	  if ((fd = random_sock(serv)) != -1)
+	    {
+	      rfd = &daemon->randomsocks[i];
+	      rfd->serv = serv;
+	      rfd->fd = fd;
+	      rfd->refcount = 1;
+	    }
+	  break;
+	}
+    
+  /* No good existing. Need new link. */
   if ((rfl = daemon->rfl_spare))
     daemon->rfl_spare = rfl->next;
   else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
-    return -1;
-   
-  /* limit the number of sockets we have open to avoid starvation of 
-     (eg) TFTP. Once we have a reasonable number, randomness should be OK */
-  for (i = 0; i < daemon->numrrand; i++)
-    if (daemon->randomsocks[i].refcount == 0)
-      {
-	if ((fd = random_sock(serv)) != -1)
-    	  {
-	    rfd = &daemon->randomsocks[i];
-	    rfd->serv = serv;
-	    rfd->fd = fd;
-	    rfd->refcount = 1;
-	  }
-	break;
-      }
+    {
+      /* malloc failed, don't leak allocated sock */
+      if (rfd)
+	{
+	  close(rfd->fd);
+	  rfd->refcount = 0;
+	}
+
+      return -1;
+    }
   
   /* No free ones or cannot get new socket, grab an existing one */
   if (!rfd)
@@ -2265,14 +2716,23 @@
 	    server_isequal(serv, daemon->randomsocks[i].serv) &&
 	    daemon->randomsocks[i].refcount != 0xfffe)
 	  {
-	    finger = i + 1;
-	    rfd = &daemon->randomsocks[i];
-	    rfd->refcount++;
-	    break;
+	    struct randfd_list *rl;
+	    /* Don't pick one we already have. */
+	    for (rl = *fdlp; rl; rl = rl->next)
+	      if (rl->rfd == &daemon->randomsocks[i])
+		break;
+
+	    if (!rl)
+	      {
+		finger = i + 1;
+		rfd = &daemon->randomsocks[i];
+		rfd->refcount++;
+		break;
+	      }
 	  }
       }
 
-  if (j == daemon->numrrand)
+  if (!rfd) /* should be when j == daemon->numrrand */
     {
       struct randfd_list *rfl_poll;
 
@@ -2380,172 +2840,203 @@
   f->sentto = NULL;
   f->flags = 0;
 
-#ifdef HAVE_DNSSEC
   if (f->stash)
     {
       blockdata_free(f->stash);
       f->stash = NULL;
     }
-
+  
+#ifdef HAVE_DNSSEC
   /* Anything we're waiting on is pointless now, too */
   if (f->blocking_query)
-    free_frec(f->blocking_query);
+    {
+      struct frec *n, **up;
+
+      /* unlink ourselves from the blocking query's dependents list. */
+      for (n = f->blocking_query->dependent, up = &f->blocking_query->dependent; n; n = n->next_dependent)
+	if (n == f)
+	  {
+	    *up = n->next_dependent;
+	    break;
+	  }
+	else
+	  up = &n->next_dependent;
+
+      /* If we were the only/last dependent, free the blocking query too. */
+      if (!f->blocking_query->dependent)
+	free_frec(f->blocking_query);
+    }
+  
   f->blocking_query = NULL;
   f->dependent = NULL;
+  f->next_dependent = NULL;
 #endif
 }
 
 
 
-/* if wait==NULL return a free or older than TIMEOUT record.
-   else return *wait zero if one available, or *wait is delay to
-   when the oldest in-use record will expire. Impose an absolute
+/* Impose an absolute
    limit of 4*TIMEOUT before we wipe things (for random sockets).
-   If force is non-NULL, always return a result, even if we have
-   to allocate above the limit, and never free the record pointed
-   to by the force argument. */
-struct frec *get_new_frec(time_t now, int *wait, struct frec *force)
+   If force is set, always return a result, even if we have
+   to allocate above the limit, and don't free any records.
+   This is set when allocating for DNSSEC to avoid cutting off
+   the branch we are sitting on. */
+static struct frec *get_new_frec(time_t now, struct server *master, int force)
 {
   struct frec *f, *oldest, *target;
   int count;
   
-  if (wait)
-    *wait = 0;
-
-  for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next, count++)
-    if (!f->sentto)
-      target = f;
-    else 
-      {
+  /* look for free records, garbage collect old records and count number in use by our server-group. */
+  for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next)
+    {
+      if (!f->sentto)
+	target = f;
+      else
+	{
 #ifdef HAVE_DNSSEC
-	    /* Don't free DNSSEC sub-queries here, as we may end up with
-	       dangling references to them. They'll go when their "real" query 
-	       is freed. */
-	    if (!f->dependent && f != force)
+	  /* Don't free DNSSEC sub-queries here, as we may end up with
+	     dangling references to them. They'll go when their "real" query 
+	     is freed. */
+	  if (!f->dependent && !force)
 #endif
-	      {
-		if (difftime(now, f->time) >= 4*TIMEOUT)
-		  {
-		    free_frec(f);
-		    target = f;
-		  }
-	     
-	    
-		if (!oldest || difftime(f->time, oldest->time) <= 0)
-		  oldest = f;
-	      }
-      }
+	    {
+	      if (difftime(now, f->time) >= 4*TIMEOUT)
+		{
+		  daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
+		  free_frec(f);
+		  target = f;
+		}
+	      else if (!oldest || difftime(f->time, oldest->time) <= 0)
+		oldest = f;
+	    }
+	}
+      
+      if (f->sentto && ((int)difftime(now, f->time)) < TIMEOUT && server_samegroup(f->sentto, master))
+	count++;
+    }
+
+  if (!force && count >= daemon->ftabsize)
+    {
+      query_full(now, master->domain);
+      return NULL;
+    }
+  
+  if (!target && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
+    { 
+      /* can't find empty one, use oldest if there is one and it's older than timeout */
+      daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
+      free_frec(oldest);
+      target = oldest;
+    }
+  
+  if (!target && (target = (struct frec *)whine_malloc(sizeof(struct frec))))
+    {
+      target->next = daemon->frec_list;
+      daemon->frec_list = target;
+    }
 
   if (target)
     {
       target->time = now;
-      return target;
+      target->forward_delay = daemon->fast_retry_time;
     }
   
-  /* can't find empty one, use oldest if there is one
-     and it's older than timeout */
-  if (!force && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
-    { 
-      /* keep stuff for twice timeout if we can by allocating a new
-	 record instead */
-      if (difftime(now, oldest->time) < 2*TIMEOUT && 
-	  count <= daemon->ftabsize &&
-	  (f = allocate_frec(now)))
-	return f;
-
-      if (!wait)
-	{
-	  free_frec(oldest);
-	  oldest->time = now;
-	}
-      return oldest;
-    }
-  
-  /* none available, calculate time 'till oldest record expires */
-  if (!force && count > daemon->ftabsize)
-    {
-      if (oldest && wait)
-	*wait = oldest->time + (time_t)TIMEOUT - now;
-      
-      query_full(now);
-      
-      return NULL;
-    }
-  
-  if (!(f = allocate_frec(now)) && wait)
-    /* wait one second on malloc failure */
-    *wait = 1;
-
-  return f; /* OK if malloc fails and this is NULL */
+  return target;
 }
 
-static void query_full(time_t now)
+static void query_full(time_t now, char *domain)
 {
   static time_t last_log = 0;
   
   if ((int)difftime(now, last_log) > 5)
     {
       last_log = now;
-      my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
+      if (!domain || strlen(domain) == 0)
+	my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
+      else
+	my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries to %s reached (max: %d)"), domain, daemon->ftabsize);
     }
 }
 
 
-static struct frec *lookup_frec(unsigned short id, int fd, void *hash)
+static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp)
 {
   struct frec *f;
   struct server *s;
-  int type;
+  int first, last;
   struct randfd_list *fdl;
-  
-  for(f = daemon->frec_list; f; f = f->next)
-    if (f->sentto && f->new_id == id && 
-	(memcmp(hash, f->hash, HASH_SIZE) == 0))
-      {
-	/* sent from random port */
-	for (fdl = f->rfds; fdl; fdl = fdl->next)
-	  if (fdl->rfd->fd == fd)
-	  return f;
-	
-	/* Sent to upstream from socket associated with a server. 
-	   Note we have to iterate over all the possible servers, since they may
-	   have different bound sockets. */
-	type = f->sentto->flags & SERV_TYPE;
-	s = f->sentto;
-	do {
-	  if (server_test_type(s, f->sentto->domain, type, 0) &&
-	      s->sfd && s->sfd->fd == fd)
-	    return f;
+
+  if (hash)
+    for (f = daemon->frec_list; f; f = f->next)
+      if (f->sentto && f->new_id == id && 
+	  (memcmp(hash, f->hash, HASH_SIZE) == 0))
+	{
+	  filter_servers(f->sentto->arrayposn, F_SERVER, firstp, lastp);
 	  
-	  s = s->next ? s->next : daemon->servers;
-	} while (s != f->sentto);
-      }
+	  /* sent from random port */
+	  for (fdl = f->rfds; fdl; fdl = fdl->next)
+	    if (fdl->rfd->fd == fd)
+	      return f;
+	  
+	  /* Sent to upstream from socket associated with a server. 
+	     Note we have to iterate over all the possible servers, since they may
+	     have different bound sockets. */
+	  for (first = *firstp, last = *lastp; first != last; first++)
+	    {
+	      s = daemon->serverarray[first];
+	      if (s->sfd && s->sfd->fd == fd)
+		return f;
+	    }
+	}
   
   return NULL;
 }
 
-static struct frec *lookup_frec_by_query(void *hash, unsigned int flags)
+static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask)
 {
   struct frec *f;
 
-  /* FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below 
-     ensures that no frec created for internal DNSSEC query can be returned here.
-     
-     Similarly FREC_NO_CACHE is never set in flags, so a query which is
-     contigent on a particular source address EDNS0 option will never be matched. */
-
-#define FLAGMASK (FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION \
-		  | FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)
-  
-  for(f = daemon->frec_list; f; f = f->next)
-    if (f->sentto &&
-	(f->flags & FLAGMASK) == flags &&
-	memcmp(hash, f->hash, HASH_SIZE) == 0)
-      return f;
+  if (hash)
+    for (f = daemon->frec_list; f; f = f->next)
+      if (f->sentto &&
+	  (f->flags & flagmask) == flags &&
+	  memcmp(hash, f->hash, HASH_SIZE) == 0)
+	return f;
   
   return NULL;
 }
 
+#ifdef HAVE_DNSSEC
+/* DNSSEC frecs have the complete query in the block stash.
+   Search for an existing query using that. */
+static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header)
+{
+   struct frec *f;
+
+   for (f = daemon->frec_list; f; f = f->next)
+     if (f->sentto &&
+	 (f->flags & flags) &&
+	 blockdata_retrieve(f->stash, f->stash_len, (void *)header))
+       {
+	 unsigned char *p = (unsigned char *)(header+1);
+	 int hclass;
+
+	 if (extract_name(header, f->stash_len, &p, target, 0, 4) != 1)
+	   continue;
+
+	 p += 2;  /* type, known from flags */ 
+	 GETSHORT(hclass, p);
+
+	 if (class != hclass)
+	   continue;
+
+	 return f;
+       }
+
+   return NULL;
+}
+#endif
+
 /* Send query packet again, if we can. */
 void resend_query()
 {
@@ -2570,9 +3061,6 @@
     if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
       daemon->randomsocks[i].serv = NULL;
   
-  if (daemon->last_server == server)
-    daemon->last_server = NULL;
-  
   if (daemon->srv_save == server)
     daemon->srv_save = NULL;
 }
@@ -2596,8 +3084,3 @@
 	return ret;
     }
 }
-
-
-
-
-