ipsec: bind an SA to a worker

the sequence number increment and the anti-replay window
checks must be atomic. Given the vector nature of VPP we
can't simply use atomic increments for sequence numbers,
since a vector on thread 1 with lower sequence numbers could
be 'overtaken' by packets on thread 2 with higher sequence
numbers.
The anti-replay logic requires a critical section, not just
atomics, and we don't want that.
So when the SA sees its first packet it is bound to that worker;
all subsequent packets that arrive on a different worker
are subject to a handoff.

Type: feature

Change-Id: Ia20a8645fb50622ea6235ab015a537f033d531a4
Signed-off-by: Neale Ranns <nranns@cisco.com>
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index 9867b7c..37f7810 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -581,6 +581,7 @@
   ipsec/ipsec.c
   ipsec/ipsec_cli.c
   ipsec/ipsec_format.c
+  ipsec/ipsec_handoff.c
   ipsec/ipsec_input.c
   ipsec/ipsec_punt.c
   ipsec/ipsec_sa.c
@@ -602,6 +603,7 @@
   ipsec/esp_decrypt.c
   ipsec/ah_decrypt.c
   ipsec/ah_encrypt.c
+  ipsec/ipsec_handoff.c
   ipsec/ipsec_output.c
   ipsec/ipsec_input.c
   ipsec/ipsec_tun_in.c
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index f46fa6e..22f9a09 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -24,10 +24,11 @@
 #include <vnet/ipsec/ah.h>
 #include <vnet/ipsec/ipsec_io.h>
 
-#define foreach_ah_decrypt_next \
-  _ (DROP, "error-drop")        \
-  _ (IP4_INPUT, "ip4-input")    \
-  _ (IP6_INPUT, "ip6-input")
+#define foreach_ah_decrypt_next                 \
+  _(DROP, "error-drop")                         \
+  _(IP4_INPUT, "ip4-input")                     \
+  _(IP6_INPUT, "ip6-input")                     \
+  _(HANDOFF, "handoff")
 
 #define _(v, s) AH_DECRYPT_NEXT_##v,
 typedef enum
@@ -175,6 +176,21 @@
 					  thread_index, current_sa_index);
 	}
 
+      if (PREDICT_FALSE (~0 == sa0->decrypt_thread_index))
+	{
+	  /* this is the first packet to use this SA, claim the SA
+	   * for this thread. this could happen simultaneously on
+	   * another thread */
+	  clib_atomic_cmp_and_swap (&sa0->decrypt_thread_index, ~0,
+				    ipsec_sa_assign_thread (thread_index));
+	}
+
+      if (PREDICT_TRUE (thread_index != sa0->decrypt_thread_index))
+	{
+	  next[0] = AH_DECRYPT_NEXT_HANDOFF;
+	  goto next;
+	}
+
       pd->sa_index = current_sa_index;
 
       ih4 = vlib_buffer_get_current (b[0]);
@@ -421,9 +437,10 @@
 
   .n_next_nodes = AH_DECRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [AH_DECRYPT_NEXT_##s] = n,
-    foreach_ah_decrypt_next
-#undef _
+    [AH_DECRYPT_NEXT_DROP] = "ip4-drop",
+    [AH_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [AH_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [AH_DECRYPT_NEXT_HANDOFF] = "ah4-decrypt-handoff", /* uses ah4_dec_fq_index */
   },
 };
 /* *INDENT-ON* */
@@ -447,9 +464,10 @@
 
   .n_next_nodes = AH_DECRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [AH_DECRYPT_NEXT_##s] = n,
-    foreach_ah_decrypt_next
-#undef _
+    [AH_DECRYPT_NEXT_DROP] = "ip6-drop",
+    [AH_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [AH_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [AH_DECRYPT_NEXT_HANDOFF] = "ah6-decrypt-handoff", /* uses ah6_dec_fq_index */
   },
 };
 /* *INDENT-ON* */
diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c
index 75294a2..80b3fcc 100644
--- a/src/vnet/ipsec/ah_encrypt.c
+++ b/src/vnet/ipsec/ah_encrypt.c
@@ -24,9 +24,8 @@
 #include <vnet/ipsec/ah.h>
 
 #define foreach_ah_encrypt_next \
-  _ (DROP, "error-drop")            \
-  _ (IP4_LOOKUP, "ip4-lookup")      \
-  _ (IP6_LOOKUP, "ip6-lookup")      \
+  _ (DROP, "error-drop")                           \
+  _ (HANDOFF, "handoff")                           \
   _ (INTERFACE_OUTPUT, "interface-output")
 
 
@@ -183,6 +182,21 @@
       pd->sa_index = current_sa_index;
       next[0] = AH_ENCRYPT_NEXT_DROP;
 
+      if (PREDICT_FALSE (~0 == sa0->encrypt_thread_index))
+	{
+	  /* this is the first packet to use this SA, claim the SA
+	   * for this thread. this could happen simultaneously on
+	   * another thread */
+	  clib_atomic_cmp_and_swap (&sa0->encrypt_thread_index, ~0,
+				    ipsec_sa_assign_thread (thread_index));
+	}
+
+      if (PREDICT_TRUE (thread_index != sa0->encrypt_thread_index))
+	{
+	  next[0] = AH_ENCRYPT_NEXT_HANDOFF;
+	  goto next;
+	}
+
       if (PREDICT_FALSE (esp_seq_advance (sa0)))
 	{
 	  b[0]->error = node->errors[AH_ENCRYPT_ERROR_SEQ_CYCLED];
@@ -420,9 +434,9 @@
 
   .n_next_nodes = AH_ENCRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [AH_ENCRYPT_NEXT_##s] = n,
-    foreach_ah_encrypt_next
-#undef _
+    [AH_ENCRYPT_NEXT_DROP] = "ip4-drop",
+    [AH_ENCRYPT_NEXT_HANDOFF] = "ah4-encrypt-handoff",
+    [AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
   },
 };
 /* *INDENT-ON* */
@@ -446,9 +460,9 @@
 
   .n_next_nodes = AH_ENCRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [AH_ENCRYPT_NEXT_##s] = n,
-    foreach_ah_encrypt_next
-#undef _
+    [AH_ENCRYPT_NEXT_DROP] = "ip6-drop",
+    [AH_ENCRYPT_NEXT_HANDOFF] = "ah6-encrypt-handoff",
+    [AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
   },
 };
 /* *INDENT-ON* */
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index cddda1f..a56a784 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -27,7 +27,8 @@
 #define foreach_esp_decrypt_next                \
 _(DROP, "error-drop")                           \
 _(IP4_INPUT, "ip4-input-no-checksum")           \
-_(IP6_INPUT, "ip6-input")
+_(IP6_INPUT, "ip6-input")                       \
+_(HANDOFF, "handoff")
 
 #define _(v, s) ESP_DECRYPT_NEXT_##v,
 typedef enum
@@ -177,6 +178,21 @@
 	  cpd.sa_index = current_sa_index;
 	}
 
+      if (PREDICT_FALSE (~0 == sa0->decrypt_thread_index))
+	{
+	  /* this is the first packet to use this SA, claim the SA
+	   * for this thread. this could happen simultaneously on
+	   * another thread */
+	  clib_atomic_cmp_and_swap (&sa0->decrypt_thread_index, ~0,
+				    ipsec_sa_assign_thread (thread_index));
+	}
+
+      if (PREDICT_TRUE (thread_index != sa0->decrypt_thread_index))
+	{
+	  next[0] = ESP_DECRYPT_NEXT_HANDOFF;
+	  goto next;
+	}
+
       /* store packet data for next round for easier prefetch */
       pd->sa_data = cpd.sa_data;
       pd->current_data = b[0]->current_data;
@@ -595,9 +611,10 @@
 
   .n_next_nodes = ESP_DECRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
-    foreach_esp_decrypt_next
-#undef _
+    [ESP_DECRYPT_NEXT_DROP] = "ip4-drop",
+    [ESP_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [ESP_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [ESP_DECRYPT_NEXT_HANDOFF] = "esp4-decrypt-handoff",
   },
 };
 
@@ -612,9 +629,10 @@
 
   .n_next_nodes = ESP_DECRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
-    foreach_esp_decrypt_next
-#undef _
+    [ESP_DECRYPT_NEXT_DROP] = "ip6-drop",
+    [ESP_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [ESP_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [ESP_DECRYPT_NEXT_HANDOFF]=  "esp6-decrypt-handoff",
   },
 };
 
@@ -625,7 +643,13 @@
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
   .error_strings = esp_decrypt_error_strings,
-  .sibling_of = "esp4-decrypt",
+  .n_next_nodes = ESP_DECRYPT_N_NEXT,
+  .next_nodes = {
+    [ESP_DECRYPT_NEXT_DROP] = "ip4-drop",
+    [ESP_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [ESP_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [ESP_DECRYPT_NEXT_HANDOFF] = "esp4-decrypt-tun-handoff", /* uses esp4_dec_tun_fq_index */
+  },
 };
 
 VLIB_REGISTER_NODE (esp6_decrypt_tun_node) = {
@@ -635,7 +659,13 @@
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
   .error_strings = esp_decrypt_error_strings,
-  .sibling_of = "esp6-decrypt",
+  .n_next_nodes = ESP_DECRYPT_N_NEXT,
+  .next_nodes = {
+    [ESP_DECRYPT_NEXT_DROP] = "ip6-drop",
+    [ESP_DECRYPT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+    [ESP_DECRYPT_NEXT_IP6_INPUT] = "ip6-input",
+    [ESP_DECRYPT_NEXT_HANDOFF] = "esp6-decrypt-tun-handoff", /* uses esp6_dec_tun_fq_index */
+  },
 };
 /* *INDENT-ON* */
 
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 6170603..58e25f6 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -27,8 +27,7 @@
 
 #define foreach_esp_encrypt_next                   \
 _(DROP, "error-drop")                              \
-_(IP4_LOOKUP, "ip4-lookup")                        \
-_(IP6_LOOKUP, "ip6-lookup")                        \
+_(HANDOFF, "handoff")                              \
 _(INTERFACE_OUTPUT, "interface-output")
 
 #define _(v, s) ESP_ENCRYPT_NEXT_##v,
@@ -265,7 +264,7 @@
       esp_header_t *esp;
       u8 *payload, *next_hdr_ptr;
       u16 payload_len;
-      u32 hdr_len;
+      u32 hdr_len, config_index;
 
       if (n_left > 2)
 	{
@@ -281,9 +280,11 @@
 	{
 	  /* we are on a ipsec tunnel's feature arc */
 	  u32 next0;
+	  config_index = b[0]->current_config_index;
 	  sa_index0 = *(u32 *) vnet_feature_next_with_data (&next0, b[0],
 							    sizeof
 							    (sa_index0));
+	  vnet_buffer (b[0])->ipsec.sad_index = sa_index0;
 	  next[0] = next0;
 	}
       else
@@ -306,6 +307,25 @@
 	  iv_sz = sa0->crypto_iv_size;
 	}
 
+      if (PREDICT_FALSE (~0 == sa0->encrypt_thread_index))
+	{
+	  /* this is the first packet to use this SA, claim the SA
+	   * for this thread. this could happen simultaneously on
+	   * another thread */
+	  clib_atomic_cmp_and_swap (&sa0->encrypt_thread_index, ~0,
+				    ipsec_sa_assign_thread (thread_index));
+	}
+
+      if (PREDICT_TRUE (thread_index != sa0->encrypt_thread_index))
+	{
+	  next[0] = ESP_ENCRYPT_NEXT_HANDOFF;
+	  if (is_tun)
+	    {
+	      b[0]->current_config_index = config_index;
+	    }
+	  goto trace;
+	}
+
       if (vlib_buffer_chain_linearize (vm, b[0]) != 1)
 	{
 	  b[0]->error = node->errors[ESP_ENCRYPT_ERROR_CHAINED_BUFFER];
@@ -569,9 +589,9 @@
 
   .n_next_nodes = ESP_ENCRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
-    foreach_esp_encrypt_next
-#undef _
+    [ESP_ENCRYPT_NEXT_DROP] = "ip4-drop",
+    [ESP_ENCRYPT_NEXT_HANDOFF] = "esp4-encrypt-handoff",
+    [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
   },
 };
 /* *INDENT-ON* */
@@ -595,9 +615,9 @@
 
   .n_next_nodes = ESP_ENCRYPT_N_NEXT,
   .next_nodes = {
-#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
-    foreach_esp_encrypt_next
-#undef _
+    [ESP_ENCRYPT_NEXT_DROP] = "ip6-drop",
+    [ESP_ENCRYPT_NEXT_HANDOFF] = "esp6-encrypt-handoff",
+    [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
   },
 };
 /* *INDENT-ON* */
@@ -619,9 +639,11 @@
   .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
   .error_strings = esp_encrypt_error_strings,
 
-  .n_next_nodes = 1,
+  .n_next_nodes = ESP_ENCRYPT_N_NEXT,
   .next_nodes = {
     [ESP_ENCRYPT_NEXT_DROP] = "ip4-drop",
+    [ESP_ENCRYPT_NEXT_HANDOFF] = "esp4-encrypt-tun-handoff", /* uses esp4_enc_tun_fq_index */
+    [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "error-drop",
   },
 };
 
@@ -664,9 +686,11 @@
   .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
   .error_strings = esp_encrypt_error_strings,
 
-  .n_next_nodes = 1,
+  .n_next_nodes = ESP_ENCRYPT_N_NEXT,
   .next_nodes = {
     [ESP_ENCRYPT_NEXT_DROP] = "ip6-drop",
+    [ESP_ENCRYPT_NEXT_HANDOFF] = "esp6-encrypt-tun-handoff", /* uses esp6_enc_tun_fq_index */
+    [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "error-drop",
   },
 };
 
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index c54b94a..0f4f282 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -405,6 +405,32 @@
 
   vec_validate_aligned (im->ptd, vlib_num_workers (), CLIB_CACHE_LINE_BYTES);
 
+  im->ah4_enc_fq_index =
+    vlib_frame_queue_main_init (ah4_encrypt_node.index, 0);
+  im->ah4_dec_fq_index =
+    vlib_frame_queue_main_init (ah4_decrypt_node.index, 0);
+  im->ah6_enc_fq_index =
+    vlib_frame_queue_main_init (ah6_encrypt_node.index, 0);
+  im->ah6_dec_fq_index =
+    vlib_frame_queue_main_init (ah6_decrypt_node.index, 0);
+
+  im->esp4_enc_fq_index =
+    vlib_frame_queue_main_init (esp4_encrypt_node.index, 0);
+  im->esp4_dec_fq_index =
+    vlib_frame_queue_main_init (esp4_decrypt_node.index, 0);
+  im->esp6_enc_fq_index =
+    vlib_frame_queue_main_init (esp6_encrypt_node.index, 0);
+  im->esp6_dec_fq_index =
+    vlib_frame_queue_main_init (esp6_decrypt_node.index, 0);
+  im->esp4_enc_tun_fq_index =
+    vlib_frame_queue_main_init (esp4_encrypt_tun_node.index, 0);
+  im->esp6_enc_tun_fq_index =
+    vlib_frame_queue_main_init (esp6_encrypt_tun_node.index, 0);
+  im->esp4_dec_tun_fq_index =
+    vlib_frame_queue_main_init (esp4_decrypt_tun_node.index, 0);
+  im->esp6_dec_tun_fq_index =
+    vlib_frame_queue_main_init (esp6_decrypt_tun_node.index, 0);
+
   return 0;
 }
 
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 975ebc6..af75841 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -163,6 +163,21 @@
 
   /* per-thread data */
   ipsec_per_thread_data_t *ptd;
+
+  /** Worker handoff */
+  u32 ah4_enc_fq_index;
+  u32 ah4_dec_fq_index;
+  u32 ah6_enc_fq_index;
+  u32 ah6_dec_fq_index;
+
+  u32 esp4_enc_fq_index;
+  u32 esp4_dec_fq_index;
+  u32 esp6_enc_fq_index;
+  u32 esp6_dec_fq_index;
+  u32 esp4_enc_tun_fq_index;
+  u32 esp6_enc_tun_fq_index;
+  u32 esp4_dec_tun_fq_index;
+  u32 esp6_dec_tun_fq_index;
 } ipsec_main_t;
 
 typedef enum ipsec_format_flags_t_
@@ -179,14 +194,18 @@
 
 clib_error_t *ipsec_check_support_cb (ipsec_main_t * im, ipsec_sa_t * sa);
 
-extern vlib_node_registration_t esp4_encrypt_node;
-extern vlib_node_registration_t esp4_decrypt_node;
 extern vlib_node_registration_t ah4_encrypt_node;
 extern vlib_node_registration_t ah4_decrypt_node;
-extern vlib_node_registration_t esp6_encrypt_node;
-extern vlib_node_registration_t esp6_decrypt_node;
 extern vlib_node_registration_t ah6_encrypt_node;
 extern vlib_node_registration_t ah6_decrypt_node;
+extern vlib_node_registration_t esp4_encrypt_node;
+extern vlib_node_registration_t esp4_decrypt_node;
+extern vlib_node_registration_t esp6_encrypt_node;
+extern vlib_node_registration_t esp6_decrypt_node;
+extern vlib_node_registration_t esp4_encrypt_tun_node;
+extern vlib_node_registration_t esp6_encrypt_tun_node;
+extern vlib_node_registration_t esp4_decrypt_tun_node;
+extern vlib_node_registration_t esp6_decrypt_tun_node;
 extern vlib_node_registration_t ipsec4_if_input_node;
 extern vlib_node_registration_t ipsec6_if_input_node;
 
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index e6e8289..8b9d8c4 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -292,6 +292,8 @@
 
   s = format (s, "\n   locks %d", sa->node.fn_locks);
   s = format (s, "\n   salt 0x%x", clib_net_to_host_u32 (sa->salt));
+  s = format (s, "\n   thread-indices [encrypt:%d decrypt:%d]",
+	      sa->encrypt_thread_index, sa->decrypt_thread_index);
   s = format (s, "\n   seq %u seq-hi %u", sa->seq, sa->seq_hi);
   s = format (s, "\n   last-seq %u last-seq-hi %u window %U",
 	      sa->last_seq, sa->last_seq_hi,
diff --git a/src/vnet/ipsec/ipsec_handoff.c b/src/vnet/ipsec/ipsec_handoff.c
new file mode 100644
index 0000000..4446f1e
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_handoff.c
@@ -0,0 +1,450 @@
+/*
+ * ipsec_handoff.c : IPsec worker handoff nodes
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec_sa.h>
+
+#define foreach_ipsec_handoff_error  \
+_(CONGESTION_DROP, "congestion drop")
+
+typedef enum
+{
+#define _(sym,str) IPSEC_HANDOFF_ERROR_##sym,
+  foreach_ipsec_handoff_error
+#undef _
+    IPSEC_HANDOFF_N_ERROR,	/* count of the error codes listed above */
+} ipsec_handoff_error_t;
+
+static char *ipsec_handoff_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ipsec_handoff_error
+#undef _
+};
+
+typedef struct ipsec_handoff_trace_t_
+{
+  u32 next_worker_index;
+} ipsec_handoff_trace_t;
+
+static u8 *
+format_ipsec_handoff_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ipsec_handoff_trace_t *t = va_arg (*args, ipsec_handoff_trace_t *);
+
+  s = format (s, "next-worker %d", t->next_worker_index);
+
+  return s;
+}
+
+/* do worker handoff based on the thread index the packet's SA is bound to */
+static_always_inline uword
+ipsec_handoff (vlib_main_t * vm,
+	       vlib_node_runtime_t * node,
+	       vlib_frame_t * frame, u32 fq_index, bool is_enc)
+{
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
+  u32 n_enq, n_left_from, *from;
+  ipsec_main_t *im;
+
+  im = &ipsec_main;
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
+
+  b = bufs;
+  ti = thread_indices;
+
+  while (n_left_from >= 4)
+    {
+      ipsec_sa_t *sa0, *sa1, *sa2, *sa3;
+      u32 sai0, sai1, sai2, sai3;
+
+      /* Prefetch next iteration. */
+      if (n_left_from >= 12)
+	{
+	  vlib_prefetch_buffer_header (b[8], LOAD);
+	  vlib_prefetch_buffer_header (b[9], LOAD);
+	  vlib_prefetch_buffer_header (b[10], LOAD);
+	  vlib_prefetch_buffer_header (b[11], LOAD);
+
+	  vlib_prefetch_buffer_data (b[4], LOAD);
+	  vlib_prefetch_buffer_data (b[5], LOAD);
+	  vlib_prefetch_buffer_data (b[6], LOAD);
+	  vlib_prefetch_buffer_data (b[7], LOAD);
+	}
+
+      sai0 = vnet_buffer (b[0])->ipsec.sad_index;
+      sai1 = vnet_buffer (b[1])->ipsec.sad_index;
+      sai2 = vnet_buffer (b[2])->ipsec.sad_index;
+      sai3 = vnet_buffer (b[3])->ipsec.sad_index;
+      sa0 = pool_elt_at_index (im->sad, sai0);
+      sa1 = pool_elt_at_index (im->sad, sai1);
+      sa2 = pool_elt_at_index (im->sad, sai2);
+      sa3 = pool_elt_at_index (im->sad, sai3);
+
+      if (is_enc)
+	{
+	  ti[0] = sa0->encrypt_thread_index;
+	  ti[1] = sa1->encrypt_thread_index;
+	  ti[2] = sa2->encrypt_thread_index;
+	  ti[3] = sa3->encrypt_thread_index;
+	}
+      else
+	{
+	  ti[0] = sa0->decrypt_thread_index;
+	  ti[1] = sa1->decrypt_thread_index;
+	  ti[2] = sa2->decrypt_thread_index;
+	  ti[3] = sa3->decrypt_thread_index;
+	}
+
+      if (node->flags & VLIB_NODE_FLAG_TRACE)
+	{
+	  if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      ipsec_handoff_trace_t *t =
+		vlib_add_trace (vm, node, b[0], sizeof (*t));
+	      t->next_worker_index = ti[0];
+	    }
+	  if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      ipsec_handoff_trace_t *t =
+		vlib_add_trace (vm, node, b[1], sizeof (*t));
+	      t->next_worker_index = ti[1];
+	    }
+	  if (PREDICT_FALSE (b[2]->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      ipsec_handoff_trace_t *t =
+		vlib_add_trace (vm, node, b[2], sizeof (*t));
+	      t->next_worker_index = ti[2];
+	    }
+	  if (PREDICT_FALSE (b[3]->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      ipsec_handoff_trace_t *t =
+		vlib_add_trace (vm, node, b[3], sizeof (*t));
+	      t->next_worker_index = ti[3];
+	    }
+	}
+
+      n_left_from -= 4;
+      ti += 4;
+      b += 4;
+    }
+  while (n_left_from > 0)
+    {
+      ipsec_sa_t *sa0;
+      u32 sai0;
+
+      sai0 = vnet_buffer (b[0])->ipsec.sad_index;
+      sa0 = pool_elt_at_index (im->sad, sai0);
+
+      if (is_enc)
+	ti[0] = sa0->encrypt_thread_index;
+      else
+	ti[0] = sa0->decrypt_thread_index;
+
+      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+	{
+	  ipsec_handoff_trace_t *t =
+	    vlib_add_trace (vm, node, b[0], sizeof (*t));
+	  t->next_worker_index = ti[0];
+	}
+
+      n_left_from -= 1;
+      ti += 1;
+      b += 1;
+    }
+
+  n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from,
+					 thread_indices, frame->n_vectors, 1);
+
+  if (n_enq < frame->n_vectors)
+    vlib_node_increment_counter (vm, node->node_index,
+				 IPSEC_HANDOFF_ERROR_CONGESTION_DROP,
+				 frame->n_vectors - n_enq);
+
+  return n_enq;
+}
+
+VLIB_NODE_FN (esp4_encrypt_handoff) (vlib_main_t * vm,
+				     vlib_node_runtime_t * node,
+				     vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp4_enc_fq_index, true);
+}
+
+VLIB_NODE_FN (esp6_encrypt_handoff) (vlib_main_t * vm,
+				     vlib_node_runtime_t * node,
+				     vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp6_enc_fq_index, true);
+}
+
+VLIB_NODE_FN (esp4_encrypt_tun_handoff) (vlib_main_t * vm,
+					 vlib_node_runtime_t * node,
+					 vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp4_enc_tun_fq_index,
+			true);
+}
+
+VLIB_NODE_FN (esp6_encrypt_tun_handoff) (vlib_main_t * vm,
+					 vlib_node_runtime_t * node,
+					 vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp6_enc_tun_fq_index,
+			true);
+}
+
+VLIB_NODE_FN (esp4_decrypt_handoff) (vlib_main_t * vm,
+				     vlib_node_runtime_t * node,
+				     vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp4_dec_fq_index, false);
+}
+
+VLIB_NODE_FN (esp6_decrypt_handoff) (vlib_main_t * vm,
+				     vlib_node_runtime_t * node,
+				     vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp6_dec_fq_index, false);
+}
+
+VLIB_NODE_FN (esp4_decrypt_tun_handoff) (vlib_main_t * vm,
+					 vlib_node_runtime_t * node,
+					 vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp4_dec_tun_fq_index,
+			false);
+}
+
+VLIB_NODE_FN (esp6_decrypt_tun_handoff) (vlib_main_t * vm,
+					 vlib_node_runtime_t * node,
+					 vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->esp6_dec_tun_fq_index,
+			false);
+}
+
+VLIB_NODE_FN (ah4_encrypt_handoff) (vlib_main_t * vm,
+				    vlib_node_runtime_t * node,
+				    vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->ah4_enc_fq_index, true);
+}
+
+VLIB_NODE_FN (ah6_encrypt_handoff) (vlib_main_t * vm,
+				    vlib_node_runtime_t * node,
+				    vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->ah6_enc_fq_index, true);
+}
+
+VLIB_NODE_FN (ah4_decrypt_handoff) (vlib_main_t * vm,
+				    vlib_node_runtime_t * node,
+				    vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->ah4_dec_fq_index, false);
+}
+
+VLIB_NODE_FN (ah6_decrypt_handoff) (vlib_main_t * vm,
+				    vlib_node_runtime_t * node,
+				    vlib_frame_t * from_frame)
+{
+  ipsec_main_t *im = &ipsec_main;
+
+  return ipsec_handoff (vm, node, from_frame, im->ah6_dec_fq_index, false);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (esp4_encrypt_handoff) = {
+  .name = "esp4-encrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp6_encrypt_handoff) = {
+  .name = "esp6-encrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp4_encrypt_tun_handoff) = {
+  .name = "esp4-encrypt-tun-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp6_encrypt_tun_handoff) = {
+  .name = "esp6-encrypt-tun-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp4_decrypt_handoff) = {
+  .name = "esp4-decrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp6_decrypt_handoff) = {
+  .name = "esp6-decrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp4_decrypt_tun_handoff) = {
+  .name = "esp4-decrypt-tun-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (esp6_decrypt_tun_handoff) = {
+  .name = "esp6-decrypt-tun-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (ah4_encrypt_handoff) = {
+  .name = "ah4-encrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (ah6_encrypt_handoff) = {
+  .name = "ah6-encrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (ah4_decrypt_handoff) = {
+  .name = "ah4-decrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+VLIB_REGISTER_NODE (ah6_decrypt_handoff) = {
+  .name = "ah6-decrypt-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ipsec_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ipsec_handoff_error_strings),
+  .error_strings = ipsec_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index 712d0d5..4401c2e 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -163,6 +163,8 @@
   sa->protocol = proto;
   sa->flags = flags;
   sa->salt = salt;
+  sa->encrypt_thread_index = (vlib_num_workers ())? ~0 : 0;
+  sa->decrypt_thread_index = (vlib_num_workers ())? ~0 : 0;
   if (integ_alg != IPSEC_INTEG_ALG_NONE)
     {
       ipsec_sa_set_integ_alg (sa, integ_alg);
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index 7997153..e0d74e1 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -114,6 +114,8 @@
   u8 crypto_iv_size;
   u8 crypto_block_size;
   u8 integ_icv_size;
+  u32 encrypt_thread_index;
+  u32 decrypt_thread_index;
   u32 spi;
   u32 seq;
   u32 seq_hi;
@@ -436,6 +438,18 @@
     }
 }
 
+
+/*
+ * Pick the thread an SA gets bound to: a non-zero thread_id is returned
+ * unchanged; 0 maps to (unix_time_now_nsec () % vlib_num_workers ()) + 1.
+*/
+always_inline u32
+ipsec_sa_assign_thread (u32 thread_id)
+{
+  return ((thread_id) ? thread_id
+	  : (unix_time_now_nsec () % vlib_num_workers ()) + 1);
+}
+
 #endif /* __IPSEC_SPD_SA_H__ */
 
 /*