vcl: ldp support SO_ORIGINAL_DST

Type: improvement

Support the SO_ORIGINAL_DST socket option so that an application can
retrieve the original dst_ip4 and dst_port of an accepted connection
when a nat44 rule is enabled.

Change-Id: If00e00d03e48f3b78a23a68f1b078954d79dd0f7
Signed-off-by: qinyang <qiny@yusur.tech>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c
index 2ccd461..74359cc 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed.c
@@ -4104,7 +4104,55 @@
 			 idaddr, idport, xdaddr, xdport, proto, 0,
 			 is_twicenat);
 }
 
+/**
+ * @brief Look up the NAT44-ED session of a flow and report the address and
+ *        port stored in that session's i2o rewrite.
+ *
+ * The flow key is built for the IPv4 FIB found for table id 0. When no
+ * matching session exists the output arguments are left untouched.
+ *
+ * @param i2o_src           source address of the flow key
+ * @param i2o_src_port      source port of the flow key
+ * @param i2o_dst           destination address of the flow key
+ * @param i2o_dst_port      destination port of the flow key
+ * @param proto             IP protocol of the flow key
+ * @param original_dst      [out] receives i2o.rewrite.saddr on a hit
+ * @param original_dst_port [out] receives i2o.rewrite.sport on a hit
+ */
+__clib_export void
+nat44_original_dst_lookup (ip4_address_t *i2o_src, u16 i2o_src_port,
+			   ip4_address_t *i2o_dst, u16 i2o_dst_port,
+			   ip_protocol_t proto, u32 *original_dst,
+			   u16 *original_dst_port)
+{
+  clib_bihash_kv_16_8_t kv = { 0 }, value;
+  snat_main_per_thread_data_t *tsm;
+  snat_main_t *sm = &snat_main;
+  u32 fib_index, session_index;
+  snat_session_t *s;
+
+  fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0);
+  init_ed_k (&kv, i2o_src->as_u32, i2o_src_port, i2o_dst->as_u32, i2o_dst_port,
+	     fib_index, proto);
+  if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
+    return;
+
+  /* The flow hash value encodes the thread that owns the session, so take
+   * the per-thread pool from it instead of re-deriving a worker index. */
+  tsm = vec_elt_at_index (sm->per_thread_data,
+			  ed_value_get_thread_index (&value));
+  session_index = ed_value_get_session_index (&value);
+
+  /* pool_elt_at_index never yields NULL; guard against a stale index. */
+  if (pool_is_free_index (tsm->sessions, session_index))
+    return;
+
+  s = pool_elt_at_index (tsm->sessions, session_index);
+  *original_dst = s->i2o.rewrite.saddr.as_u32;
+  *original_dst_port = s->i2o.rewrite.sport;
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c
index 2256a2b..a8062b4 100644
--- a/src/vcl/ldp.c
+++ b/src/vcl/ldp.c
@@ -67,6 +67,10 @@
 #define UDP_SEGMENT 103
 #endif
 
+#ifndef SO_ORIGINAL_DST
+/* from <linux/netfilter_ipv4.h> */
+#define SO_ORIGINAL_DST 80
+#endif
 typedef struct ldp_worker_ctx_
 {
   u8 *io_buffer;
@@ -2043,6 +2047,45 @@
 	      break;
 	    }
 	  break;
+	case SOL_IP:
+	  switch (optname)
+	    {
+	    case SO_ORIGINAL_DST:
+	      {
+		/* VPPCOM_ATTR_GET_ORIGINAL_DST fills a vppcom_endpt_t whose
+		 * ip member must point at caller storage, while sockets
+		 * callers pass a struct sockaddr_in for SO_ORIGINAL_DST, so
+		 * query into a local endpoint and convert the result. */
+		struct sockaddr_in *sin = optval;
+		struct in_addr ip4 = { 0 };
+		vppcom_endpt_t ep = { 0 };
+		u32 ep_len = sizeof (ep);
+
+		if (!optval || !optlen || *optlen < sizeof (*sin))
+		  {
+		    rv = VPPCOM_EINVAL;
+		    break;
+		  }
+		ep.ip = (u8 *) &ip4;
+		rv =
+		  vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, &ep, &ep_len);
+		if (rv != VPPCOM_OK)
+		  break;
+
+		*sin = (struct sockaddr_in){ .sin_family = AF_INET,
+					     .sin_port = ep.port,
+					     .sin_addr = ip4 };
+		*optlen = sizeof (*sin);
+	      }
+	      break;
+	    default:
+	      LDBG (0,
+		    "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
+		    "optname %d unsupported!",
+		    fd, vlsh, optname);
+	      break;
+	    }
+	  break;
 	case SOL_IPV6:
 	  switch (optname)
 	    {
diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c
index afe8824..6071f64 100644
--- a/src/vcl/vcl_bapi.c
+++ b/src/vcl/vcl_bapi.c
@@ -360,7 +360,8 @@
     (vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
     (app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
     (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
-    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0);
+    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+    (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
   bmp->options[APP_OPTIONS_PROXY_TRANSPORT] =
     (u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
 	   (vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
diff --git a/src/vcl/vcl_cfg.c b/src/vcl/vcl_cfg.c
index be142ea..edea60d 100644
--- a/src/vcl/vcl_cfg.c
+++ b/src/vcl/vcl_cfg.c
@@ -464,6 +464,11 @@
 	      VCFG_DBG (0, "VCL<%d>: configured with multithread workers",
 			getpid ());
 	    }
+	  else if (unformat (line_input, "app_original_dst"))
+	    {
+	      vcl_cfg->app_original_dst = 1;
+	      VCFG_DBG (0, "VCL<%d>: support original destination", getpid ());
+	    }
 	  else if (unformat (line_input, "}"))
 	    {
 	      vc_cfg_input = 0;
diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h
index 39a0f05..8345e34 100644
--- a/src/vcl/vcl_private.h
+++ b/src/vcl/vcl_private.h
@@ -180,6 +180,9 @@
 #if VCL_ELOG
   elog_track_t elog_track;
 #endif
+
+  u16 original_dst_port; /**< original dst port (network order) */
+  u32 original_dst_ip4;	 /**< original dst ip4 (network order) */
 } vcl_session_t;
 
 typedef struct vppcom_cfg_t_
@@ -208,6 +211,7 @@
   u32 tls_engine;
   u8 mt_wrk_supported;
   u8 huge_page;
+  u8 app_original_dst;
 } vppcom_cfg_t;
 
 void vppcom_cfg (vppcom_cfg_t * vcl_cfg);
diff --git a/src/vcl/vcl_sapi.c b/src/vcl/vcl_sapi.c
index 3a97fa2..e3e2b6a 100644
--- a/src/vcl/vcl_sapi.c
+++ b/src/vcl/vcl_sapi.c
@@ -130,7 +130,8 @@
     (vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
     (app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
     (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
-    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0);
+    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+    (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
   mp->options[APP_OPTIONS_PROXY_TRANSPORT] =
     (u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
 	   (vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index d9cc885..06a345d 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -351,6 +351,11 @@
 
   session->vpp_handle = mp->handle;
   session->session_state = VCL_STATE_READY;
+  if (mp->rmt.is_ip4)
+    {
+      session->original_dst_ip4 = mp->original_dst_ip4;
+      session->original_dst_port = mp->original_dst_port;
+    }
   session->transport.rmt_port = mp->rmt.port;
   session->transport.is_ip4 = mp->rmt.is_ip4;
   clib_memcpy_fast (&session->transport.rmt_ip, &mp->rmt.ip,
@@ -3611,6 +3616,34 @@
 	rv = VPPCOM_EINVAL;
       break;
 
+    case VPPCOM_ATTR_GET_ORIGINAL_DST:
+      /* The original destination is only tracked for IPv4 sessions. */
+      if (!session->transport.is_ip4)
+	{
+	  rv = VPPCOM_EAFNOSUPPORT;
+	  break;
+	}
+      if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*ep)) &&
+			ep->ip))
+	{
+	  ep->is_ip4 = session->transport.is_ip4;
+	  ep->port = session->original_dst_port;
+	  clib_memcpy_fast (ep->ip, &session->original_dst_ip4,
+			    sizeof (ip4_address_t));
+	  *buflen = sizeof (*ep);
+	  /* %U with vcl_format_ip4_address takes just the address pointer;
+	   * an extra ip46 type argument would be consumed by "port %d". */
+	  VDBG (1,
+		"VPPCOM_ATTR_GET_ORIGINAL_DST: sh %u, is_ip4 = %u, addr = %U"
+		" port %d",
+		session_handle, ep->is_ip4, vcl_format_ip4_address,
+		(ip4_address_t *) (&session->original_dst_ip4),
+		clib_net_to_host_u16 (ep->port));
+	}
+      else
+	rv = VPPCOM_EINVAL;
+      break;
+
     case VPPCOM_ATTR_SET_LCL_ADDR:
       if (PREDICT_TRUE (buffer && buflen &&
 			(*buflen >= sizeof (*ep)) && ep->ip))
diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h
index 71a49ab..7826076 100644
--- a/src/vcl/vppcom.h
+++ b/src/vcl/vppcom.h
@@ -176,6 +176,7 @@
   VPPCOM_ATTR_SET_DSCP,
   VPPCOM_ATTR_SET_IP_PKTINFO,
   VPPCOM_ATTR_GET_IP_PKTINFO,
+  VPPCOM_ATTR_GET_ORIGINAL_DST,
 } vppcom_attr_op_t;
 
 typedef struct _vcl_poll
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index cfa9719..5998921 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -1529,6 +1529,12 @@
   return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
 }
 
+int
+application_original_dst_is_enabled (application_t *app)
+{
+  return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
 static clib_error_t *
 application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
 					u8 transport_proto, u8 is_start)
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 09737a6..e100fe8 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -300,6 +300,7 @@
 void application_setup_proxy (application_t * app);
 void application_remove_proxy (application_t * app);
 void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
 
 segment_manager_props_t *application_get_segment_manager_properties (u32
 								     app_index);
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 138953b..510068b 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -233,7 +233,8 @@
   _ (USE_LOCAL_SCOPE, "App can use local session scope")                      \
   _ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling")                        \
   _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs")                     \
-  _ (USE_HUGE_PAGE, "Use huge page for FIFO")
+  _ (USE_HUGE_PAGE, "Use huge page for FIFO")                                 \
+  _ (GET_ORIGINAL_DST, "Get original dst enabled")
 
 typedef enum _app_options
 {
@@ -299,15 +300,15 @@
   u8 is_ip4;			/**< set if uses ip4 networking */
 } app_session_transport_t;
 
-#define foreach_app_session_field					\
-  _(svm_fifo_t, *rx_fifo)		/**< rx fifo */			\
-  _(svm_fifo_t, *tx_fifo)		/**< tx fifo */			\
-  _(session_type_t, session_type)	/**< session type */		\
-  _(volatile u8, session_state)		/**< session state */		\
-  _(u32, session_index)			/**< index in owning pool */	\
-  _(app_session_transport_t, transport)	/**< transport info */		\
-  _(svm_msg_q_t, *vpp_evt_q)		/**< vpp event queue  */	\
-  _(u8, is_dgram)			/**< flag for dgram mode */	\
+#define foreach_app_session_field                                             \
+  _ (svm_fifo_t, *rx_fifo)		 /**< rx fifo */                      \
+  _ (svm_fifo_t, *tx_fifo)		 /**< tx fifo */                      \
+  _ (session_type_t, session_type)	 /**< session type */                 \
+  _ (volatile u8, session_state)	 /**< session state */                \
+  _ (u32, session_index)		 /**< index in owning pool */         \
+  _ (app_session_transport_t, transport) /**< transport info */               \
+  _ (svm_msg_q_t, *vpp_evt_q)		 /**< vpp event queue  */             \
+  _ (u8, is_dgram)			 /**< flag for dgram mode */
 
 typedef struct
 {
@@ -386,6 +387,8 @@
   transport_endpoint_t lcl;
   transport_endpoint_t rmt;
   u8 flags;
+  u32 original_dst_ip4;
+  u16 original_dst_port;
 } __clib_packed session_accepted_msg_t;
 
 typedef struct session_accepted_reply_msg_
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 5bb5776..228234c 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -17,6 +17,7 @@
  * @brief Session and session manager
  */
 
+#include <vnet/plugin/plugin.h>
 #include <vnet/session/session.h>
 #include <vnet/session/application.h>
 #include <vnet/dpo/load_balance.h>
@@ -1762,6 +1763,22 @@
 					      f->segment_index);
 }
 
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+			  transport_endpoint_t *i2o_dst,
+			  transport_proto_t transport_proto, u32 *original_dst,
+			  u16 *original_dst_port)
+{
+  session_main_t *smm = vnet_get_session_main ();
+  ip_protocol_t proto =
+    (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+  if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+    return;
+  smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+			    i2o_dst->port, proto, original_dst,
+			    original_dst_port);
+}
+
 /* *INDENT-OFF* */
 static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
     session_tx_fifo_peek_and_snd,
@@ -2292,6 +2309,15 @@
 	smm->no_adaptive = 1;
       else if (unformat (input, "use-dma"))
 	smm->dma_enabled = 1;
+      else if (unformat (input, "nat44-original-dst-enable"))
+	{
+	  smm->original_dst_lookup = vlib_get_plugin_symbol (
+	    "nat_plugin.so", "nat44_original_dst_lookup");
+	  /* Surface a failed symbol lookup instead of silently disabling. */
+	  if (!smm->original_dst_lookup)
+	    clib_warning ("nat44_original_dst_lookup not found in "
+			  "nat_plugin.so, original dst lookup disabled");
+	}
       /*
        * Deprecated but maintained for compatibility
        */
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 10bae27..9c08f1a 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -193,6 +193,10 @@
 u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
 
 typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+  ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+  u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+  u16 *original_dst_port);
 
 typedef struct session_main_
 {
@@ -281,6 +285,9 @@
   u32 preallocated_sessions;
 
   u16 msg_id_base;
+
+  /** Query nat44-ed session to get original dst ip4 & dst port. */
+  nat44_original_dst_lookup_fn original_dst_lookup;
 } session_main_t;
 
 extern session_main_t session_main;
@@ -812,6 +819,10 @@
 
 session_t *session_alloc_for_connection (transport_connection_t * tc);
 session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+			       transport_endpoint_t *i2o_dst,
+			       transport_proto_t transport_proto,
+			       u32 *original_dst, u16 *original_dst_port);
 
 typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
 
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 3e99938..3d70733 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -136,6 +136,13 @@
       m.mq_index = s->thread_index;
     }
 
+  if (application_original_dst_is_enabled (app))
+    {
+      session_get_original_dst (&m.lcl, &m.rmt,
+				session_get_transport_proto (s),
+				&m.original_dst_ip4, &m.original_dst_port);
+    }
+
   app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
 
   return 0;