blob: 32a0884d86b86f811c17ab6942a2117c9a91460e [file] [log] [blame]
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file
* @brief Local TCP/IP stack punt infrastructure.
*
* Provides a set of VPP nodes together with the relevant APIs and CLI
* commands in order to adjust and dispatch packets from the VPP data plane
* to the local TCP/IP stack
*/
#include <vnet/ip/ip.h>
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/udp/udp.h>
#include <vnet/tcp/tcp.h>
#include <vnet/ip/punt.h>
#include <vlib/unix/unix.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdlib.h>
typedef enum
{
#define punt_error(n,s) PUNT_ERROR_##n,
#include <vnet/ip/punt_error.def>
#undef punt_error
PUNT_N_ERROR,
} punt_error_t;
#define foreach_punt_next \
_ (PUNT4, "ip4-punt") \
_ (PUNT6, "ip6-punt")
typedef enum
{
#define _(s,n) PUNT_NEXT_##s,
foreach_punt_next
#undef _
PUNT_N_NEXT,
} punt_next_t;
enum punt_socket_rx_next_e
{
PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT,
PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,
PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,
PUNT_SOCKET_RX_N_NEXT
};
#define punt_next_punt(is_ip4) (is_ip4 ? PUNT_NEXT_PUNT4 : PUNT_NEXT_PUNT6)
/** @brief IPv4/IPv6 UDP punt node main loop.
This is the main loop inline function for IPv4/IPv6 UDP punt
transition node.
@param vm vlib_main_t corresponding to the current thread
@param node vlib_node_runtime_t
@param frame vlib_frame_t whose contents should be dispatched
@param is_ipv4 indicates if called for IPv4 or IPv6 node
*/
always_inline uword
udp46_punt_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip4)
{
u32 n_left_from, *from, *to_next;
word advance;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
/* udp[46]_lookup hands us the data payload, not the IP header */
if (is_ip4)
advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
else
advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
while (n_left_from > 0)
{
u32 n_left_to_next;
vlib_get_next_frame (vm, node, punt_next_punt (is_ip4), to_next,
n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 bi0;
vlib_buffer_t *b0;
bi0 = from[0];
to_next[0] = bi0;
from += 1;
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
vlib_buffer_advance (b0, advance);
b0->error = node->errors[PUNT_ERROR_UDP_PORT];
}
vlib_put_next_frame (vm, node, punt_next_punt (is_ip4), n_left_to_next);
}
return from_frame->n_vectors;
}
static char *punt_error_strings[] = {
#define punt_error(n,s) s,
#include "punt_error.def"
#undef punt_error
};
/** @brief IPv4 UDP punt node.
@node ip4-udp-punt
This is the IPv4 UDP punt transition node. It is registered as a next
node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
The buffer's current data pointer is adjusted to the original packet
IPv4 header. All buffers are dispatched to "error-punt".
@param vm vlib_main_t corresponding to the current thread
@param node vlib_node_runtime_t
@param frame vlib_frame_t whose contents should be dispatched
@par Graph mechanics: next index usage
@em Sets:
- <code>vnet_buffer(b)->current_data</code>
- <code>vnet_buffer(b)->current_len</code>
<em>Next Index:</em>
- Dispatches the packet to the "error-punt" node
*/
VLIB_NODE_FN (udp4_punt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
}
/** @brief IPv6 UDP punt node.
@node ip6-udp-punt
This is the IPv6 UDP punt transition node. It is registered as a next
node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
The buffer's current data pointer is adjusted to the original packet
IPv6 header. All buffers are dispatched to "error-punt".
@param vm vlib_main_t corresponding to the current thread
@param node vlib_node_runtime_t
@param frame vlib_frame_t whose contents should be dispatched
@par Graph mechanics: next index usage
@em Sets:
- <code>vnet_buffer(b)->current_data</code>
- <code>vnet_buffer(b)->current_len</code>
<em>Next Index:</em>
- Dispatches the packet to the "error-punt" node
*/
VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_node) = {
.name = "ip4-udp-punt",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
.n_next_nodes = PUNT_N_NEXT,
.next_nodes = {
#define _(s,n) [PUNT_NEXT_##s] = n,
foreach_punt_next
#undef _
},
};
VLIB_REGISTER_NODE (udp6_punt_node) = {
.name = "ip6-udp-punt",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
.n_next_nodes = PUNT_N_NEXT,
.next_nodes = {
#define _(s,n) [PUNT_NEXT_##s] = n,
foreach_punt_next
#undef _
},
};
/* *INDENT-ON* */
typedef struct
{
punt_client_t client;
u8 is_midchain;
} udp_punt_trace_t;
static u8 *
format_udp_punt_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
udp_punt_trace_t *t = va_arg (*args, udp_punt_trace_t *);
u32 indent = format_get_indent (s);
s = format (s, "to: %s", t->client.caddr.sun_path);
if (t->is_midchain)
{
s = format (s, "\n%U(buffer is part of chain)", format_white_space,
indent);
}
return s;
}
always_inline uword
punt_socket_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame,
punt_type_t pt, ip_address_family_t af)
{
u32 *buffers = vlib_frame_vector_args (frame);
uword n_packets = frame->n_vectors;
struct iovec *iovecs = 0;
punt_main_t *pm = &punt_main;
int i;
u32 node_index = AF_IP4 == af ? udp4_punt_socket_node.index :
udp6_punt_socket_node.index;
for (i = 0; i < n_packets; i++)
{
struct iovec *iov;
vlib_buffer_t *b;
uword l;
punt_packetdesc_t packetdesc;
punt_client_t *c;
b = vlib_get_buffer (vm, buffers[i]);
if (PUNT_TYPE_L4 == pt)
{
/* Reverse UDP Punt advance */
udp_header_t *udp;
if (AF_IP4 == af)
{
vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
sizeof (udp_header_t)));
ip4_header_t *ip = vlib_buffer_get_current (b);
udp = (udp_header_t *) (ip + 1);
}
else
{
vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
sizeof (udp_header_t)));
ip6_header_t *ip = vlib_buffer_get_current (b);
udp = (udp_header_t *) (ip + 1);
}
/*
* Find registerered client
* If no registered client, drop packet and count
*/
c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
}
else if (PUNT_TYPE_IP_PROTO == pt)
{
/* Reverse UDP Punt advance */
ip_protocol_t proto;
if (AF_IP4 == af)
{
ip4_header_t *ip = vlib_buffer_get_current (b);
proto = ip->protocol;
}
else
{
ip6_header_t *ip = vlib_buffer_get_current (b);
proto = ip->protocol;
}
c = punt_client_ip_proto_get (af, proto);
}
else if (PUNT_TYPE_EXCEPTION == pt)
{
c = punt_client_exception_get (b->punt_reason);
}
else
c = NULL;
if (PREDICT_FALSE (NULL == c))
{
vlib_node_increment_counter (vm, node_index,
PUNT_ERROR_SOCKET_TX_ERROR, 1);
goto error;
}
struct sockaddr_un *caddr = &c->caddr;
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
{
udp_punt_trace_t *t;
t = vlib_add_trace (vm, node, b, sizeof (t[0]));
clib_memcpy_fast (&t->client, c, sizeof (t->client));
}
/* Re-set iovecs if present. */
if (iovecs)
_vec_len (iovecs) = 0;
/* Add packet descriptor */
packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
packetdesc.action = 0;
vec_add2 (iovecs, iov, 1);
iov->iov_base = &packetdesc;
iov->iov_len = sizeof (packetdesc);
/** VLIB buffer chain -> Unix iovec(s). */
vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
vec_add2 (iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = l = b->current_length;
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
do
{
b = vlib_get_buffer (vm, b->next_buffer);
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
{
udp_punt_trace_t *t;
t = vlib_add_trace (vm, node, b, sizeof (t[0]));
clib_memcpy_fast (&t->client, c, sizeof (t->client));
t->is_midchain = 1;
}
vec_add2 (iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = b->current_length;
l += b->current_length;
}
while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
}
struct msghdr msg = {
.msg_name = caddr,
.msg_namelen = sizeof (*caddr),
.msg_iov = iovecs,
.msg_iovlen = vec_len (iovecs),
};
if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l)
vlib_node_increment_counter (vm, node_index,
PUNT_ERROR_SOCKET_TX_ERROR, 1);
else
vlib_node_increment_counter (vm, node_index, PUNT_ERROR_SOCKET_TX, 1);
}
error:
vlib_buffer_free (vm, buffers, n_packets);
return n_packets;
}
static uword
udp4_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP4);
}
static uword
udp6_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6);
}
static uword
ip4_proto_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
return punt_socket_inline (vm, node, from_frame,
PUNT_TYPE_IP_PROTO, AF_IP4);
}
static uword
ip6_proto_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
return punt_socket_inline (vm, node, from_frame,
PUNT_TYPE_IP_PROTO, AF_IP6);
}
static uword
exception_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
return punt_socket_inline (vm, node, from_frame,
PUNT_TYPE_EXCEPTION, AF_IP4);
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
.function = udp4_punt_socket,
.name = "ip4-udp-punt-socket",
.format_trace = format_udp_punt_trace,
.flags = VLIB_NODE_FLAG_IS_DROP,
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
VLIB_REGISTER_NODE (udp6_punt_socket_node) = {
.function = udp6_punt_socket,
.name = "ip6-udp-punt-socket",
.format_trace = format_udp_punt_trace,
.flags = VLIB_NODE_FLAG_IS_DROP,
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
VLIB_REGISTER_NODE (ip4_proto_punt_socket_node) = {
.function = ip4_proto_punt_socket,
.name = "ip4-proto-punt-socket",
.format_trace = format_udp_punt_trace,
.flags = VLIB_NODE_FLAG_IS_DROP,
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
VLIB_REGISTER_NODE (ip6_proto_punt_socket_node) = {
.function = ip6_proto_punt_socket,
.name = "ip6-proto-punt-socket",
.format_trace = format_udp_punt_trace,
.flags = VLIB_NODE_FLAG_IS_DROP,
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
VLIB_REGISTER_NODE (exception_punt_socket_node) = {
.function = exception_punt_socket,
.name = "exception-punt-socket",
.format_trace = format_udp_punt_trace,
.flags = VLIB_NODE_FLAG_IS_DROP,
.vector_size = sizeof (u32),
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
/* *INDENT-ON* */
typedef struct
{
enum punt_action_e action;
u32 sw_if_index;
} punt_trace_t;
static u8 *
format_punt_trace (u8 * s, va_list * va)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
vnet_main_t *vnm = vnet_get_main ();
punt_trace_t *t = va_arg (*va, punt_trace_t *);
s = format (s, "%U Action: %d", format_vnet_sw_if_index_name,
vnm, t->sw_if_index, t->action);
return s;
}
static uword
punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd)
{
const uword buffer_size = vlib_buffer_get_default_data_size (vm);
u32 n_trace = vlib_get_trace_count (vm, node);
u32 next = node->cached_next_index;
u32 n_left_to_next, next_index;
u32 *to_next;
u32 error = PUNT_ERROR_NONE;
vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* $$$$ Only dealing with one buffer at the time for now */
u32 bi;
vlib_buffer_t *b;
punt_packetdesc_t packetdesc;
ssize_t size;
struct iovec io[2];
if (vlib_buffer_alloc (vm, &bi, 1) != 1)
{
error = PUNT_ERROR_NOBUFFER;
goto error;
}
b = vlib_get_buffer (vm, bi);
io[0].iov_base = &packetdesc;
io[0].iov_len = sizeof (packetdesc);
io[1].iov_base = b->data;
io[1].iov_len = buffer_size;
size = readv (fd, io, 2);
/* We need at least the packet descriptor plus a header */
if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t)))
{
vlib_buffer_free (vm, &bi, 1);
error = PUNT_ERROR_READV;
goto error;
}
b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->current_length = size - sizeof (packetdesc);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
switch (packetdesc.action)
{
case PUNT_L2:
vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index;
next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT;
break;
case PUNT_IP4_ROUTED:
vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP;
break;
case PUNT_IP6_ROUTED:
vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP;
break;
default:
error = PUNT_ERROR_ACTION;
vlib_buffer_free (vm, &bi, 1);
goto error;
}
if (PREDICT_FALSE (n_trace > 0))
{
punt_trace_t *t;
vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ );
vlib_set_trace_count (vm, node, --n_trace);
t = vlib_add_trace (vm, node, b, sizeof (*t));
t->sw_if_index = packetdesc.sw_if_index;
t->action = packetdesc.action;
}
to_next[0] = bi;
to_next++;
n_left_to_next--;
vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next,
bi, next_index);
vlib_put_next_frame (vm, node, next, n_left_to_next);
return 1;
error:
vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1);
return 0;
}
static uword
punt_socket_rx (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
punt_main_t *pm = &punt_main;
u32 total_count = 0;
int i;
for (i = 0; i < vec_len (pm->ready_fds); i++)
{
total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]);
vec_del1 (pm->ready_fds, i);
}
return total_count;
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_socket_rx_node) =
{
.function = punt_socket_rx,
.name = "punt-socket-rx",
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.vector_size = 1,
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
.n_next_nodes = PUNT_SOCKET_RX_N_NEXT,
.next_nodes = {
[PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output",
[PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup",
[PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
.format_trace = format_punt_trace,
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/