blob: e87fd8a90924d078bb9ea75ade5112cfddeed4ef [file] [log] [blame]
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <unistd.h>
#include <stdio.h>
#include <sys/uio.h>
#include <limits.h>
#define __need_IOV_MAX
#include <bits/stdio_lim.h>
#include <netinet/tcp.h>
#include <vppinfra/types.h>
#include <vppinfra/time.h>
#include <vppinfra/hash.h>
#include <vppinfra/pool.h>
#include <vcl/vcom_socket.h>
#include <vcl/vcom_socket_wrapper.h>
#include <vcl/vcom.h>
#include <vcl/vppcom.h>
#ifndef IOV_MAX
#define IOV_MAX __IOV_MAX
#endif
/*
* VCOM_SOCKET Private definitions and functions.
*/
/*
 * Bookkeeping shared by every function in this file: pools of sockets,
 * epoll instances and epoll items, plus the hash tables used to find
 * a pool element from a file descriptor.
 */
typedef struct vcom_socket_main_t_
{
/* NOTE(review): presumably an "initialised" flag; not touched in this part of the file */
u8 init;
clib_time_t clib_time;
pid_t my_pid;
/* vcom_socket pool */
vcom_socket_t *vsockets;
/* Hash table - key:fd, value:index into vsockets */
uword *sockidx_by_fd;
/* vcom_epoll pool */
vcom_epoll_t *vepolls;
/* Hash table - key:epfd, value:index into vepolls */
uword *epollidx_by_epfd;
/* common epitem pool for all epfd */
/* TBD: epitem pool per epfd */
/* vcom_epitem pool */
vcom_epitem_t *vepitems;
/* Hash table for epitemidx to epfdfd mapping */
uword *epitemidx_by_epfdfd;
/* Hash table - key:epfd, value:vec of epitemidx */
uword *epitemidxs_by_epfd;
/* Hash table - key:fd, value:vec of epitemidx */
uword *epitemidxs_by_fd;
/* scratch vector used by vcom_socket_sendfile to stage file data */
u8 *io_buffer;
} vcom_socket_main_t;
/* The single instance referenced by the functions below. */
vcom_socket_main_t vcom_socket_main;
/*
 * Open the underlying libc socket for a domain/type pair handled by vpp.
 *
 * Only AF_INET/AF_INET6 with SOCK_STREAM/SOCK_DGRAM are accepted; the
 * SOCK_NONBLOCK and SOCK_CLOEXEC modifiers are ignored for that check
 * but passed through to libc_socket unchanged.
 *
 * Returns the new fd, -errno when libc_socket fails, or -1 when the
 * domain or socket type is not handled.
 */
static int
vcom_socket_open_socket (int domain, int type, int protocol)
{
  int base_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
  int fd;

  if (domain != AF_INET && domain != AF_INET6)
    return -1;

  if (base_type != SOCK_STREAM && base_type != SOCK_DGRAM)
    return -1;

  fd = libc_socket (domain, type, protocol);
  if (fd == -1)
    return -errno;

  return fd;
}
/*
 * Create the underlying libc epoll instance.
 *
 * flags must be zero or EPOLL_CLOEXEC; anything else (including a
 * negative value) yields -EINVAL.
 * Returns the new epfd, or -errno when libc_epoll_create1 fails.
 */
static int
vcom_socket_open_epoll (int flags)
{
  int epfd;

  if (flags < 0 || (flags & ~EPOLL_CLOEXEC) != 0)
    return -EINVAL;

  epfd = libc_epoll_create1 (flags);
  return (epfd == -1) ? -errno : epfd;
}
/*
 * Close the libc descriptor backing a socket.
 * Returns the libc_close result, with a -1 failure mapped to -errno.
 */
static int
vcom_socket_close_socket (int fd)
{
  int status = libc_close (fd);

  return (status == -1) ? -errno : status;
}
/*
 * Close the libc descriptor backing an epoll instance.
 * Returns the libc_close result, with a -1 failure mapped to -errno.
 */
static int
vcom_socket_close_epoll (int epfd)
{
  int status = libc_close (epfd);

  return (status == -1) ? -errno : status;
}
/*
* Public API functions
*/
/*
 * Tell whether fd is a socket tracked by this layer.
 * Returns 1 when fd maps to a pool element of type
 * SOCKET_TYPE_VPPCOM_BOUND, 0 otherwise.
 */
int
vcom_socket_is_vcom_fd (int fd)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;

  slot = hash_get (sm->sockidx_by_fd, fd);
  if (!slot)
    return 0;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock || sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return 0;

  return 1;
}
/*
 * Tell whether epfd is an epoll instance tracked by this layer.
 * Returns 1 when epfd maps to a pool element of type
 * EPOLL_TYPE_VPPCOM_BOUND, 0 otherwise.
 */
int
vcom_socket_is_vcom_epfd (int epfd)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_epoll_t *ep;
  uword *slot;

  slot = hash_get (sm->epollidx_by_epfd, epfd);
  if (!slot)
    return 0;

  ep = pool_elt_at_index (sm->vepolls, slot[0]);
  if (!ep || ep->type != EPOLL_TYPE_VPPCOM_BOUND)
    return 0;

  return 1;
}
/*
 * Map a file descriptor to its session id.
 * Returns the sid of the bound socket registered for fd, or
 * INVALID_SESSION_ID when fd is unknown or not VPPCOM bound.
 */
static inline int
vcom_socket_get_sid (int fd)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;

  slot = hash_get (sm->sockidx_by_fd, fd);
  if (!slot)
    return INVALID_SESSION_ID;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock || sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return INVALID_SESSION_ID;

  return sock->sid;
}
/*
 * Map an epoll descriptor to its vep index.
 * Returns the vep_idx of the bound epoll instance registered for epfd,
 * or INVALID_VEP_IDX when epfd is unknown or not VPPCOM bound.
 */
static inline int
vcom_socket_get_vep_idx (int epfd)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_epoll_t *ep;
  uword *slot;

  slot = hash_get (sm->epollidx_by_epfd, epfd);
  if (!slot)
    return INVALID_VEP_IDX;

  ep = pool_elt_at_index (sm->vepolls, slot[0]);
  if (!ep || ep->type != EPOLL_TYPE_VPPCOM_BOUND)
    return INVALID_VEP_IDX;

  return ep->vep_idx;
}
/*
 * Map a file descriptor to its session id and pool element.
 * On success stores the socket in *vsockp and returns its sid.
 * Returns INVALID_SESSION_ID, leaving *vsockp untouched, when fd is
 * unknown or not VPPCOM bound.
 */
static inline int
vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t ** vsockp)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;

  slot = hash_get (sm->sockidx_by_fd, fd);
  if (!slot)
    return INVALID_SESSION_ID;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock || sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return INVALID_SESSION_ID;

  *vsockp = sock;
  return sock->sid;
}
/*
 * Map an epoll descriptor to its vep index and pool element.
 * On success stores the epoll instance in *vepollp and returns its
 * vep_idx. Returns INVALID_VEP_IDX, leaving *vepollp untouched, when
 * epfd is unknown or not VPPCOM bound.
 */
static inline int
vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t ** vepollp)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_epoll_t *ep;
  uword *slot;

  slot = hash_get (sm->epollidx_by_epfd, epfd);
  if (!slot)
    return INVALID_VEP_IDX;

  ep = pool_elt_at_index (sm->vepolls, slot[0]);
  if (!ep || ep->type != EPOLL_TYPE_VPPCOM_BOUND)
    return INVALID_VEP_IDX;

  *vepollp = ep;
  return ep->vep_idx;
}
/*
 * Close an epoll instance tracked by this layer.
 *
 * While the instance still has members (count != 0) the first call only
 * marks it as "close" and returns 0; a further call returns -EBADF.
 * With no members left, the vppcom session and the libc descriptor are
 * closed and the pool element is released.
 *
 * Returns -EBADF for an unknown epfd, -EINVAL when the instance is not
 * VPPCOM bound, otherwise the result of closing the libc descriptor
 * (the vppcom_session_close result is not reported).
 */
static int
vcom_socket_close_vepoll (int epfd)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_epoll_t *ep;
  uword *slot;
  int status;

  slot = hash_get (sm->epollidx_by_epfd, epfd);
  if (!slot)
    return -EBADF;

  ep = pool_elt_at_index (sm->vepolls, slot[0]);
  if (!ep)
    return -EBADF;

  if (ep->type != EPOLL_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  if (ep->count)
    {
      /* members remain: defer the real close, but only once */
      if (ep->close)
	return -EBADF;
      ep->close = 1;
      return 0;
    }

  /* no members left: tear everything down */
  (void) vppcom_session_close (ep->vep_idx);
  status = vcom_socket_close_epoll (ep->epfd);

  vepoll_init (ep);
  hash_unset (sm->epollidx_by_epfd, epfd);
  pool_put (sm->vepolls, ep);

  return status;
}
/*
 * Close a socket tracked by this layer.
 *
 * Closes the vppcom session and the libc descriptor, releases the pool
 * element, then walks the epoll items registered for fd so that epoll
 * instances already marked "close" are either closed (fd was their
 * last member) or have their member count decremented.
 *
 * Returns -EBADF for an unknown fd, -EINVAL when the socket is not
 * VPPCOM bound, otherwise the result of closing the libc descriptor
 * (the vppcom_session_close result is overwritten and not reported).
 */
static int
vcom_socket_close_vsock (int fd)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
vcom_epitem_t *vepitem;
i32 *vepitemidxs = 0;
i32 *vepitemidxs_var = 0;
p = hash_get (vsm->sockidx_by_fd, fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
/* the session-close result is replaced by the libc close result below */
rv = vppcom_session_close (vsock->sid);
rv = vcom_socket_close_socket (vsock->fd);
/* reset the element before returning it to the pool */
vsocket_init (vsock);
hash_unset (vsm->sockidx_by_fd, fd);
pool_put (vsm->vsockets, vsock);
/*
* NOTE:
* Before calling close(), user should remove
* this fd from the epoll-set of all epoll instances,
* otherwise resource(epitems) leaks ensues.
*/
/*
* 00. close all epoll instances that are marked as "close"
* of which this fd is the "last" remaining member.
* 01. epitems associated with this fd are intentionally
* not removed, see NOTE: above.
* */
/* does this fd participate in epoll */
p = hash_get (vsm->epitemidxs_by_fd, fd);
if (p)
{
/* the hash value is a vector of epitem pool indices */
vepitemidxs = *(i32 **) p;
vec_foreach (vepitemidxs_var, vepitemidxs)
{
vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]);
if (vepitem && vepitem->fd == fd &&
vepitem->type == FD_TYPE_VCOM_SOCKET)
{
i32 vep_idx;
vcom_epoll_t *vepoll;
if ((vep_idx =
vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd,
&vepoll)) !=
INVALID_VEP_IDX)
{
/* only epoll instances with a pending close are adjusted here */
if (vepoll->close)
{
if (vepoll->count == 1)
{
/*
* force count to zero and
* close this epoll instance
* */
vepoll->count = 0;
vcom_socket_close_vepoll (vepoll->epfd);
}
else
{
vepoll->count -= 1;
}
}
}
}
}
}
return rv;
}
/*
 * Close a descriptor owned by this layer.
 * Dispatches to the socket or epoll close routine depending on what
 * __fd refers to; returns -EBADF when it is neither.
 */
int
vcom_socket_close (int __fd)
{
  if (vcom_socket_is_vcom_fd (__fd))
    return vcom_socket_close_vsock (__fd);

  if (vcom_socket_is_vcom_epfd (__fd))
    return vcom_socket_close_vepoll (__fd);

  return -EBADF;
}
/*
 * Read up to __nbytes from the session behind __fd into __buf.
 *
 * For a blocking descriptor the session read is retried for as long as
 * it reports -EAGAIN/-EWOULDBLOCK; for a non-blocking descriptor the
 * first result is returned as is.
 *
 * Returns the vppcom_session_read result, or -EBADF / -ENOTSOCK /
 * -EINVAL for an unknown fd, missing pool element, unbound socket or
 * NULL buffer, or the negative result of the F_GETFL query.
 */
ssize_t
vcom_socket_read (int __fd, void *__buf, size_t __nbytes)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;
  int fl;
  int nread;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  if (!__buf)
    return -EINVAL;

  fl = vcom_fcntl (__fd, F_GETFL, 0);
  if (fl < 0)
    return fl;

  /* non-blocking: a single attempt, whatever it returns */
  if (fl & O_NONBLOCK)
    {
      nread = vppcom_session_read (sock->sid, __buf, __nbytes);
      return nread;
    }

  /* blocking: keep trying until the session stops saying "try again" */
  for (;;)
    {
      nread = vppcom_session_read (sock->sid, __buf, __nbytes);
      /* coverity[CONSTANT_EXPRESSION_RESULT] */
      if (nread != -EAGAIN && nread != -EWOULDBLOCK)
	break;
    }
  return nread;
}
/*
 * Scatter-read from the session behind __fd into the __iovcnt buffers
 * described by __iov.
 *
 * Buffers are filled in order; reading stops at the first short read
 * or error. For a blocking descriptor the whole pass is retried while
 * nothing has been read and the session reports -EAGAIN/-EWOULDBLOCK.
 *
 * Returns the total number of bytes read, or a negative errno value:
 *   -EBADF / -ENOTSOCK / -EINVAL  unknown fd, missing pool element or
 *                                 unbound socket
 *   -EINVAL                       NULL __iov, __iovcnt <= 0 or
 *                                 > IOV_MAX, or total length overflow
 *   other negative value          error from the F_GETFL query or from
 *                                 the session read when no data was
 *                                 transferred
 */
ssize_t
vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt)
{
  int rv;
  vcom_socket_main_t *vsm = &vcom_socket_main;
  uword *p;
  vcom_socket_t *vsock;
  ssize_t total = 0, len = 0;
  int i;

  p = hash_get (vsm->sockidx_by_fd, __fd);
  if (!p)
    return -EBADF;

  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
  if (!vsock)
    return -ENOTSOCK;

  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  /* a negative count used to slip through and report 0 bytes read */
  if (__iov == 0 || __iovcnt <= 0 || __iovcnt > IOV_MAX)
    return -EINVAL;

  /* Sanity check: the summed lengths must fit in an ssize_t */
  for (i = 0; i < __iovcnt; ++i)
    {
      if (SSIZE_MAX - len < __iov[i].iov_len)
	return -EINVAL;
      len += __iov[i].iov_len;
    }

  rv = vcom_fcntl (__fd, F_GETFL, 0);
  if (rv < 0)
    {
      return rv;
    }

  /* is blocking */
  if (!(rv & O_NONBLOCK))
    {
      do
	{
	  for (i = 0; i < __iovcnt; ++i)
	    {
	      rv = vppcom_session_read (vsock->sid, __iov[i].iov_base,
					__iov[i].iov_len);
	      if (rv < 0)
		break;
	      else
		{
		  total += rv;
		  if ((size_t) rv < __iov[i].iov_len)
		    /* Read less than buffer provided, no point to continue */
		    break;
		}
	    }
	}
      /* coverity[CONSTANT_EXPRESSION_RESULT] */
      while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0);

      /* a hard error with nothing read must not be reported as 0 bytes */
      if (rv < 0 && total == 0)
	return rv;
      return total;
    }

  /* is non blocking */
  for (i = 0; i < __iovcnt; ++i)
    {
      rv = vppcom_session_read (vsock->sid, __iov[i].iov_base,
				__iov[i].iov_len);
      if (rv < 0)
	{
	  if (total > 0)
	    break;
	  else
	    {
	      /* rv is a negative errno value; errno itself is positive */
	      errno = -rv;
	      return rv;
	    }
	}
      else
	{
	  total += rv;
	  if ((size_t) rv < __iov[i].iov_len)
	    /* Read less than buffer provided, no point to continue */
	    break;
	}
    }
  return total;
}
/*
 * Write __n bytes from __buf to the session behind __fd.
 *
 * Returns the vppcom_session_write result, -EINVAL for a NULL buffer
 * or an unbound socket, -EBADF for an unknown fd, -ENOTSOCK when the
 * pool element is missing.
 */
ssize_t
vcom_socket_write (int __fd, const void *__buf, size_t __n)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;
  int nwritten;

  /* the buffer is validated before the descriptor */
  if (!__buf)
    return -EINVAL;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  nwritten = vppcom_session_write (sock->sid, (void *) __buf, __n);
  return nwritten;
}
/*
 * Gather-write the __iovcnt buffers described by __iov to the session
 * behind __fd.
 *
 * Buffers are written in order. If a write fails after some data was
 * already sent, the bytes written so far are returned; if it fails
 * before any data was sent, the error is returned.
 *
 * Returns the total number of bytes written, or a negative errno value
 * (-EBADF / -ENOTSOCK / -EINVAL for an unknown fd, missing pool
 * element or unbound socket; -EINVAL for a NULL __iov or an __iovcnt
 * that is <= 0 or > IOV_MAX).
 */
ssize_t
vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt)
{
  int rv = -1;
  ssize_t total = 0;
  vcom_socket_main_t *vsm = &vcom_socket_main;
  uword *p;
  vcom_socket_t *vsock;
  int i;

  p = hash_get (vsm->sockidx_by_fd, __fd);
  if (!p)
    return -EBADF;

  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
  if (!vsock)
    return -ENOTSOCK;

  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  /* a negative count used to slip through and report 0 bytes written */
  if (__iov == 0 || __iovcnt <= 0 || __iovcnt > IOV_MAX)
    return -EINVAL;

  for (i = 0; i < __iovcnt; ++i)
    {
      rv = vppcom_session_write (vsock->sid, __iov[i].iov_base,
				 __iov[i].iov_len);
      if (rv < 0)
	{
	  if (total > 0)
	    break;
	  else
	    return rv;
	}
      else
	total += rv;
    }
  return total;
}
/*
 * Classify an fcntl command by who handles it.
 *
 * RETURN: 1 - cmd not handled by vcom and vppcom
 *             (F_DUPFD, F_DUPFD_CLOEXEC and every unlisted cmd)
 *         2 - cmd handled by vcom socket resource
 *         3 - cmd handled by vppcom (F_SETFL, F_GETFL)
 * The value 0 (invalid cmd) is reserved; this function never
 * produces it.
 */
/* TBD: incomplete list of cmd */
static int
vcom_socket_check_fcntl_cmd (int __cmd)
{
  /* cmd handled by vcom and vppcom */
  if (__cmd == F_SETFL || __cmd == F_GETFL)
    return 3;

  /* cmd handled by vcom socket resource */
  if (__cmd == F_GETFD || __cmd == F_SETFD ||
      __cmd == F_GETLK || __cmd == F_SETLK || __cmd == F_SETLKW ||
      __cmd == F_GETOWN || __cmd == F_SETOWN)
    return 2;

  /* F_DUPFD, F_DUPFD_CLOEXEC and anything else: not handled here */
  return 1;
}
/*
 * Apply an fcntl flag command to a vppcom session.
 *
 * F_SETFL: takes one int argument from __ap and stores it as the
 *          session flags; returns the vppcom_session_attr result.
 * F_GETFL: takes no argument; returns the session flags on success,
 *          otherwise the vppcom_session_attr error.
 * Any other command returns -EOPNOTSUPP.
 *
 * The variadic argument is fetched only for F_SETFL: F_GETFL callers
 * are not required to pass one, and reading a missing argument with
 * va_arg is undefined behaviour.
 */
static inline int
vcom_session_fcntl_va (int __sid, int __cmd, va_list __ap)
{
  int flags = 0;
  int rv = -EOPNOTSUPP;
  uint32_t size = sizeof (flags);

  if (__cmd == F_SETFL)
    {
      flags = va_arg (__ap, int);
      rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
    }
  else if (__cmd == F_GETFL)
    {
      rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
      if (rv == VPPCOM_OK)
	rv = flags;
    }
  return rv;
}
/*
 * fcntl entry point for descriptors owned by this layer.
 *
 * The command is classified by vcom_socket_check_fcntl_cmd and then
 * forwarded either to libc (classes 1 and 2, on the backing fd) or to
 * the vppcom session (class 3).
 *
 * Returns the handler's result, -EBADF for an unknown fd or an
 * "invalid" classification, -ENOTSOCK when the pool element is
 * missing, -EINVAL for an unbound socket or unexpected classification.
 */
int
vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;
  int cmd_class;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  cmd_class = vcom_socket_check_fcntl_cmd (__cmd);

  /* invalid cmd */
  if (cmd_class == 0)
    return -EBADF;

  /* cmd not handled by vcom/vppcom, or handled by the socket resource */
  if (cmd_class == 1 || cmd_class == 2)
    return libc_vfcntl (sock->fd, __cmd, __ap);

  /* cmd handled by vppcom */
  if (cmd_class == 3)
    return vcom_session_fcntl_va (sock->sid, __cmd, __ap);

  return -EINVAL;
}
/*
 * Classify an ioctl command by who handles it.
 *
 * RETURN: 1 - cmd not handled by vcom and vppcom (everything but
 *             FIONREAD)
 *         3 - cmd handled by vppcom (FIONREAD)
 * The values 0 (invalid cmd) and 2 (cmd handled by vcom socket
 * resource) are reserved; this function never produces them.
 */
static int
vcom_socket_check_ioctl_cmd (unsigned long int __cmd)
{
  return (__cmd == FIONREAD) ? 3 : 1;
}
/*
 * Apply an ioctl command to a vppcom session.
 *
 * FIONREAD: returns the result of the VPPCOM_ATTR_GET_NREAD query.
 * FIONBIO:  takes one int argument from __ap; non-zero selects
 *           O_NONBLOCK, zero clears the flags. Returns the
 *           vppcom_session_attr result.
 * Any other command returns -EOPNOTSUPP.
 */
static inline int
vcom_session_ioctl_va (int __sid, int __cmd, va_list __ap)
{
  if (__cmd == FIONREAD)
    return vppcom_session_attr (__sid, VPPCOM_ATTR_GET_NREAD, 0, 0);

  if (__cmd == FIONBIO)
    {
      u32 flags;
      u32 len = sizeof (flags);

      if (va_arg (__ap, int))
	flags = O_NONBLOCK;
      else
	flags = 0;
      return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_FLAGS, &flags,
				  &len);
    }

  return -EOPNOTSUPP;
}
/*
 * ioctl entry point for descriptors owned by this layer.
 *
 * The command is classified by vcom_socket_check_ioctl_cmd and then
 * forwarded either to libc (classes 1 and 2, on the backing fd) or to
 * the vppcom session (class 3).
 *
 * Returns the handler's result, -EBADF for an unknown fd, -ENOTSOCK
 * when the pool element is missing, -EOPNOTSUPP for an "unsupported"
 * classification, -EINVAL for an unbound socket or unexpected
 * classification.
 */
int
vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;
  int cmd_class;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  cmd_class = vcom_socket_check_ioctl_cmd (__cmd);

  /* Not supported cmd */
  if (cmd_class == 0)
    return -EOPNOTSUPP;

  /* cmd not handled by vcom/vppcom, or handled by the socket resource */
  if (cmd_class == 1 || cmd_class == 2)
    return libc_vioctl (sock->fd, __cmd, __ap);

  /* cmd handled by vppcom */
  if (cmd_class == 3)
    return vcom_session_ioctl_va (sock->sid, __cmd, __ap);

  return -EINVAL;
}
/*
 * Translate select() fd sets into session-id sets.
 *
 * For every fd below vcom_nfds that is set in a source fd set, the
 * matching session id is set in the corresponding destination sid set.
 * A source/destination pair is skipped when either pointer is NULL.
 *
 * On success returns the number of sid bits set (an fd present in
 * several sets is counted once per set) and, when vcom_nsid_fds is not
 * NULL, stores the highest sid + 1 there (0 when no sid was set).
 * Returns -EBADFD as soon as a watched fd has no valid session id; in
 * that case *vcom_nsid_fds is not written and the sid sets may be
 * partially filled.
 */
static inline int
vcom_socket_fds_2_sid_fds (
			    /* dest */
			    int *vcom_nsid_fds,
			    fd_set * __restrict vcom_rd_sid_fds,
			    fd_set * __restrict vcom_wr_sid_fds,
			    fd_set * __restrict vcom_ex_sid_fds,
			    /* src */
			    int vcom_nfds,
			    fd_set * __restrict vcom_readfds,
			    fd_set * __restrict vcom_writefds,
			    fd_set * __restrict vcom_exceptfds)
{
  int rv = 0;
  int fd;
  int sid;
  /* invalid max_sid is -1 */
  int max_sid = -1;
  int nsid = 0;

  /*
   *  set sid in sid sets corresponding to fd's in fd sets
   *  compute nsid and vcom_nsid_fds from sid sets
   */
  for (fd = 0; fd < vcom_nfds; fd++)
    {
      /*
       * F fd set, src
       * S sid set, dest
       */
#define _(S,F)                              \
      if ((F) && (S) && FD_ISSET (fd, (F))) \
        {                                   \
          sid = vcom_socket_get_sid (fd);   \
          if (sid != INVALID_SESSION_ID)    \
            {                               \
              FD_SET (sid, (S));            \
              if (sid > max_sid)            \
                {                           \
                  max_sid = sid;            \
                }                           \
              ++nsid;                       \
            }                               \
          else                              \
            {                               \
              rv = -EBADFD;                 \
              goto done;                    \
            }                               \
        }
      _(vcom_rd_sid_fds, vcom_readfds);
      _(vcom_wr_sid_fds, vcom_writefds);
      _(vcom_ex_sid_fds, vcom_exceptfds);
#undef _
    }

  /* the caller passes NULL here when it supplied no fd set at all */
  if (vcom_nsid_fds)
    *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0;
  rv = nsid;

done:
  return rv;
}
/*
* PRE: 00. sid sets were derived from fd sets
* 01. sid sets were updated with sids that actually changed
* status
* 02. fd sets still has watched fds
*
* This function will modify in place fd sets to indicate which fd's
* actually changed status(inferred from sid sets)
*
* RETURN: the number of fd bits left set across the three fd sets
* (an fd present in several sets is counted once per set), with the
* highest remaining fd + 1 (or 0) stored in *new_vcom_nfds.
* -EBADFD as soon as a watched fd has no valid session id; in that
* case *new_vcom_nfds is not written.
* The vcom_nsid_fds parameter is not used by the body.
*/
static inline int
vcom_socket_sid_fds_2_fds (
/* dest */
int *new_vcom_nfds,
int vcom_nfds,
fd_set * __restrict vcom_readfds,
fd_set * __restrict vcom_writefds,
fd_set * __restrict vcom_exceptfds,
/* src */
int vcom_nsid_fds,
fd_set * __restrict vcom_rd_sid_fds,
fd_set * __restrict vcom_wr_sid_fds,
fd_set * __restrict vcom_ex_sid_fds)
{
int rv = 0;
int fd;
int sid;
/* invalid max_fd is -1 */
int max_fd = -1;
int nfd = 0;
/*
* modify in place fd sets to indicate which fd's
* actually changed status(inferred from sid sets)
*/
for (fd = 0; fd < vcom_nfds; fd++)
{
/*
* F fd set, dest
* S sid set, src
*/
/* a watched fd stays set only if its sid is set in the sid set */
#define _(S,F) \
if ((F) && (S) && FD_ISSET (fd, (F))) \
{ \
sid = vcom_socket_get_sid (fd); \
if (sid != INVALID_SESSION_ID) \
{ \
if (!FD_ISSET (sid, (S))) \
{ \
FD_CLR(fd, (F)); \
} \
} \
else \
{ \
rv = -EBADFD; \
goto done; \
} \
}
_(vcom_rd_sid_fds, vcom_readfds);
_(vcom_wr_sid_fds, vcom_writefds);
_(vcom_ex_sid_fds, vcom_exceptfds);
#undef _
}
/*
* compute nfd and new_vcom_nfds from fd sets
*/
for (fd = 0; fd < vcom_nfds; fd++)
{
/* count every bit still set and track the highest fd seen */
#define _(F) \
if ((F) && FD_ISSET (fd, (F))) \
{ \
if (fd > max_fd) \
{ \
max_fd = fd; \
} \
++nfd; \
}
_(vcom_readfds);
_(vcom_writefds);
_(vcom_exceptfds);
#undef _
}
*new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0;
rv = nfd;
done:
return rv;
}
/*
* PRE:
* vcom_socket_select is always called with
* timeout->tv_sec and timeout->tv_usec set to zero,
* hence vppcom_select returns immediately.
*/
/*
* TBD: do{body;} while(timeout conditional); timeout loop
*/
/*
 * select() for descriptors owned by this layer.
 *
 * The fd sets are translated into session-id sets, vppcom_select is
 * run on those, and on a positive result the fd sets are rewritten in
 * place so that only the fds whose sessions changed status remain set.
 *
 * timeout: NULL is passed to vppcom_select as a negative wait time
 *          (~0 converted to double); otherwise the timeval is
 *          converted to seconds.
 *
 * Returns the number of fd bits left set, 0 when vcom_nfds is 0 or
 * nothing changed, -EINVAL for a negative vcom_nfds, or a negative
 * value propagated from the translation helpers / vppcom_select.
 */
int
vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds,
		    fd_set * __restrict vcom_writefds,
		    fd_set * __restrict vcom_exceptfds,
		    struct timeval *__restrict timeout)
{
  /*
   * Highest sid + 1, filled by vcom_socket_fds_2_sid_fds. A plain
   * local int: the helper writes through an int pointer, and nothing
   * needs to survive between calls.
   */
  int vcom_nsid_fds = 0;
  int vcom_nsid = 0;
  int rv = -EBADF;

  int new_vcom_nfds = 0;
  int new_vcom_nfd = 0;

  /* vcom sid fds */
  fd_set vcom_rd_sid_fds;
  fd_set vcom_wr_sid_fds;
  fd_set vcom_ex_sid_fds;

  /* in seconds eg. 3.123456789 seconds */
  double time_to_wait = (double) 0;

  /* validate inputs */
  if (vcom_nfds < 0)
    {
      return -EINVAL;
    }

  /* convert timeval timeout to double time_to_wait */
  if (timeout)
    {
      /*
       * seconds plus the fractional microsecond part, counted once;
       * an all-zero timeval yields 0 (poll and return immediately)
       */
      time_to_wait = (double) timeout->tv_sec +
	(double) timeout->tv_usec / (double) 1000000;
    }
  else
    {
      /*
       * no timeout: vppcom_select can block indefinitely
       * waiting for a file descriptor to become ready
       * */
      /* set to a phantom value */
      time_to_wait = ~0;
    }

  /* zero the sid_sets */
  /*
   * F fd set
   * S sid set
   */
#define _(S,F)                          \
  if ((F))                              \
    {                                   \
      FD_ZERO ((S));                    \
    }
  _(&vcom_rd_sid_fds, vcom_readfds);
  _(&vcom_wr_sid_fds, vcom_writefds);
  _(&vcom_ex_sid_fds, vcom_exceptfds);
#undef _

  if (vcom_nfds == 0)
    {
      if (time_to_wait > 0)
	{
	  if (VCOM_DEBUG > 0)
	    fprintf (stderr,
		     "[%d] vcom_socket_select called to "
		     "emulate delay_ns()!\n", getpid ());
	  rv = vppcom_select (0, NULL, NULL, NULL, time_to_wait);
	}
      else
	{
	  fprintf (stderr, "[%d] vcom_socket_select called vcom_nfds = 0 "
		   "and invalid time_to_wait (%f)!\n",
		   getpid (), time_to_wait);
	}
      return 0;
    }

  /*
   * populate read, write and except sid_sets; the count pointer is
   * always valid, even when no fd set was supplied
   */
  vcom_nsid = vcom_socket_fds_2_sid_fds (
					  /* dest */
					  &vcom_nsid_fds,
					  vcom_readfds ? &vcom_rd_sid_fds :
					  NULL,
					  vcom_writefds ? &vcom_wr_sid_fds :
					  NULL,
					  vcom_exceptfds ? &vcom_ex_sid_fds :
					  NULL,
					  /* src */
					  vcom_nfds,
					  vcom_readfds,
					  vcom_writefds, vcom_exceptfds);
  if (vcom_nsid < 0)
    {
      return vcom_nsid;
    }

  rv = vppcom_select (vcom_nsid_fds,
		      vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds :
		      NULL,
		      vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds :
		      NULL,
		      vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds :
		      NULL, time_to_wait);
  if (VCOM_DEBUG > 2)
    fprintf (stderr, "[%d] called vppcom_select(): "
	     "'%04d'='%04d'\n", getpid (), rv, (int) vcom_nsid_fds);

  /* check if any file descriptors changed status */
  if (rv > 0)
    {
      /*
       * on exit, sets are modified in place to indicate which
       * file descriptors actually changed status
       * */

      /*
       * comply with pre-condition
       * do not clear vcom fd sets before calling
       * vcom_socket_sid_fds_2_fds
       */
      new_vcom_nfd = vcom_socket_sid_fds_2_fds (
						 /* dest */
						 &new_vcom_nfds,
						 vcom_nfds,
						 vcom_readfds,
						 vcom_writefds,
						 vcom_exceptfds,
						 /* src */
						 vcom_nsid_fds,
						 vcom_readfds ?
						 &vcom_rd_sid_fds : NULL,
						 vcom_writefds ?
						 &vcom_wr_sid_fds : NULL,
						 vcom_exceptfds ?
						 &vcom_ex_sid_fds : NULL);
      if (new_vcom_nfd < 0)
	{
	  return new_vcom_nfd;
	}
      if (new_vcom_nfds < 0)
	{
	  return -EINVAL;
	}
      rv = new_vcom_nfd;
    }
  return rv;
}
/*
 * socket() for this layer: opens a libc socket, creates the matching
 * vppcom session and registers the pair in the socket pool.
 *
 * A SOCK_DGRAM type selects VPPCOM_PROTO_UDP, any other type
 * VPPCOM_PROTO_TCP; SOCK_NONBLOCK is forwarded to the session.
 *
 * Returns the new fd, or the negative result of whichever step
 * failed (the libc socket is closed again if the session cannot be
 * created).
 */
int
vcom_socket_socket (int __domain, int __type, int __protocol)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  u8 nonblocking = (__type & SOCK_NONBLOCK) ? 1 : 0;
  int base_type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
  i32 fd;
  i32 sid;
  i32 sock_index;

  fd = vcom_socket_open_socket (__domain, __type, __protocol);
  if (fd < 0)
    return fd;

  sid = vppcom_session_create (VPPCOM_VRF_DEFAULT,
			       (base_type == SOCK_DGRAM) ?
			       VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP,
			       nonblocking);
  if (sid < 0)
    {
      /* no session: give the libc descriptor back */
      vcom_socket_close_socket (fd);
      return sid;
    }

  /* register the fd -> pool index mapping, then fill in the element */
  pool_get (sm->vsockets, sock);
  vsocket_init (sock);
  sock_index = sock - sm->vsockets;
  hash_set (sm->sockidx_by_fd, fd, sock_index);
  vsocket_set (sock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND);

  return fd;
}
/*
 * socketpair() placeholder: not implemented yet.
 * Ignores every argument, leaves __fds untouched and returns 0.
 */
int
vcom_socket_socketpair (int __domain, int __type, int __protocol,
			int __fds[2])
{
  /* TBD: */
  (void) __domain;
  (void) __type;
  (void) __protocol;
  (void) __fds;

  return 0;
}
int
vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
vppcom_endpt_t ep;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
if (!__addr)
{
return -EINVAL;
}
ep.vrf = VPPCOM_VRF_DEFAULT;
switch (__addr->sa_family)
{
case AF_INET:
if (__len != sizeof (struct sockaddr_in))
{
return -EINVAL;
}
ep.is_ip4 = VPPCOM_IS_IP4;
ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port;
break;
case AF_INET6:
if (__len != sizeof (struct sockaddr_in6))
{
return -EINVAL;
}
ep.is_ip4 = VPPCOM_IS_IP6;
ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port;
break;
default:
return -1;
break;
}
rv = vppcom_session_bind (vsock->sid, &ep);
return rv;
}
/*
 * Query the local address of session sid into *ep.
 * Returns the vppcom_session_attr (VPPCOM_ATTR_GET_LCL_ADDR) result.
 */
static inline int
vcom_session_getsockname (int sid, vppcom_endpt_t * ep)
{
  uint32_t ep_size = sizeof (*ep);

  return vppcom_session_attr (sid, VPPCOM_ATTR_GET_LCL_ADDR, ep, &ep_size);
}
int
vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr,
socklen_t * __restrict __len)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
if (!__addr || !__len)
return -EFAULT;
vppcom_endpt_t ep;
ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
rv = vcom_session_getsockname (vsock->sid, &ep);
if (rv == 0)
{
if (ep.vrf == VPPCOM_VRF_DEFAULT)
{
__addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6;
switch (__addr->sa_family)
{
case AF_INET:
((struct sockaddr_in *) __addr)->sin_port = ep.port;
*__len = sizeof (struct sockaddr_in);
break;
case AF_INET6:
((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
*__len = sizeof (struct sockaddr_in6);
break;
default:
break;
}
}
}
return rv;
}
/*
 * connect() for descriptors owned by this layer.
 *
 * Builds a vppcom endpoint that points at the address and port inside
 * __addr (no copy is made) and connects the session to it. __len is
 * not inspected.
 *
 * Returns the vppcom_session_connect result, -EINVAL for a NULL
 * address, and -1 for an unknown fd or an address family other than
 * AF_INET/AF_INET6.
 */
int
vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
{
  int rv = -1;
  vcom_socket_main_t *vsm = &vcom_socket_main;
  uword *p;
  vcom_socket_t *vsock;
  vppcom_endpt_t ep;

  /* same guard as vcom_socket_bind: the family is read through __addr */
  if (!__addr)
    return -EINVAL;

  p = hash_get (vsm->sockidx_by_fd, __fd);
  if (p)
    {
      vsock = pool_elt_at_index (vsm->vsockets, p[0]);

      ep.vrf = VPPCOM_VRF_DEFAULT;
      switch (__addr->sa_family)
	{
	case AF_INET:
	  ep.is_ip4 = VPPCOM_IS_IP4;
	  ep.ip =
	    (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr;
	  ep.port =
	    (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port;
	  break;

	case AF_INET6:
	  ep.is_ip4 = VPPCOM_IS_IP6;
	  ep.ip =
	    (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
	  ep.port =
	    (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port;
	  break;

	default:
	  return -1;
	  break;
	}

      rv = vppcom_session_connect (vsock->sid, &ep);
    }
  return rv;
}
/*
 * Query the peer address of session sid into *ep.
 * Returns the vppcom_session_attr (VPPCOM_ATTR_GET_PEER_ADDR) result.
 */
static inline int
vcom_session_getpeername (int sid, vppcom_endpt_t * ep)
{
  uint32_t ep_size = sizeof (*ep);

  return vppcom_session_attr (sid, VPPCOM_ATTR_GET_PEER_ADDR, ep, &ep_size);
}
/*
 * Fill a caller-supplied sockaddr from a vppcom endpoint.
 *
 * The family and port are always written. *__len is clamped down to
 * the size of the matching sockaddr structure, and the address bytes
 * copied are limited to *__len minus (structure size - address size);
 * nothing is copied when that difference is not positive.
 *
 * Always returns 0: the family is derived from ep->is_ip4 and is
 * therefore either AF_INET or AF_INET6.
 */
static inline int
vcom_socket_copy_ep_to_sockaddr (__SOCKADDR_ARG __addr,
				 socklen_t * __restrict __len,
				 vppcom_endpt_t * ep)
{
  int non_addr_len, addr_bytes;

  if (ep->is_ip4 == VPPCOM_IS_IP4)
    {
      struct sockaddr_in *sin = (struct sockaddr_in *) __addr;

      __addr->sa_family = AF_INET;
      sin->sin_port = ep->port;
      if (*__len > sizeof (struct sockaddr_in))
	*__len = sizeof (struct sockaddr_in);
      non_addr_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr);
      addr_bytes = *__len - non_addr_len;
      if (addr_bytes > 0)
	memcpy (&sin->sin_addr, ep->ip, addr_bytes);
    }
  else
    {
      struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) __addr;

      __addr->sa_family = AF_INET6;
      sin6->sin6_port = ep->port;
      if (*__len > sizeof (struct sockaddr_in6))
	*__len = sizeof (struct sockaddr_in6);
      non_addr_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr);
      addr_bytes = *__len - non_addr_len;
      if (addr_bytes > 0)
	memcpy (&sin6->sin6_addr, ep->ip, addr_bytes);
    }

  return 0;
}
/*
 * getpeername() for descriptors owned by this layer.
 *
 * The peer endpoint is fetched into a scratch buffer and then copied
 * into the caller's sockaddr by vcom_socket_copy_ep_to_sockaddr.
 *
 * Returns 0 on success, the failing step's result otherwise, -EBADF
 * for an unknown fd, -ENOTSOCK when the pool element is missing,
 * -EINVAL for an unbound socket, -EFAULT when __addr or __len is NULL.
 */
int
vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr,
			 socklen_t * __restrict __len)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  vppcom_endpt_t peer;
  u8 peer_addr[sizeof (struct sockaddr_in6)];
  uword *slot;
  int status;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  if (!__addr || !__len)
    return -EFAULT;

  peer.ip = peer_addr;
  status = vcom_session_getpeername (sock->sid, &peer);
  if (status != 0)
    return status;

  return vcom_socket_copy_ep_to_sockaddr (__addr, __len, &peer);
}
/*
 * send() for descriptors owned by this layer: a sendto with no
 * destination address.
 */
ssize_t
vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags)
{
  ssize_t sent;

  sent = vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0);
  return sent;
}
/*
 * sendfile() for descriptors owned by this layer: copies up to __len
 * bytes from the file __in_fd to the session behind __out_fd, staging
 * the data in vcom_socket_main.io_buffer.
 *
 * When __offset is not NULL, reading starts at *__offset; on return
 * the file position of __in_fd is set back to that starting offset
 * and *__offset is advanced by the number of bytes sent.
 *
 * The copy stops when __len bytes were sent, when the input file
 * reaches end-of-file, or on the first error. An error after some
 * data was sent is reported as a short count.
 *
 * Returns the number of bytes sent, -EAGAIN when the session is
 * non-blocking and had no room before anything was sent, or another
 * negative value (-EBADF / -ENOTSOCK / -EINVAL for a bad __out_fd,
 * -errno from lseek/read, or a vppcom error).
 *
 * NOTE: this function is not thread safe or 32-bit friendly
 */
ssize_t
vcom_socket_sendfile (int __out_fd, int __in_fd, off_t * __offset,
		      size_t __len)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;
  uword *p;
  vcom_socket_t *vsock;
  size_t n_bytes_left = __len;
  u32 out_sockidx, out_sid = ~0;
  size_t bytes_to_read;
  int nbytes;
  int rv, errno_val;
  ssize_t results = 0;
  u8 eagain = 0;

  if (VCOM_DEBUG > 2)
    clib_warning ("[%d] __out_fd %d, __in_fd %d, __offset %p, __len %lu",
		  getpid (), __out_fd, __in_fd, __offset, __len);

  p = hash_get (vsm->sockidx_by_fd, __out_fd);
  if (!p)
    {
      clib_warning ("[%d] ERROR: invalid __out_fd (%d), fd lookup failed!",
		    getpid (), __out_fd);
      return -EBADF;
    }
  out_sockidx = p[0];
  vsock = pool_elt_at_index (vsm->vsockets, out_sockidx);
  if (!vsock)
    {
      clib_warning ("[%d] ERROR: invalid __out_fd (%d) / out_sockidx %u, "
		    "missing vsock pool element!",
		    getpid (), __out_fd, out_sockidx);
      return -ENOTSOCK;
    }
  out_sid = vsock->sid;
  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
    {
      clib_warning ("[%d] ERROR: __out_fd (%d), socket (sid %u) "
		    "is not VCL bound!", getpid (), __out_fd, out_sid);
      return -EINVAL;
    }

  /* position the input file at the requested starting offset */
  if (__offset)
    {
      off_t offset = lseek (__in_fd, *__offset, SEEK_SET);
      if (offset == -1)
	{
	  errno_val = errno;
	  perror ("lseek()");
	  clib_warning ("[%d] ERROR: lseek SEEK_SET failed: "
			"in_fd %d, offset %p (%ld), rv %ld, errno %d",
			getpid (), __in_fd, __offset, *__offset, offset,
			errno_val);
	  return -errno_val;
	}

      ASSERT (offset == *__offset);
    }

  do
    {
      /* how much room does the session have right now? */
      rv = vppcom_session_attr (out_sid, VPPCOM_ATTR_GET_NWRITE, 0, 0);
      if (rv < 0)
	{
	  clib_warning ("[%d] ERROR: vppcom_session_attr (out_sid (%u), "
			"VPPCOM_ATTR_GET_NWRITE, 0, 0) returned %d (%s)!",
			getpid (), out_sid, rv, vppcom_retval_str (rv));
	  vec_reset_length (vsm->io_buffer);
	  return rv;
	}

      bytes_to_read = (size_t) rv;
      if (VCOM_DEBUG > 2)
	clib_warning ("[%d] results %ld, n_bytes_left %lu, "
		      "bytes_to_read %lu", getpid (), results,
		      n_bytes_left, bytes_to_read);
      if (bytes_to_read == 0)
	{
	  u32 flags, flags_len = sizeof (flags);
	  rv = vppcom_session_attr (out_sid, VPPCOM_ATTR_GET_FLAGS, &flags,
				    &flags_len);
	  ASSERT (rv == VPPCOM_OK);

	  if (flags & O_NONBLOCK)
	    {
	      if (!results)
		{
		  if (VCOM_DEBUG > 2)
		    clib_warning ("[%d] EAGAIN", getpid ());
		  eagain = 1;
		}
	      goto update_offset;
	    }
	  else
	    /* blocking session with no room: poll again */
	    continue;
	}
      bytes_to_read = clib_min (n_bytes_left, bytes_to_read);
      vec_validate (vsm->io_buffer, bytes_to_read);
      nbytes = libc_read (__in_fd, vsm->io_buffer, bytes_to_read);
      if (nbytes < 0)
	{
	  errno_val = errno;
	  perror ("read()");
	  clib_warning ("[%d] ERROR: libc_read (__in_fd (%d), "
			"io_buffer %p, bytes_to_read %lu) returned "
			"errno %d",
			getpid (), __in_fd, vsm->io_buffer,
			bytes_to_read, errno_val);
	  if (results == 0)
	    {
	      vec_reset_length (vsm->io_buffer);
	      return -errno_val;
	    }
	  goto update_offset;
	}
      /*
       * End of the input file before __len bytes were sent: stop here,
       * otherwise the loop would spin forever on zero-length reads.
       */
      if (nbytes == 0 && bytes_to_read > 0)
	goto update_offset;

      rv = vppcom_session_write (out_sid, vsm->io_buffer, nbytes);
      if (rv < 0)
	{
	  clib_warning ("[%d] ERROR: vppcom_session_write ("
			"out_sid %u, io_buffer %p, nbytes %d) "
			"returned %d (%s)",
			getpid (), out_sid, vsm->io_buffer, nbytes,
			rv, vppcom_retval_str (rv));
	  if (results == 0)
	    {
	      vec_reset_length (vsm->io_buffer);
	      return rv;
	    }
	  goto update_offset;
	}

      results += nbytes;
      ASSERT (n_bytes_left >= nbytes);
      n_bytes_left = n_bytes_left - nbytes;
    }
  while (n_bytes_left > 0);

update_offset:
  if (__offset)
    {
      /* put the file position back where the transfer started */
      off_t offset = lseek (__in_fd, *__offset, SEEK_SET);
      if (offset == -1)
	{
	  errno_val = errno;
	  perror ("lseek()");
	  clib_warning ("[%d] ERROR: lseek (__in_fd %d, __offset %p "
			"(%ld), SEEK_SET) returned errno %d",
			getpid (), __in_fd, __offset, *__offset, errno_val);
	  vec_reset_length (vsm->io_buffer);
	  return -errno_val;
	}

      /* offset of the byte following the last byte that was sent */
      *__offset += results;
    }

  vec_reset_length (vsm->io_buffer);
  return eagain ? -EAGAIN : results;
}
/*
 * recv() for descriptors owned by this layer: a recvfrom that does
 * not ask for the source address.
 */
ssize_t
vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags)
{
  /* the result is carried in an int before being widened on return */
  int received = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0);

  return received;
}
/*
 * Tell whether __fd is a connection-mode socket.
 *
 * RETURN 1 if __fd is a VPPCOM bound socket whose SO_TYPE (as
 *          reported by libc_getsockopt) is SOCK_STREAM or
 *          SOCK_SEQPACKET,
 *        0 otherwise, including when the lookup or getsockopt fails
 * */
int
vcom_socket_is_connection_mode_socket (int __fd)
{
  /* TBD define new vppcom api */
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;
  socklen_t optlen;
  int sock_type;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return 0;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock || sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return 0;

  optlen = sizeof (sock_type);
  if (libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &sock_type, &optlen) != 0)
    return 0;

  /* get socket type */
  sock_type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
  if (sock_type == SOCK_STREAM || sock_type == SOCK_SEQPACKET)
    return 1;

  return 0;
}
/*
 * Send __n bytes from __buf on session __sid, optionally to an
 * explicit destination.
 *
 * With a non-NULL __addr an endpoint pointing at the address and port
 * inside __addr is passed to vppcom_session_sendto; with a NULL
 * __addr no endpoint is passed. __addr_len is not inspected.
 *
 * Returns the vppcom_session_sendto result, or -EAFNOSUPPORT for an
 * address family other than AF_INET/AF_INET6.
 */
static inline ssize_t
vcom_session_sendto (int __sid, void *__buf, size_t __n,
		     int __flags, __CONST_SOCKADDR_ARG __addr,
		     socklen_t __addr_len)
{
  vppcom_endpt_t dest;
  vppcom_endpt_t *dest_p = 0;

  if (__addr)
    {
      if (__addr->sa_family == AF_INET)
	{
	  const struct sockaddr_in *sin =
	    (const struct sockaddr_in *) __addr;

	  dest.vrf = VPPCOM_VRF_DEFAULT;
	  dest.is_ip4 = VPPCOM_IS_IP4;
	  dest.ip = (uint8_t *) & sin->sin_addr;
	  dest.port = (uint16_t) sin->sin_port;
	}
      else if (__addr->sa_family == AF_INET6)
	{
	  const struct sockaddr_in6 *sin6 =
	    (const struct sockaddr_in6 *) __addr;

	  dest.vrf = VPPCOM_VRF_DEFAULT;
	  dest.is_ip4 = VPPCOM_IS_IP6;
	  dest.ip = (uint8_t *) & sin6->sin6_addr;
	  dest.port = (uint16_t) sin6->sin6_port;
	}
      else
	{
	  return -EAFNOSUPPORT;
	}
      dest_p = &dest;
    }

  return vppcom_session_sendto (__sid, __buf, __n, __flags, dest_p);
}
/*
 * sendto() for descriptors owned by this layer.
 *
 * Connection-mode sockets must not be given a destination
 * (-EISCONN); other sockets must be given an AF_INET or AF_INET6
 * destination (-EDESTADDRREQ when missing, -EINVAL for another
 * family). The length is handed to the session layer as an int.
 *
 * Returns the session send result, -EINVAL for a NULL buffer or an
 * unbound socket, -EBADF for an unknown fd, -ENOTSOCK when the pool
 * element is missing.
 */
ssize_t
vcom_socket_sendto (int __fd, const void *__buf, size_t __n,
		    int __flags, __CONST_SOCKADDR_ARG __addr,
		    socklen_t __addr_len)
{
  vcom_socket_main_t *sm = &vcom_socket_main;
  vcom_socket_t *sock;
  uword *slot;

  /* the buffer is validated before the descriptor */
  if (!__buf)
    return -EINVAL;

  slot = hash_get (sm->sockidx_by_fd, __fd);
  if (!slot)
    return -EBADF;

  sock = pool_elt_at_index (sm->vsockets, slot[0]);
  if (!sock)
    return -ENOTSOCK;

  if (sock->type != SOCKET_TYPE_VPPCOM_BOUND)
    return -EINVAL;

  if (vcom_socket_is_connection_mode_socket (__fd))
    {
      /* ignore __addr and _addr_len */
      /* and EISCONN may be returned when they are not NULL and 0 */
      if (__addr != NULL || __addr_len != 0)
	return -EISCONN;
    }
  else
    {
      if (!__addr)
	return -EDESTADDRREQ;

      /* not a vppcom supported address family */
      if (__addr->sa_family != AF_INET && __addr->sa_family != AF_INET6)
	return -EINVAL;
    }

  return vcom_session_sendto (sock->sid, (void *) __buf, (int) __n,
			      __flags, __addr, __addr_len);
}
/*
 * Receive up to __n bytes into __buf from session __sid.
 *
 * When __addr is non-NULL the source endpoint reported by
 * vppcom_session_recvfrom() is converted into __addr / __addr_len via
 * vcom_socket_copy_ep_to_sockaddr().
 *
 * RETURN the value of vppcom_session_recvfrom() (byte count when
 *        positive).  If data was received but the address conversion
 *        reports a negative status, that status is returned instead.
 *
 * Earlier the conversion status unconditionally replaced the byte
 * count, so a successful receive with an address requested did not
 * report how many bytes had been read.
 *
 * NOTE(review): assumes vcom_socket_copy_ep_to_sockaddr() signals
 * failure with a negative value, like the other helpers in this file -
 * confirm against its definition.
 */
static inline ssize_t
vcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n,
		       int __flags, __SOCKADDR_ARG __addr,
		       socklen_t * __restrict __addr_len)
{
  int rv;
  int copy_rv;
  vppcom_endpt_t ep;
  u8 src_addr[sizeof (struct sockaddr_in6)];

  if (!__addr)
    return vppcom_session_recvfrom (__sid, __buf, __n, __flags, NULL);

  /* storage the endpoint's address pointer refers to */
  ep.ip = src_addr;
  rv = vppcom_session_recvfrom (__sid, __buf, __n, __flags, &ep);
  if (rv > 0)
    {
      copy_rv = vcom_socket_copy_ep_to_sockaddr (__addr, __addr_len, &ep);
      /* keep the byte count unless the conversion failed */
      if (copy_rv < 0)
	rv = copy_rv;
    }

  return rv;
}
ssize_t
vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n,
int __flags, __SOCKADDR_ARG __addr,
socklen_t * __restrict __addr_len)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
if (__addr && !__addr_len)
return -EINVAL;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
{
return -EINVAL;
}
rv = vcom_session_recvfrom (vsock->sid, __buf, __n,
__flags, __addr, __addr_len);
return rv;
}
/* TBD: move it to vppcom */
/*
 * sendmsg() on a vppcom session - not implemented yet.
 *
 * No data is sent.  The stub reports -EOPNOTSUPP, matching
 * vcom_session_recvmsg(); it used to return a bare -1, which reads as
 * -EPERM under the negative-errno convention used in this file.
 *
 * RETURN -EOPNOTSUPP always.
 */
static inline ssize_t
vcom_session_sendmsg (int __sid, const struct msghdr *__message, int __flags)
{
  (void) __sid;
  (void) __message;
  (void) __flags;

  /* TBD: gather __message's iovec and hand it to vppcom_session_write */
  return -EOPNOTSUPP;
}
ssize_t
vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vcom_socket_is_connection_mode_socket (__fd))
{
/* ignore __addr and _addr_len */
/* and EISCONN may be returned when they are not NULL and 0 */
if ((__message->msg_name != NULL) || (__message->msg_namelen != 0))
{
return -EISCONN;
}
}
else
{
/* TBD: validate __message->msg_name and __message->msg_namelen
* and return -EINVAL on validation error
* */
;
}
rv = vcom_session_sendmsg (vsock->sid, __message, __flags);
return rv;
}
#ifdef __USE_GNU
/*
 * sendmmsg() placeholder: nothing is transmitted and none of the
 * arguments is examined.
 *
 * RETURN 0 always.
 */
int
vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages,
		      unsigned int __vlen, int __flags)
{
  /* TBD: define a new vppcom api */
  const int n_sent = 0;

  return n_sent;
}
#endif
/* TBD: move it to vppcom */
/*
 * recvmsg() on a vppcom session - not implemented yet.
 *
 * Nothing is read and the arguments are not examined.
 *
 * RETURN -EOPNOTSUPP always.
 */
static inline ssize_t
vcom_session_recvmsg (int __sid, struct msghdr *__message, int __flags)
{
  /* TBD: scatter the result of vppcom_session_read into __message */
  return -EOPNOTSUPP;
}
ssize_t
vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
if (!__message)
{
return -EINVAL;
}
/* validate __flags */
rv = vcom_session_recvmsg (vsock->sid, __message, __flags);
return rv;
}
#ifdef __USE_GNU
/*
 * recvmmsg() placeholder: nothing is received and none of the
 * arguments is examined.
 *
 * RETURN 0 always.
 */
int
vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages,
		      unsigned int __vlen, int __flags,
		      struct timespec *__tmo)
{
  /* TBD: define a new vppcom api */
  const int n_received = 0;

  return n_received;
}
#endif
/* TBD: move it to vppcom */
/*
 * Fetch a socket option that is answered on the vppcom side.
 *
 * Only SOL_SOCKET / SO_ERROR is handled: the reported error is always
 * 0.  At most *__optlen bytes are written to __optval (the value is
 * truncated when the caller's buffer is smaller than an int) and
 * *__optlen is updated to the number of bytes written.  Previously a
 * full int was stored regardless of *__optlen and *__optlen was left
 * untouched.
 *
 * Every other level / option is left unhandled: nothing is written
 * and 0 is returned.  __sid is currently unused.
 *
 * RETURN 0 on success (including unhandled options),
 *        -EINVAL if SO_ERROR is requested with a NULL __optval or
 *        __optlen.
 */
static inline int
vcom_session_get_sockopt (int __sid, int __level, int __optname,
			  void *__restrict __optval,
			  socklen_t * __restrict __optlen)
{
  (void) __sid;

  /* 1. for socket level options that are NOT socket attributes
   *    and that has corresponding vpp options get from vppcom */
  if (__level == SOL_SOCKET && __optname == SO_ERROR)
    {
      const int so_error = 0;
      socklen_t len;

      if (!__optval || !__optlen)
	return -EINVAL;

      /* never write past the buffer size the caller announced */
      len = (socklen_t) sizeof (so_error);
      if (*__optlen < len)
	len = *__optlen;

      memcpy (__optval, &so_error, len);
      *__optlen = len;
      return 0;
    }

  /* 2. unhandled options */
  return 0;
}
int
vcom_socket_getsockopt (int __fd, int __level, int __optname,
void *__restrict __optval,
socklen_t * __restrict __optlen)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
if (!__optval || !__optlen)
return -EINVAL;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
switch (__level)
{
case SOL_SOCKET:
switch (__optname)
{
/*
* 1. for socket level options that are socket attributes,
* get from libc_getsockopt.
* 2. for socket level options that are NOT socket
* attributes and that has corresponding vpp options
* get from vppcom.
* 3. for socket level options unimplemented
* return -ENOPROTOOPT */
case SO_DEBUG:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_SNDBUF:
case SO_RCVBUF:
case SO_REUSEADDR:
case SO_REUSEPORT:
case SO_KEEPALIVE:
case SO_TYPE:
case SO_PROTOCOL:
case SO_DOMAIN:
case SO_OOBINLINE:
case SO_NO_CHECK:
case SO_PRIORITY:
case SO_LINGER:
case SO_BSDCOMPAT:
case SO_TIMESTAMP:
case SO_TIMESTAMPNS:
case SO_TIMESTAMPING:
case SO_RCVTIMEO:
case SO_SNDTIMEO:
case SO_RCVLOWAT:
case SO_SNDLOWAT:
case SO_PASSCRED:
case SO_PEERCRED:
case SO_PEERNAME:
case SO_ACCEPTCONN:
case SO_PASSSEC:
case SO_PEERSEC:
case SO_MARK:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
case SO_PEEK_OFF:
case SO_NOFCS:
case SO_BINDTODEVICE:
case SO_GET_FILTER:
case SO_LOCK_FILTER:
case SO_BPF_EXTENSIONS:
case SO_SELECT_ERR_QUEUE:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
#endif
case SO_MAX_PACING_RATE:
#ifdef SO_INCOMING_CPU
case SO_INCOMING_CPU:
#endif
rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
if (rv != 0)
{
rv = -errno;
return rv;
}
break;
case SO_ERROR:
rv = vcom_session_get_sockopt (vsock->sid, __level, __optname,
__optval, __optlen);
break;
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
*/
return -ENOPROTOOPT;
}
break;
default:
/* 1. handle options that are NOT socket level options,
* but have corresponding vpp otions. */
rv = vcom_session_get_sockopt (vsock->sid, __level, __optname,
__optval, __optlen);
break;
}
return rv;
}
/* TBD: move it to vppcom */
/*
 * Apply a socket option to vppcom session __sid.
 *
 * Supported (level / option -> session attribute):
 *   SOL_TCP    / TCP_KEEPIDLE  -> VPPCOM_ATTR_SET_TCP_KEEPIDLE
 *   SOL_TCP    / TCP_KEEPINTVL -> VPPCOM_ATTR_SET_TCP_KEEPINTVL
 *   SOL_IPV6   / IPV6_V6ONLY   -> VPPCOM_ATTR_SET_V6ONLY
 *   SOL_SOCKET / SO_KEEPALIVE  -> VPPCOM_ATTR_SET_KEEPALIVE
 *   SOL_SOCKET / SO_REUSEADDR  -> VPPCOM_ATTR_SET_REUSEADDR
 *   SOL_SOCKET / SO_BROADCAST  -> VPPCOM_ATTR_SET_BROADCAST
 *
 * __optval and __optlen are not forwarded; vppcom_session_attr() is
 * called with 0, 0 for its buffer arguments.
 *
 * RETURN the result of vppcom_session_attr() for a supported option,
 *        -EOPNOTSUPP otherwise.
 */
static inline int
vcom_session_setsockopt (int __sid, int __level, int __optname,
			 const void *__optval, socklen_t __optlen)
{
  if (__level == SOL_TCP)
    {
      if (__optname == TCP_KEEPIDLE)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPIDLE,
				    0, 0);
      if (__optname == TCP_KEEPINTVL)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPINTVL,
				    0, 0);
    }
  else if (__level == SOL_IPV6)
    {
      if (__optname == IPV6_V6ONLY)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_V6ONLY, 0, 0);
    }
  else if (__level == SOL_SOCKET)
    {
      if (__optname == SO_KEEPALIVE)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_KEEPALIVE, 0, 0);
      if (__optname == SO_REUSEADDR)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_REUSEADDR, 0, 0);
      if (__optname == SO_BROADCAST)
	return vppcom_session_attr (__sid, VPPCOM_ATTR_SET_BROADCAST, 0, 0);
    }

  /* unknown level or option */
  return -EOPNOTSUPP;
}
int
vcom_socket_setsockopt (int __fd, int __level, int __optname,
const void *__optval, socklen_t __optlen)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (!p)
return -EBADF;
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
if (!vsock)
return -ENOTSOCK;
if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
return -EINVAL;
/*
* Options without arguments
*/
if (__optname == SO_BINDTODEVICE)
{
rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
if (rv != 0)
{
rv = -errno;
}
return rv;
}
if (!__optval)
return -EFAULT;
if (__optlen < sizeof (int))
return -EINVAL;
switch (__level)
{
case SOL_IPV6:
switch (__optname)
{
case IPV6_V6ONLY:
rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
__optval, __optlen);
break;
default:
return -EOPNOTSUPP;
}
break;
case SOL_TCP:
switch (__optname)
{
case TCP_NODELAY:
return 0;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
__optval, __optlen);
break;
default:
return -EOPNOTSUPP;
}
break;
/* handle options at socket level */
case SOL_SOCKET:
switch (__optname)
{
case SO_REUSEADDR:
case SO_BROADCAST:
case SO_KEEPALIVE:
rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
__optval, __optlen);
break;
/*
* 1. for socket level options that are socket attributes,
* set it from libc_getsockopt
* 2. for socket level options that are NOT socket
* attributes and that has corresponding vpp options
* set it from vppcom
* 3. for socket level options unimplemented
* return -ENOPROTOOPT */
case SO_DEBUG:
case SO_DONTROUTE:
case SO_SNDBUF:
case SO_RCVBUF:
case SO_REUSEPORT:
case SO_TYPE:
case SO_PROTOCOL:
case SO_DOMAIN:
case SO_ERROR:
case SO_OOBINLINE:
case SO_NO_CHECK:
case SO_PRIORITY:
case SO_LINGER:
case SO_BSDCOMPAT:
case SO_TIMESTAMP:
case SO_TIMESTAMPNS:
case SO_TIMESTAMPING:
case SO_RCVTIMEO:
case SO_SNDTIMEO:
case SO_RCVLOWAT:
case SO_SNDLOWAT:
case SO_PASSCRED:
case SO_PEERCRED:
case SO_PEERNAME:
case SO_ACCEPTCONN:
case SO_PASSSEC:
case SO_PEERSEC:
case SO_MARK:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
case SO_PEEK_OFF:
case SO_NOFCS:
/*
* SO_BINDTODEVICE already handled as
* "Options without arguments" */
/* case SO_BINDTODEVICE: */
case SO_GET_FILTER:
case SO_LOCK_FILTER:
case SO_BPF_EXTENSIONS:
case SO_SELECT_ERR_QUEUE:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
#endif
case SO_MAX_PACING_RATE:
#ifdef SO_INCOMING_CPU
case SO_INCOMING_CPU:
#endif
rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
if (rv != 0)
{
rv = -errno;
return rv;
}
break;
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
*/
return -ENOPROTOOPT;
}
break;
default:
return -ENOPROTOOPT;
}
return rv;
}
int
vcom_socket_listen (int __fd, int __n)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (p)
{
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
/* TBD vppcom to accept __n parameter */
rv = vppcom_session_listen (vsock->sid, __n);
}
return rv;
}
/*
 * Create the fd / vsocket pair for an accepted session.
 *
 * The domain, type and protocol of the listening fd __fd are queried
 * from libc (and handed back through __domain, __type, __protocol), a
 * new fd is opened with the same parameters plus 'flags' OR-ed into
 * the type, and a vsocket bound to session __sid is registered for it.
 *
 * RETURN the new fd on success,
 *        -errno if a libc_getsockopt() query fails,
 *        the negative result of vcom_socket_open_socket() if it fails.
 */
static int
vcom_socket_connected_socket (int __fd, int __sid,
			      int *__domain,
			      int *__type, int *__protocol, int flags)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;
  vcom_socket_t *vsock;
  socklen_t len;
  i32 new_fd;
  i32 sockidx;

  len = sizeof (*__domain);
  if (libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &len) != 0)
    return -errno;

  len = sizeof (*__type);
  if (libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &len) != 0)
    return -errno;

  len = sizeof (*__protocol);
  if (libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &len) != 0)
    return -errno;

  new_fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol);
  if (new_fd < 0)
    return new_fd;

  /* register the new fd and tie it to the accepted session */
  pool_get (vsm->vsockets, vsock);
  vsocket_init (vsock);
  sockidx = vsock - vsm->vsockets;
  hash_set (vsm->sockidx_by_fd, new_fd, sockidx);
  vsocket_set (vsock, new_fd, __sid, SOCKET_TYPE_VPPCOM_BOUND);

  return new_fd;
}
/*
 * Common implementation of accept() and accept4().
 *
 * If flags is 0, then accept4() is the same as accept().
 * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags; any
 * other bit is rejected with -EINVAL.  (The previous check let unknown
 * bits through whenever one valid bit was set and returned a bare -1.)
 *
 * The listening socket's O_NONBLOCK state selects between waiting
 * forever and a zero timeout in vppcom_session_accept(); in the latter
 * case VPPCOM_ETIMEDOUT is reported as VPPCOM_EAGAIN.  On success a new
 * fd / vsocket pair is created for the accepted session and, when
 * __addr is non-NULL, the peer address is stored in it.
 *
 * RETURN the new fd on success
 *        -EINVAL       invalid flags
 *        -EOPNOTSUPP   __fd is not a connection-mode vcom socket
 *        -EAFNOSUPPORT the peer address family is neither IPv4 nor IPv6
 *        -1            __fd is not in sockidx_by_fd
 *        otherwise the negative result of vcom_fcntl(),
 *        vppcom_session_accept() or vcom_socket_connected_socket()
 *
 * TBD: The returned address is truncated if the buffer provided is too
 * small; in this case, __addr_len should return a value greater than
 * was supplied to the call.  The incoming *__addr_len is currently not
 * consulted.
 *
 * NOTE(review): on the -EAFNOSUPPORT path the freshly created fd and
 * vsocket are left registered; confirm whether they should be released.
 */
static int
vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr,
			  socklen_t * __restrict __addr_len, int flags)
{
  int rv = -1;
  vcom_socket_main_t *vsm = &vcom_socket_main;
  uword *p;
  vcom_socket_t *vsock;
  int fd;
  int sid;
  int domain;
  int type;
  int protocol;
  int family;
  uint8_t addr8[sizeof (struct in6_addr)];
  vppcom_endpt_t ep;

  ep.ip = addr8;

  if (VCOM_DEBUG > 2)
    fprintf (stderr, "[%d] vcom_socket_accept_flags: "
	     "fd = %d, __addr = %p, __addr_len = %p  flags = %d (0x%x)\n",
	     getpid (), __fd, __addr, __addr_len, flags, flags);

  /* flags can be 0 or can be bitwise OR
   * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */
  if (flags & ~(SOCK_NONBLOCK | SOCK_CLOEXEC))
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_accept_flags: "
	       "invalid flags = %d (0x%x)\n", getpid (), flags, flags);
      return -EINVAL;
    }

  /* TBD: return proper error code */
  if (!vcom_socket_is_connection_mode_socket (__fd))
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_accept_flags: "
	       "connection mode socket support TBD!\n", getpid ());
      return -EOPNOTSUPP;
    }

  p = hash_get (vsm->sockidx_by_fd, __fd);
  if (!p)
    return rv;

  vsock = pool_elt_at_index (vsm->vsockets, p[0]);

  rv = vcom_fcntl (vsock->fd, F_GETFL, 0);
  if (rv < 0)
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_accept_flags: "
	       "vcom_fcnt() returned %d!\n", getpid (), rv);
      return rv;
    }

  if (!(rv & O_NONBLOCK))
    {
      /* socket is not marked as nonblocking
       * and no pending connections are present
       * on the queue, accept () blocks the caller
       * until a connection is present.
       */
      rv = vppcom_session_accept (vsock->sid, &ep, flags,
				  -1.0 /* wait forever */ );
    }
  else
    {
      /* If the socket is marked nonblocking and
       * no pending connections are present on the
       * queue, accept fails with the error
       * EAGAIN or EWOULDBLOCK
       */
      rv = vppcom_session_accept (vsock->sid, &ep, flags, 0);
      if (rv == VPPCOM_ETIMEDOUT)
	rv = VPPCOM_EAGAIN;
    }

  if (rv < 0)
    {
      if (rv != VPPCOM_EAGAIN)
	fprintf (stderr, "[%d] ERROR: vcom_socket_accept_flags: "
		 "vppcom_session_accept() returned %d!\n", getpid (), rv);
      return rv;
    }
  sid = rv;

  /* create a new connected socket resource and set flags
   * on the new file descriptor.
   * update vsockets and sockidx_by_fd table
   * */
  fd = vcom_socket_connected_socket (__fd, sid,
				     &domain, &type, &protocol, flags);
  if (fd < 0)
    {
      /* report the failing call's own result, not the session id */
      fprintf (stderr, "[%d] ERROR: vcom_socket_accept_flags: "
	       "vcom_socket_connected_socket() returned %d!\n",
	       getpid (), fd);
      return fd;
    }
  rv = fd;

  if (!__addr)
    return rv;

  /* cut-thru sessions take the family from the listening socket's
   * domain, all others from the endpoint returned by vppcom */
  if (ep.is_cut_thru)
    {
      family = domain;
    }
  else
    {
      switch (ep.is_ip4)
	{
	case VPPCOM_IS_IP4:
	  family = AF_INET;
	  break;
	case VPPCOM_IS_IP6:
	  family = AF_INET6;
	  break;
	default:
	  family = AF_UNSPEC;
	  break;
	}
    }

  switch (family)
    {
    case AF_INET:
      ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
      ((struct sockaddr_in *) __addr)->sin_port = ep.port;
      memcpy (&((struct sockaddr_in *) __addr)->sin_addr,
	      addr8, sizeof (struct in_addr));
      if (__addr_len)
	*__addr_len = sizeof (struct sockaddr_in);
      break;

    case AF_INET6:
      ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6;
      ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
      memcpy (&((struct sockaddr_in6 *) __addr)->sin6_addr,
	      addr8, sizeof (struct in6_addr));
      if (__addr_len)
	*__addr_len = sizeof (struct sockaddr_in6);
      break;

    default:
      return -EAFNOSUPPORT;
    }

  return rv;
}
/*
 * accept() counterpart for a vcom fd: vcom_socket_accept_flags()
 * with no flags set.
 */
int
vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr,
		    socklen_t * __restrict __addr_len)
{
  const int no_flags = 0;

  return vcom_socket_accept_flags (__fd, __addr, __addr_len, no_flags);
}
/*
 * accept4() counterpart for a vcom fd.
 *
 * __flags is passed through unchanged to vcom_socket_accept_flags();
 * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in it.
 */
int
vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr,
		     socklen_t * __restrict __addr_len, int __flags)
{
  int new_fd;

  new_fd = vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags);
  return new_fd;
}
/* TBD: move it to vppcom */
/*
 * shutdown() on a vppcom session - placeholder.
 *
 * Neither argument is examined and nothing is shut down.
 *
 * RETURN 0 always.
 */
static inline int
vcom_session_shutdown (int __fd, int __how)
{
  (void) __fd;
  (void) __how;

  return 0;
}
int
vcom_socket_shutdown (int __fd, int __how)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
uword *p;
vcom_socket_t *vsock;
p = hash_get (vsm->sockidx_by_fd, __fd);
if (p)
{
vsock = pool_elt_at_index (vsm->vsockets, p[0]);
switch (__how)
{
case SHUT_RD:
case SHUT_WR:
case SHUT_RDWR:
rv = vcom_session_shutdown (vsock->sid, __how);
return rv;
break;
default:
return -EINVAL;
break;
}
}
return rv;
}
/*
 * epoll_create1() counterpart.
 *
 * Opens an epoll fd with vcom_socket_open_epoll(), creates the vppcom
 * epoll instance and registers a vepoll that ties the two together.
 * If the vppcom side fails the epoll fd is closed again.
 *
 * RETURN the epoll fd on success, otherwise the negative result of
 *        vcom_socket_open_epoll() or vppcom_epoll_create().
 */
int
vcom_socket_epoll_create1 (int __flags)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;
  vcom_epoll_t *vepoll;
  i32 epfd;
  i32 vep_idx;
  i32 epollidx;

  epfd = vcom_socket_open_epoll (__flags);
  if (epfd < 0)
    return epfd;

  vep_idx = vppcom_epoll_create ();
  if (vep_idx < 0)
    {
      /* undo the fd allocation before reporting the failure */
      vcom_socket_close_epoll (epfd);
      return vep_idx;
    }

  pool_get (vsm->vepolls, vepoll);
  vepoll_init (vepoll);

  epollidx = vepoll - vsm->vepolls;
  hash_set (vsm->epollidx_by_epfd, epfd, epollidx);

  vepoll_set (vepoll, epfd, vep_idx, EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0);

  return epfd;
}
/*
 * Maintain the vepitem bookkeeping for one epoll_ctl() operation.
 *
 * PRE: vppcom_epoll_ctl() is successful
 * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put
 *
 * Tables touched:
 *   epitemidx_by_epfdfd  (epfd,fd) key -> vepitem pool index
 *   epitemidxs_by_epfd   epfd -> vector of vepitem pool indices
 *   epitemidxs_by_fd     fd   -> vector of vepitem pool indices
 * and vepoll->count, which is incremented on ADD and decremented on DEL.
 *
 * __event is dereferenced for EPOLL_CTL_ADD and EPOLL_CTL_MOD.
 * vep_idx, vfd_id and vfd are not used by this function.
 *
 * RETURN 0       on success
 *        -EEXIST ADD of an (epfd, fd) pair that is already registered
 *        -ENOENT MOD / DEL of a pair that is not registered, or a
 *                per-epfd / per-fd index vector is missing on DEL
 *        -EINVAL unknown __op
 */
int
vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd,
struct epoll_event *__event,
i32 vep_idx, vcom_epoll_t * vepoll,
i32 vfd_id, void *vfd, vcom_fd_type_t type,
int free_vepitem_on_del)
{
int rv = -1;
vcom_socket_main_t *vsm = &vcom_socket_main;
vcom_epitem_t *vepitem;
vcom_epitem_key_t epfdfd = {.epfd = __epfd,.fd = __fd };
uword *p;
i32 vepitemidx;
i32 *vepitemidxs = 0;
/* initial "returned events" value stored with a new or modified item */
struct epoll_event revent = {.events = 0,.data.fd = INVALID_FD };
i32 vec_idx;
/* perform control operations on the epoll instance */
switch (__op)
{
case EPOLL_CTL_ADD:
/*
* supplied file descriptor is already
* registered with this epoll instance
* */
/* vepitem exists */
p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
if (p)
{
rv = -EEXIST;
goto out;
}
/* add a new vepitem */
pool_get (vsm->vepitems, vepitem);
vepitem_init (vepitem);
vepitemidx = vepitem - vsm->vepitems;
hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx);
vepitem_set (vepitem, __epfd, __fd, __fd, __fd, type, *__event, revent);
/* update epitemidxs */
/* by_epfd */
p = hash_get (vsm->epitemidxs_by_epfd, __epfd);
if (!p) /* not exist */
{
vepitemidxs = 0;
vec_add1 (vepitemidxs, vepitemidx);
hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs);
}
else /* exists */
{
vepitemidxs = *(i32 **) p;
vec_add1 (vepitemidxs, vepitemidx);
/* store the vector pointer again after appending to it */
hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0);
}
/* update epitemidxs */
/* by_fd */
p = hash_get (vsm->epitemidxs_by_fd, __fd);
if (!p) /* not exist */
{
vepitemidxs = 0;
vec_add1 (vepitemidxs, vepitemidx);
hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs);
}
else /* exists */
{
vepitemidxs = *(i32 **) p;
vec_add1 (vepitemidxs, vepitemidx);
/* store the vector pointer again after appending to it */
hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0);
}
/* increment vepoll fd count by 1 */
vepoll->count += 1;
rv = 0;
goto out;
break;
case EPOLL_CTL_MOD:
/*
* supplied file descriptor is not
* registered with this epoll instance
* */
/* vepitem not exist */
p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
if (!p)
{
rv = -ENOENT;
goto out;
}
vepitem = pool_elt_at_index (vsm->vepitems, p[0]);
if (vepitem)
{
/* replace the requested events and reset the returned events */
vepitem->event = *__event;
vepitem->revent = revent;
}
rv = 0;
goto out;
break;
case EPOLL_CTL_DEL:
/*
* supplied file descriptor is not
* registered with this epoll instance
* */
/* vepitem not exist */
p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
if (!p)
{
rv = -ENOENT;
goto out;
}
vepitemidx = *(i32 *) p;
/* NOTE(review): the key is removed here; the -ENOENT exits further
 * down return without putting it back - confirm this is intended */
hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key);
/* update epitemidxs */
/* by_epfd */
p = hash_get (vsm->epitemidxs_by_epfd, __epfd);
if (!p) /* not exist */
{
rv = -ENOENT;
goto out;
}
else /* exists */
{
vepitemidxs = *(i32 **) p;
vec_idx = vec_search (vepitemidxs, vepitemidx);
if (vec_idx != ~0)
{
vec_del1 (vepitemidxs, vec_idx);
/* drop the hash entry together with the last index */
if (!vec_len (vepitemidxs))
{
vec_free (vepitemidxs);
hash_unset (vsm->epitemidxs_by_epfd, __epfd);
}
}
}
/* update epitemidxs */
/* by_fd */
p = hash_get (vsm->epitemidxs_by_fd, __fd);
if (!p) /* not exist */
{
rv = -ENOENT;
goto out;
}
else /* exists */
{
vepitemidxs = *(i32 **) p;
vec_idx = vec_search (vepitemidxs, vepitemidx);
if (vec_idx != ~0)
{
vec_del1 (vepitemidxs, vec_idx);
/* drop the hash entry together with the last index */
if (!vec_len (vepitemidxs))
{
vec_free (vepitemidxs);
hash_unset (vsm->epitemidxs_by_fd, __fd);
}
}
}
/* pool put vepitem */
vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx);
if (free_vepitem_on_del)
{
if (!vepitem)
{
rv = -ENOENT;
goto out;
}
vepitem_init (vepitem);
pool_put (vsm->vepitems, vepitem);
}
else
{
/* NOTE(review): this calls vepitem_init() only when vepitem is NULL,
 * i.e. it would pass a NULL pointer; the condition looks inverted
 * (compare the branch above) - confirm the intended behavior */
if (!vepitem)
{
vepitem_init (vepitem);
}
}
/* decrement vepoll fd count by 1 */
vepoll->count -= 1;
rv = 0;
goto out;
break;
default:
rv = -EINVAL;
goto out;
break;
}
out:
return rv;
}
/*
 * Perform one epoll_ctl() operation on a vcom epoll fd.
 *
 * PRE: 00. null pointer check on __event
 *      01. all other parameters are validated
 *
 * If __fd maps to a vppcom bound vsocket the operation is sent to
 * vppcom_epoll_ctl() on its session, otherwise to libc_epoll_ctl().
 * On success the vepoll's total count and the matching per-backend
 * count (vcl_cnt / libc_cnt) are adjusted: +1 for EPOLL_CTL_ADD,
 * -1 for EPOLL_CTL_DEL, unchanged for any other operation.
 *
 * free_vepitem_on_del is not used here.
 *
 * RETURN -EBADF if __epfd is not a known vcom epoll fd, otherwise the
 *        result of vppcom_epoll_ctl() / libc_epoll_ctl().
 */
static int
vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd,
				struct epoll_event *__event,
				int free_vepitem_on_del)
{
  vcom_epoll_t *vepoll;
  vcom_socket_t *vfd_vsock;
  i32 vep_idx;
  i32 sid;
  i32 delta;
  int rv;

  /* get vep_idx and vepoll */
  vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll);
  if (vep_idx == INVALID_VEP_IDX)
    return -EBADF;

  /* how a successful operation changes the registered fd counts */
  switch (__op)
    {
    case EPOLL_CTL_ADD:
      delta = 1;
      break;
    case EPOLL_CTL_DEL:
      delta = -1;
      break;
    default:
      delta = 0;
      break;
    }

  /* get vcom fd type, vfd_id and vfd */
  sid = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock);

  if ((sid == INVALID_SESSION_ID) ||
      !vcom_socket_type_is_vppcom_bound (vfd_vsock->type))
    {
      /* not a vppcom session: let the kernel epoll handle the fd */
      rv = libc_epoll_ctl (__epfd, __op, __fd, __event);
      if (rv == 0)
	{
	  vepoll->count += delta;
	  vepoll->libc_cnt += delta;
	}
      if (VCOM_DEBUG > 0)
	fprintf (stderr,
		 "[%d] vcom_socket_epoll_ctl_i: libc_epoll_ctl() "
		 "returned %d\n\tepfd %d, vep_idx %d, fd %d sid %d op %d"
		 "\n\tcount %d, vcl_cnt %d, libc_cnt %d\n",
		 getpid (), rv, __epfd, vep_idx, __fd, sid, __op,
		 vepoll->count, vepoll->vcl_cnt, vepoll->libc_cnt);
      return rv;
    }

  rv = vppcom_epoll_ctl (vep_idx, __op, sid, __event);
  if (rv == VPPCOM_OK)
    {
      vepoll->count += delta;
      vepoll->vcl_cnt += delta;
    }
  if (VCOM_DEBUG > 0)
    fprintf (stderr,
	     "[%d] vcom_socket_epoll_ctl_i: vppcom_epoll_ctl() "
	     "returned %d\n\tepfd %d, vep_idx %d, fd %d sid %d op %d"
	     "\n\tcount %d, vcl_cnt %d, libc_cnt %d\n",
	     getpid (), rv, __epfd, vep_idx, __fd, sid, __op,
	     vepoll->count, vepoll->vcl_cnt, vepoll->libc_cnt);
  return rv;
}
/*
 * epoll_ctl() counterpart: vcom_socket_epoll_ctl_internal() with
 * free_vepitem_on_del set to 1.
 */
int
vcom_socket_epoll_ctl (int __epfd, int __op, int __fd,
		       struct epoll_event *__event)
{
  const int free_vepitem_on_del = 1;

  return vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event,
					 free_vepitem_on_del);
}
/*
 * Variant of vcom_socket_epoll_ctl() that calls
 * vcom_socket_epoll_ctl_internal() with free_vepitem_on_del set to 0.
 */
static int
vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd,
			struct epoll_event *__event)
{
  const int free_vepitem_on_del = 0;

  return vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event,
					 free_vepitem_on_del);
}
/*
 * epoll_pwait() counterpart for a vcom epoll fd.
 *
 * Depending on what is registered on the epoll instance the wait is
 * served by vppcom_epoll_wait() only (libc_cnt == 0), by
 * libc_epoll_pwait() only (vcl_cnt == 0), or by polling both in a loop
 * until either reports events, one of them fails, or the timeout
 * expires.  With __timeout == -1 the mixed loop never expires.
 *
 * In mixed mode vppcom events come first in __events, followed by as
 * many libc events as still fit; the return value is the number of
 * entries actually stored and never exceeds __maxevents.  (The merge
 * used to add the raw libc result, which could over-report, and used a
 * negative libc result as a copy length.)
 *
 * RETURN number of events stored in __events (0 on timeout)
 *        -EFAULT __events is NULL or __timeout < -1
 *        -EINVAL __maxevents <= 0, or nothing is registered
 *        -EBADF  __epfd is not a known vcom epoll fd
 *        otherwise the negative result of vppcom_epoll_wait() or
 *        -errno of libc_epoll_pwait()
 *
 * NOTE(review): for __timeout == -1 the vppcom-only path passes a
 * time_to_wait of 0 to vppcom_epoll_wait(); confirm that this is the
 * value vppcom expects for "wait forever".
 * NOTE(review): libc_ev is a function-level static scratch vector, so
 * concurrent callers share it - confirm callers are serialized.
 */
int
vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events,
			 int __maxevents, int __timeout,
			 const __sigset_t * __ss)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;
  int rv = -EBADF;
  int rv2;
  double time_to_wait = (double) 0;
  double timeout, now = 0;
  vcom_epoll_t *vepoll;
  i32 vep_idx;
  static struct epoll_event *libc_ev = 0;

  /* validate __event */
  if (!__events || (__timeout < -1))
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_epoll_pwait: "
	       "Bad args __events %p, __timeout %d\n", getpid (),
	       __events, __timeout);
      rv = -EFAULT;
      goto out;
    }

  /* epoll_wait() requires maxevents to be greater than zero */
  if (__maxevents <= 0)
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_epoll_pwait: "
	       "Bad args __maxevents %d\n", getpid (), __maxevents);
      rv = -EINVAL;
      goto out;
    }

  /* milliseconds -> seconds */
  time_to_wait = ((__timeout >= 0) ? (double) __timeout / (double) 1000 : 0);

  vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll);
  if (vep_idx == INVALID_VEP_IDX)
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_epoll_pwait: "
	       "Bad epoll fd %d\n", getpid (), __epfd);
      return -EBADF;
    }

  if (vepoll->count <= 0)
    {
      fprintf (stderr, "[%d] ERROR: vcom_socket_epoll_pwait: No events"
	       " in epfd!\n\tcount %d, vcl_cnt %d, libc_cnt %d\n",
	       getpid (), vepoll->count, vepoll->vcl_cnt, vepoll->libc_cnt);
      rv = -EINVAL;
      goto out;
    }

  if (vepoll->libc_cnt == 0)
    {
      if (VCOM_DEBUG > 2)
	fprintf (stderr, "[%d] vcom_socket_epoll_pwait: libc_cnt = 0, "
		 "calling vppcom_epoll_wait() time_to_wait = %f\n",
		 getpid (), time_to_wait);
      rv = vppcom_epoll_wait (vep_idx, __events, __maxevents, time_to_wait);
    }
  else if (vepoll->vcl_cnt == 0)
    {
      if (VCOM_DEBUG > 2)
	fprintf (stderr, "[%d] vcom_socket_epoll_pwait: vcl_cnt = 0, "
		 "calling libc_epoll_pwait()\n", getpid ());
      rv = libc_epoll_pwait (__epfd, __events, __maxevents, __timeout, __ss);
    }
  else
    {
      if (VCOM_DEBUG > 2)
	fprintf (stderr, "[%d] vcom_socket_epoll_pwait: vcl_cnt = %d, "
		 "libc_cnt = %d -> mixed polling (time_to_wait = %f, "
		 "__timeout = %d)\n",
		 getpid (), vepoll->vcl_cnt, vepoll->libc_cnt,
		 time_to_wait, __timeout);
      vec_validate (libc_ev, __maxevents);
      timeout = clib_time_now (&vsm->clib_time) + time_to_wait;
      do
	{
	  /* non-blocking look at vppcom, 1 ms look at the kernel */
	  rv = vppcom_epoll_wait (vep_idx, __events, __maxevents, 0);
	  rv2 = libc_epoll_pwait (__epfd, libc_ev, __maxevents, 1, __ss);
	  if (VCOM_DEBUG == 666)
	    fprintf (stderr, "[%d] vcom_socket_epoll_pwait: "
		     "rv = %d, rv2 = %d, timeout = %f, now = %f\n",
		     getpid (), rv, rv2, timeout, now);
	  if ((rv > 0) || (rv2 > 0))
	    {
	      int n_vcl = (rv > 0) ? rv : 0;
	      int n_libc = (rv2 > 0) ? rv2 : 0;
	      int room = __maxevents - n_vcl;

	      if (VCOM_DEBUG > 2)
		fprintf (stderr, "[%d] vcom_socket_epoll_pwait: "
			 "rv = %d, rv2 = %d\n", getpid (), rv, rv2);

	      /* append only the libc events that still fit */
	      if (n_libc > room)
		n_libc = room;
	      if (n_libc > 0)
		clib_memcpy (&__events[n_vcl], libc_ev,
			     n_libc * sizeof (*libc_ev));
	      rv = n_vcl + n_libc;
	      goto out;
	    }
	  else if ((rv < 0) || (rv2 < 0))
	    {
	      /* capture errno before fprintf() can overwrite it */
	      int libc_errno = errno;

	      if (rv < 0)
		fprintf (stderr,
			 "[%d] ERROR: vppcom_epoll_wait() returned %d\n",
			 getpid (), rv);
	      if (rv2 < 0)
		{
		  fprintf (stderr,
			   "[%d] ERROR: libc_epoll_wait() failed, errno %d\n",
			   getpid (), libc_errno);
		  rv = (rv < 0) ? rv : -libc_errno;
		}
	      goto out;
	    }
	  /* with an infinite timeout 'now' stays 0 and the loop goes on */
	  if (__timeout != -1)
	    now = clib_time_now (&vsm->clib_time);
	}
      while (now < timeout);
    }
out:
  vec_reset_length (libc_ev);
  return rv;
}
/*
 * Translate a pollfd array into select() style fd sets.
 *
 * For every entry with a non-negative fd:
 *   - the fd is always added to vcom_exceptfds
 *     (for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL);
 *   - it is added to vcom_readfds when POLLIN or POLLPRI (and, when
 *     available, POLLRDNORM or POLLRDBAND) is requested;
 *   - it is added to vcom_writefds when POLLOUT (and, when available,
 *     POLLWRNORM or POLLWRBAND) is requested.
 * Entries with a negative fd are skipped.  The sets are only added to,
 * never cleared; vcom_nfds is not used.
 */
static inline void
vcom_pollfds_2_selectfds (
			   /* src */
			   struct pollfd *__fds, nfds_t __nfds,
			   /* dest */
			   int vcom_nfds,
			   fd_set * __restrict vcom_readfds,
			   fd_set * __restrict vcom_writefds,
			   fd_set * __restrict vcom_exceptfds)
{
  int read_mask = POLLIN | POLLPRI;
  int write_mask = POLLOUT;
  nfds_t i;

#if defined __USE_XOPEN || defined __USE_XOPEN2K8
  read_mask |= POLLRDNORM | POLLRDBAND;
  write_mask |= POLLWRNORM | POLLWRBAND;
#endif

  for (i = 0; i < __nfds; i++)
    {
      const struct pollfd *pfd = &__fds[i];

      /* ignore negative fds */
      if (pfd->fd < 0)
	continue;

      FD_SET (pfd->fd, vcom_exceptfds);

      /* requested events */
      if (pfd->events & read_mask)
	FD_SET (pfd->fd, vcom_readfds);

      if (pfd->events & write_mask)
	FD_SET (pfd->fd, vcom_writefds);
    }
}
/*
 * Translate select() style result sets back into pollfd revents.
 *
 * Entries with a negative fd get revents = 0 and are otherwise
 * skipped; they are never looked up in the fd sets.  (The lookup used
 * to happen anyway, indexing the sets with a negative fd.)
 *
 * For every other entry bits are OR-ed into revents (the field is not
 * cleared first):
 *   - POLLERR when the fd is in vcom_exceptfds
 *     (TBD: for now any select exception is flagged as POLLERR);
 *   - for each requested event, the matching bit when the fd is in
 *     vcom_readfds / vcom_writefds.  A requested POLLPRI is reported
 *     as POLLIN.
 *
 * *nfd receives the number of entries with a non-negative fd and a
 * nonzero revents.  vcom_nfds is not used.
 */
static inline void
vcom_selectfds_2_pollfds (
			   /* dest */
			   struct pollfd *__fds, nfds_t __nfds, int *nfd,
			   /* src */
			   int vcom_nfds,
			   fd_set * __restrict vcom_readfds,
			   fd_set * __restrict vcom_writefds,
			   fd_set * __restrict vcom_exceptfds)
{
  nfds_t fds_idx = 0;
  int n_ready = 0;

  for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
    {
      struct pollfd *pfd = &__fds[fds_idx];
      int readable;
      int writable;

      /* ignore negative fds */
      if (pfd->fd < 0)
	{
	  pfd->revents = 0;
	  continue;
	}

      readable = FD_ISSET (pfd->fd, vcom_readfds) ? 1 : 0;
      writable = FD_ISSET (pfd->fd, vcom_writefds) ? 1 : 0;

      /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */
      if (FD_ISSET (pfd->fd, vcom_exceptfds))
	pfd->revents |= POLLERR;

      /* requested events */
      if ((pfd->events & POLLIN) && readable)
	pfd->revents |= POLLIN;

      /* priority data is reported through POLLIN */
      if ((pfd->events & POLLPRI) && readable)
	pfd->revents |= POLLIN;

      if ((pfd->events & POLLOUT) && writable)
	pfd->revents |= POLLOUT;

#if defined __USE_XOPEN || defined __USE_XOPEN2K8
      if ((pfd->events & POLLRDNORM) && readable)
	pfd->revents |= POLLRDNORM;

      if ((pfd->events & POLLRDBAND) && readable)
	pfd->revents |= POLLRDBAND;

      if ((pfd->events & POLLWRNORM) && writable)
	pfd->revents |= POLLWRNORM;

      if ((pfd->events & POLLWRBAND) && writable)
	pfd->revents |= POLLWRBAND;
#endif

      /*
       * nfd: the number of structures which have nonzero revents
       * fields (in other words, those descriptors with events or
       * errors reported)
       */
      if (pfd->revents)
	n_ready++;
    }

  *nfd = n_ready;
}
/*
 * implements vcom_socket_poll () interface
 *
 * internally uses vcom_socket_select ()
 * to realize the behavior
 *
 * PRE: parameters are validated,
 *      vcom_socket_poll is always called with __timeout set to zero
 *      hence returns immediately
 *
 * ACTION: handle non negative validated vcom fds and ignore rest
 *
 * __timeout is not consulted: the select is always issued with a zero
 * timeval.
 *
 * RETURN number of entries with nonzero revents
 *        0 if no entry with a non-negative fd requests any event
 *        -EINVAL if __nfds exceeds FD_SETSIZE or an fd does not fit in
 *                an fd_set (fd >= FD_SETSIZE; the check used to let
 *                fd == FD_SETSIZE through)
 *        otherwise the negative result of vcom_socket_select()
 */
int
vcom_socket_poll_select_impl (struct pollfd *__fds, nfds_t __nfds,
			      int __timeout)
{
  nfds_t fds_idx = 0;
  int nfd = 0;
  /* vcom */
  int vcom_nfds = 0;
  fd_set vcom_readfds;
  fd_set vcom_writefds;
  fd_set vcom_exceptfds;
  int vcom_nfd = -1;
  /* invalid max_vcom_fd is -1 */
  int max_vcom_fd = -1;
  /* __timeout is zero to get ready events and return immediately */
  struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };

  /* validate __nfds from select perspective */
  if (__nfds > FD_SETSIZE)
    return -EINVAL;

  /* zero vcom fd sets */
  FD_ZERO (&vcom_readfds);
  FD_ZERO (&vcom_writefds);
  FD_ZERO (&vcom_exceptfds);

  for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
    {
      /* ignore negative fds */
      if (__fds[fds_idx].fd < 0)
	continue;

      /* an fd_set only has room for fds 0 .. FD_SETSIZE - 1 */
      if (__fds[fds_idx].fd >= FD_SETSIZE)
	return -EINVAL;

      /* highest fd that actually requests events */
      if ((__fds[fds_idx].fd > max_vcom_fd) && __fds[fds_idx].events)
	max_vcom_fd = __fds[fds_idx].fd;
    }

  vcom_nfds = max_vcom_fd != -1 ? max_vcom_fd + 1 : 0;
  if (!vcom_nfds)
    return vcom_nfds;

  vcom_pollfds_2_selectfds (
			     /* src */
			     __fds, __nfds,
			     /* dest */
			     vcom_nfds,
			     &vcom_readfds, &vcom_writefds, &vcom_exceptfds);

  /* select on vcom fds */
  vcom_nfd = vcom_socket_select (vcom_nfds,
				 &vcom_readfds,
				 &vcom_writefds, &vcom_exceptfds, &tv);
  if (VCOM_DEBUG > 2)
    fprintf (stderr,
	     "[%d] vcom_socket_select: "
	     "'%04d'='%04d'\n", getpid (), vcom_nfd, vcom_nfds);

  if (vcom_nfd < 0)
    return vcom_nfd;

  vcom_selectfds_2_pollfds (
			     /* dest */
			     __fds, __nfds, &nfd,
			     /* src */
			     vcom_nfds,
			     &vcom_readfds, &vcom_writefds, &vcom_exceptfds);

  return nfd;
}
/*
 * TBD: remove this static function once vppcom
 *      has an implementation in place
 *
 * Placeholder for a vppcom poll API: none of the arguments is
 * examined.
 *
 * RETURN -EOPNOTSUPP always.
 */
static int
vppcom_poll (struct pollfd *__fds, nfds_t __nfds, double time_to_wait)
{
  (void) __fds;
  (void) __nfds;
  (void) time_to_wait;

  return -EOPNOTSUPP;
}
/*
 * Resolves a vcom fd to the vppcom identifier used in its place:
 * the session id when vcom_socket_get_sid () knows the fd, otherwise
 * the vep_idx when vcom_socket_get_vep_idx () knows it.
 *
 * Returns 0 and stores the identifier in *vppcom_id, or -EBADF when
 * neither lookup succeeds (*vppcom_id is then left untouched).
 */
static int
vcom_socket_poll_resolve_vcom_fd (int fd, i32 * vppcom_id)
{
  i32 sid;
  i32 vep_idx;

  sid = vcom_socket_get_sid (fd);
  if (sid != INVALID_SESSION_ID)
    {
      *vppcom_id = sid;
      return 0;
    }

  vep_idx = vcom_socket_get_vep_idx (fd);
  if (vep_idx != INVALID_VEP_IDX)
    {
      *vppcom_id = vep_idx;
      return 0;
    }

  return -EBADF;
}

/*
 * Implements the vcom_socket_poll () interface on top of vppcom_poll ().
 *
 * Every non negative fd in __fds is replaced by its session id or
 * vep_idx, __timeout (milliseconds) is converted to seconds and the
 * request is handed to vppcom_poll ().
 *
 * __fds      array of pollfd entries; negative fds are ignored
 * __nfds     number of entries in __fds
 * __timeout  > 0: wait time in milliseconds, 0: do not wait,
 *            < 0: passed on as -1.0
 *
 * Returns -EBADF when an fd is neither a vcom socket nor a vcom epoll
 * fd; in that case __fds is left unmodified. Otherwise returns whatever
 * vppcom_poll () returns.
 *
 * NOTE(review): on the non-error path the fd fields still hold the
 * vppcom identifiers when this function returns; restoring the caller's
 * fds needs to be added together with a real vppcom_poll ().
 */
int
vcom_socket_poll_vppcom_impl (struct pollfd *__fds, nfds_t __nfds,
                              int __timeout)
{
  nfds_t fds_idx;
  /* in seconds eg. 3.123456789 seconds */
  double time_to_wait;
  i32 vppcom_id;
  int rv;

  /*
   * Pass 1: validate every fd before touching the array, so that a
   * bad fd does not leave the caller with a half translated array.
   */
  for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
    {
      /* ignore negative fds */
      if (__fds[fds_idx].fd < 0)
        {
          continue;
        }
      rv = vcom_socket_poll_resolve_vcom_fd (__fds[fds_idx].fd, &vppcom_id);
      if (rv < 0)
        {
          return rv;
        }
    }

  /* Pass 2: replace vcom fd with session idx / vep_idx */
  for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
    {
      if (__fds[fds_idx].fd < 0)
        {
          continue;
        }
      rv = vcom_socket_poll_resolve_vcom_fd (__fds[fds_idx].fd, &vppcom_id);
      if (rv < 0)
        {
          /* not expected after the validation pass */
          return rv;
        }
      __fds[fds_idx].fd = vppcom_id;
    }

  /* convert __timeout (milliseconds) to seconds */
  if (__timeout > 0)
    {
      time_to_wait = (double) __timeout / (double) 1000;
    }
  else if (__timeout == 0)
    {
      time_to_wait = (double) 0;
    }
  else
    {
      /* same value the former "~0" produced once converted to double */
      time_to_wait = -1.0;
    }

  return vppcom_poll (__fds, __nfds, time_to_wait);
}
/*
 * poll () entry point for vcom sockets.
 *
 * Selects the implementation: the select based one is in use;
 * vcom_socket_poll_vppcom_impl () is the alternative.
 *
 * Arguments and return value are passed through unchanged.
 */
int
vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
{
  int rv;

  rv = vcom_socket_poll_select_impl (__fds, __nfds, __timeout);

  return rv;
}
#ifdef __USE_GNU
/*
 * ppoll () entry point for vcom sockets; not implemented.
 *
 * None of the arguments is looked at; the call always fails with
 * -EOPNOTSUPP.
 */
int
vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds,
                   const struct timespec *__timeout, const __sigset_t * __ss)
{
  const int not_supported = -EOPNOTSUPP;

  /* no argument is used by the stub */
  (void) __fds;
  (void) __nfds;
  (void) __timeout;
  (void) __ss;

  return not_supported;
}
#endif
/*
 * One-time set-up of vcom_socket_main.
 *
 * On the first call the vsockets, vepolls and vepitems pools are
 * allocated with FD_SETSIZE elements each, the lookup hash tables are
 * created, the clib time base is initialised and the init flag is set.
 * Later calls only emit the optional debug line.
 *
 * Always returns 0.
 */
int
vcom_socket_main_init (void)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;

  if (VCOM_DEBUG > 0)
    printf ("vcom_socket_main_init\n");

  /* already set up: nothing left to do */
  if (vsm->init)
    return 0;

  /* TBD: define FD_MAXSIZE and use it here */

  /* vcom sockets and their fd lookup table */
  pool_alloc (vsm->vsockets, FD_SETSIZE);
  vsm->sockidx_by_fd = hash_create (0, sizeof (i32));

  /* vcom epoll instances and their epfd lookup table */
  pool_alloc (vsm->vepolls, FD_SETSIZE);
  vsm->epollidx_by_epfd = hash_create (0, sizeof (i32));

  /* epoll items and the three tables that index them */
  pool_alloc (vsm->vepitems, FD_SETSIZE);
  vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32));
  vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *));
  vsm->epitemidxs_by_fd = hash_create (0, sizeof (i32 *));

  clib_time_init (&vsm->clib_time);

  vsm->init = 1;

  return 0;
}
/*
 * Dumps the content of vcom_socket_main to stdout.
 *
 * Prints one line per active vsocket, vepoll and vepitem, followed by
 * the epitem index vectors stored per epfd and per fd.
 * Prints nothing when vcom_socket_main has not been initialised.
 */
void
vcom_socket_main_show (void)
{
  vcom_socket_main_t *vsm = &vcom_socket_main;
  vcom_socket_t *sock_p;
  vcom_epoll_t *epoll_p;
  vcom_epitem_t *item_p;
  i32 key_epfd;
  i32 key_fd;
  i32 *idx_vec, *idx_p;

  if (!vsm->init)
    return;

  /* active vsockets */
  /* *INDENT-OFF* */
  pool_foreach (sock_p, vsm->vsockets,
  ({
    printf(
      "fd='%04d', sid='%08x',type='%-30s'\n",
      sock_p->fd, sock_p->sid,
      vcom_socket_type_str (sock_p->type));
  }));
  /* *INDENT-ON* */

  /* active vepolls */
  /* *INDENT-OFF* */
  pool_foreach (epoll_p, vsm->vepolls,
  ({
    printf(
      "epfd='%04d', vep_idx='%08x', "
      "type='%-30s', "
      "flags='%d', count='%d', close='%d'\n",
      epoll_p->epfd, epoll_p->vep_idx,
      vcom_socket_epoll_type_str (epoll_p->type),
      epoll_p->flags, epoll_p->count, epoll_p->close);
  }));
  /* *INDENT-ON* */

  /* active vepitems */
  /* *INDENT-OFF* */
  pool_foreach (item_p, vsm->vepitems,
  ({
    printf(
      "epfd='%04d', fd='%04d', "
      "next_fd='%04d', prev_fd='%04d', "
      "type='%-30s', "
      "events='%04x', revents='%04x'\n",
      item_p->epfd, item_p->fd,
      item_p->next_fd, item_p->prev_fd,
      vcom_socket_vcom_fd_type_str (item_p->type),
      item_p->event.events, item_p->revent.events);
  }));
  /* *INDENT-ON* */

  /* epitem indices, keyed by epfd */
  /* *INDENT-OFF* */
  hash_foreach (key_epfd, idx_vec,
                vsm->epitemidxs_by_epfd,
  ({
    printf("\n[ '%04d': ", key_epfd);
    vec_foreach (idx_p, idx_vec)
    {
      printf("'%04d' ", (int) *idx_p);
    }
    printf("]\n");
  }));
  /* *INDENT-ON* */

  /* epitem indices, keyed by fd */
  /* *INDENT-OFF* */
  hash_foreach (key_fd, idx_vec,
                vsm->epitemidxs_by_fd,
  ({
    printf("\n{ '%04d': ", key_fd);
    vec_foreach (idx_p, idx_vec)
    {
      printf("'%04d' ", (int) *idx_p);
    }
    printf("}\n");
  }));
  /* *INDENT-ON* */
}
/*
* Tears down the state held in vcom_socket_main.
*
* Apart from the optional debug line, nothing happens unless vsm->init
* is set. Otherwise the function releases, in this order:
*   1. the vepitems pool and the three epitem hash tables,
*   2. the vsockets pool and sockidx_by_fd,
*   3. the vepolls pool and epollidx_by_epfd,
* and finally clears vsm->init, the flag that vcom_socket_main_init ()
* checks before setting things up.
*
* NOTE(review): vsm->io_buffer is not released here - confirm whether
* it is owned and freed elsewhere.
*/
void
vcom_socket_main_destroy (void)
{
vcom_socket_main_t *vsm = &vcom_socket_main;
vcom_socket_t *vsock;
vcom_epoll_t *vepoll;
vcom_epitem_t *vepitem;
i32 epfd;
i32 fd;
i32 *vepitemidxs;
if (VCOM_DEBUG > 0)
printf ("vcom_socket_main_destroy\n");
if (vsm->init)
{
/*
* from active list of vepitems,
* remove all "vepitem" elements from the pool in a safe way
* */
/* epitems are handled first, while vsockets and vepolls still exist */
/* *INDENT-OFF* */
pool_flush (vepitem, vsm->vepitems,
({
if ((vepitem->type == FD_TYPE_EPOLL) ||
(vepitem->type == FD_TYPE_VCOM_SOCKET))
{
/* unregister the fd from its epfd, then reset the element */
vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL,
vepitem->fd, NULL);
vepitem_init (vepitem);
}
}));
/* *INDENT-ON* */
pool_free (vsm->vepitems);
hash_free (vsm->epitemidx_by_epfdfd);
/* free vepitemidxs for each epfd */
/* the hash values are vectors, so each one is freed before the table */
/* *INDENT-OFF* */
hash_foreach (epfd, vepitemidxs,
vsm->epitemidxs_by_epfd,
({
vec_free (vepitemidxs);
}));
/* *INDENT-ON* */
hash_free (vsm->epitemidxs_by_epfd);
/* free vepitemidxs for each fd */
/* *INDENT-OFF* */
hash_foreach (fd, vepitemidxs,
vsm->epitemidxs_by_fd,
({
vec_free (vepitemidxs);
}));
/* *INDENT-ON* */
hash_free (vsm->epitemidxs_by_fd);
/*
* from active list of vsockets,
* close socket and vppcom session
* */
/*
* Only sockets of type SOCKET_TYPE_VPPCOM_BOUND are closed here.
* NOTE(review): vcom_socket_close_socket () runs inside pool_foreach;
* confirm it does not modify vsm->vsockets while the pool is walked.
*/
/* *INDENT-OFF* */
pool_foreach (vsock, vsm->vsockets,
({
if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
{
vppcom_session_close (vsock->sid);
vcom_socket_close_socket (vsock->fd);
vsocket_init (vsock);
}
}));
/* *INDENT-ON* */
/*
* return vsocket element to the pool
* */
/* the flush body is intentionally empty: no per-element work is done */
/* *INDENT-OFF* */
pool_flush (vsock, vsm->vsockets,
({
// vsocket_init(vsock);
;
}));
/* *INDENT-ON* */
pool_free (vsm->vsockets);
hash_free (vsm->sockidx_by_fd);
/*
* from active list of vepolls,
* close epoll and vppcom_epoll
* */
/*
* Only epolls of type EPOLL_TYPE_VPPCOM_BOUND are closed here; the
* vep_idx is closed through vppcom_session_close ().
* NOTE(review): vcom_socket_close_epoll () runs inside pool_foreach;
* confirm it does not modify vsm->vepolls while the pool is walked.
*/
/* *INDENT-OFF* */
pool_foreach (vepoll, vsm->vepolls,
({
if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
{
vppcom_session_close (vepoll->vep_idx);
vcom_socket_close_epoll (vepoll->epfd); /* TBD: */
vepoll_init (vepoll);
}
}));
/* *INDENT-ON* */
/*
* return vepoll element to the pool
* */
/* the flush body is intentionally empty: no per-element work is done */
/* *INDENT-OFF* */
pool_flush (vepoll, vsm->vepolls,
({
// vepoll_init(vepoll);
;
}));
/* *INDENT-ON* */
pool_free (vsm->vepolls);
hash_free (vsm->epollidx_by_epfd);
/* mark the module as not initialised */
vsm->init = 0;
}
}
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/