| /* |
| * Copyright (c) 2016-2019 Cisco and/or its affiliates. |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifdef HAVE_GNU_SOURCE |
| #define _GNU_SOURCE |
| #endif |
| |
| #include <unistd.h> |
| #include <stdio.h> |
| #include <signal.h> |
| #include <dlfcn.h> |
| #include <pthread.h> |
| #include <time.h> |
| #include <stdarg.h> |
| #include <sys/resource.h> |
| #include <netinet/tcp.h> |
| #include <netinet/udp.h> |
| |
| #include <vcl/ldp_socket_wrapper.h> |
| #include <vcl/ldp.h> |
| #include <sys/time.h> |
| |
| #include <vcl/vcl_locked.h> |
| #include <vppinfra/time.h> |
| #include <vppinfra/bitmap.h> |
| #include <vppinfra/lock.h> |
| #include <vppinfra/pool.h> |
| #include <vppinfra/hash.h> |
| |
/*
 * Wrappers for the GCC constructor/destructor function attributes.
 * Both feature switches are defined right here, so the empty fallbacks
 * only take effect if the corresponding HAVE_* define is removed.
 */
#define HAVE_CONSTRUCTOR_ATTRIBUTE
#if defined(HAVE_CONSTRUCTOR_ATTRIBUTE)
#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
#else
#define CONSTRUCTOR_ATTRIBUTE
#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */

#define HAVE_DESTRUCTOR_ATTRIBUTE
#if defined(HAVE_DESTRUCTOR_ATTRIBUTE)
#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
#else
#define DESTRUCTOR_ATTRIBUTE
#endif /* HAVE_DESTRUCTOR_ATTRIBUTE */

#define LDP_MAX_NWORKERS 32
| |
/*
 * Yield the plain sockaddr pointer carried by a socket-call address
 * argument. With HAVE_GNU_SOURCE the argument is accessed through its
 * __sockaddr__ member; otherwise the argument already is the pointer.
 *
 * The expansion is a parenthesized expression with no trailing semicolon,
 * and it uses its own parameter, so the macro works for any argument name
 * and in any expression context.
 */
#ifdef HAVE_GNU_SOURCE
#define SOCKADDR_GET_SA(__addr) ((__addr).__sockaddr__)
#else
#define SOCKADDR_GET_SA(__addr) (__addr)
#endif
| |
/* Fallback definitions used when the included headers do not provide
 * these constants. */
#if !defined(UDP_SEGMENT)
#define UDP_SEGMENT 103
#endif

#if !defined(SO_ORIGINAL_DST)
/* from <linux/netfilter_ipv4.h> */
#define SO_ORIGINAL_DST 80
#endif
| |
| typedef struct ldp_worker_ctx_ |
| { |
| u8 *io_buffer; |
| clib_time_t clib_time; |
| |
| /* |
| * Select state |
| */ |
| clib_bitmap_t *rd_bitmap; |
| clib_bitmap_t *wr_bitmap; |
| clib_bitmap_t *ex_bitmap; |
| clib_bitmap_t *si_rd_bitmap; |
| clib_bitmap_t *si_wr_bitmap; |
| clib_bitmap_t *si_ex_bitmap; |
| clib_bitmap_t *libc_rd_bitmap; |
| clib_bitmap_t *libc_wr_bitmap; |
| clib_bitmap_t *libc_ex_bitmap; |
| |
| /* |
| * Poll state |
| */ |
| vcl_poll_t *vcl_poll; |
| struct pollfd *libc_poll; |
| u16 *libc_poll_idxs; |
| |
| /* |
| * Epoll state |
| */ |
| u8 epoll_wait_vcl; |
| u8 mq_epfd_added; |
| int vcl_mq_epfd; |
| } ldp_worker_ctx_t; |
| |
| __thread ldp_worker_ctx_t _ldp_worker = {}; |
| |
| /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure |
| * they are the same size */ |
| STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask), |
| "ldp bitmap size mismatch"); |
| STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask), |
| "ldp bitmap size mismatch"); |
| |
| typedef struct |
| { |
| int init; |
| char app_name[LDP_APP_NAME_MAX]; |
| u32 vlsh_bit_val; |
| u32 vlsh_bit_mask; |
| u32 debug; |
| |
| /** vcl needs next epoll_create to go to libc_epoll */ |
| u8 vcl_needs_real_epoll; |
| |
| /** |
| * crypto state used only for testing |
| */ |
| u8 transparent_tls; |
| u32 ckpair_index; |
| } ldp_main_t; |
| |
#define LDP_DEBUG ldp->debug

/*
 * Print a debug line to stderr, prefixed with the process id, when the
 * configured debug level is greater than _lvl. errno is saved and restored
 * around the print so that logging never disturbs the caller's error state.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement: it can safely be used as the body of an un-braced if/else.
 */
#define LDBG(_lvl, _fmt, _args...)                                            \
  do                                                                          \
    {                                                                         \
      if (ldp->debug > (_lvl))                                                \
	{                                                                     \
	  int errno_saved = errno;                                            \
	  fprintf (stderr, "ldp<%d>: " _fmt "\n", getpid (), ##_args);        \
	  errno = errno_saved;                                                \
	}                                                                     \
    }                                                                         \
  while (0)
| |
| static ldp_main_t ldp_main = { |
| .vlsh_bit_val = (1 << LDP_SID_BIT_MIN), |
| .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, |
| .debug = LDP_DEBUG_INIT, |
| .transparent_tls = 0, |
| .ckpair_index = ~0, |
| }; |
| |
| static ldp_main_t *ldp = &ldp_main; |
| |
| static inline ldp_worker_ctx_t * |
| ldp_worker_get_current (void) |
| { |
| return &_ldp_worker; |
| } |
| |
| /* |
| * RETURN: 0 on success or -1 on error. |
| * */ |
| static inline void |
| ldp_set_app_name (char *app_name) |
| { |
| snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ()); |
| } |
| |
| static inline char * |
| ldp_get_app_name () |
| { |
| if (ldp->app_name[0] == '\0') |
| ldp_set_app_name (program_invocation_short_name); |
| |
| return ldp->app_name; |
| } |
| |
| static inline int |
| ldp_vlsh_to_fd (vls_handle_t vlsh) |
| { |
| return (vlsh + ldp->vlsh_bit_val); |
| } |
| |
| static inline vls_handle_t |
| ldp_fd_to_vlsh (int fd) |
| { |
| if (fd < ldp->vlsh_bit_val) |
| return VLS_INVALID_HANDLE; |
| |
| return (fd - ldp->vlsh_bit_val); |
| } |
| |
| static void |
| ldp_init_cfg (void) |
| { |
| char *env_var_str = getenv (LDP_ENV_DEBUG); |
| if (env_var_str) |
| { |
| u32 tmp; |
| if (sscanf (env_var_str, "%u", &tmp) != 1) |
| clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in" |
| " the env var " LDP_ENV_DEBUG " (%s)!", getpid (), |
| env_var_str); |
| else |
| { |
| ldp->debug = tmp; |
| LDBG (0, "configured LDP debug level (%u) from env var " |
| LDP_ENV_DEBUG "!", ldp->debug); |
| } |
| } |
| |
| env_var_str = getenv (LDP_ENV_APP_NAME); |
| if (env_var_str) |
| { |
| ldp_set_app_name (env_var_str); |
| LDBG (0, "configured LDP app name (%s) from the env var " |
| LDP_ENV_APP_NAME "!", ldp->app_name); |
| } |
| |
| env_var_str = getenv (LDP_ENV_SID_BIT); |
| if (env_var_str) |
| { |
| u32 sb; |
| if (sscanf (env_var_str, "%u", &sb) != 1) |
| { |
| LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var " |
| LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str, |
| ldp->vlsh_bit_val, ldp->vlsh_bit_val); |
| } |
| else if (sb < LDP_SID_BIT_MIN) |
| { |
| ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN); |
| ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; |
| |
| LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " |
| LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN" |
| " (%d)! sid bit value %d (0x%x)", sb, env_var_str, |
| LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val); |
| } |
| else if (sb > LDP_SID_BIT_MAX) |
| { |
| ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX); |
| ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; |
| |
| LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " |
| LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX" |
| " (%d)! sid bit value %d (0x%x)", sb, env_var_str, |
| LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val); |
| } |
| else |
| { |
| ldp->vlsh_bit_val = (1 << sb); |
| ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; |
| |
| LDBG (0, "configured LDP sid bit (%u) from " |
| LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", sb, |
| ldp->vlsh_bit_val, ldp->vlsh_bit_val); |
| } |
| |
| /* Make sure there are enough bits in the fd set for vcl sessions */ |
| if (ldp->vlsh_bit_val > FD_SETSIZE / 2) |
| { |
| /* Only valid for select/pselect, so just WARNING and not exit */ |
| LDBG (0, |
| "WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, " |
| "select/pselect not supported now!", |
| ldp->vlsh_bit_val, FD_SETSIZE / 2); |
| } |
| } |
| env_var_str = getenv (LDP_ENV_TLS_TRANS); |
| if (env_var_str) |
| { |
| ldp->transparent_tls = 1; |
| } |
| } |
| |
| static int |
| ldp_init (void) |
| { |
| int rv; |
| |
| if (ldp->init) |
| { |
| LDBG (0, "LDP is initialized already"); |
| return 0; |
| } |
| |
| ldp_init_cfg (); |
| ldp->init = 1; |
| ldp->vcl_needs_real_epoll = 1; |
| rv = vls_app_create (ldp_get_app_name ()); |
| if (rv != VPPCOM_OK) |
| { |
| ldp->vcl_needs_real_epoll = 0; |
| if (rv == VPPCOM_EEXIST) |
| return 0; |
| LDBG (2, |
| "\nERROR: ldp_init: vppcom_app_create()" |
| " failed! rv = %d (%s)\n", |
| rv, vppcom_retval_str (rv)); |
| ldp->init = 0; |
| return rv; |
| } |
| ldp->vcl_needs_real_epoll = 0; |
| |
| LDBG (0, "LDP initialization: done!"); |
| |
| return 0; |
| } |
| |
/*
 * Lazily initialize LDP at the top of an interposed call. If
 * initialization fails, errno is set from the (negated) ldp_init () result
 * and the *enclosing function* returns -1.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and cannot
 * capture a following "else".
 */
#define ldp_init_check()                                                      \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!ldp->init))                                         \
	{                                                                     \
	  if ((errno = -ldp_init ()))                                         \
	    return -1;                                                        \
	}                                                                     \
    }                                                                         \
  while (0)
| |
| int |
| close (int fd) |
| { |
| vls_handle_t vlsh; |
| int rv, epfd; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| epfd = vls_get_libc_epfd (vlsh); |
| if (epfd > 0) |
| { |
| ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); |
| |
| LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd); |
| |
| libc_close (epfd); |
| ldpw->mq_epfd_added = 0; |
| |
| vls_set_libc_epfd (vlsh, 0); |
| } |
| else if (PREDICT_FALSE (epfd < 0)) |
| { |
| errno = -epfd; |
| rv = -1; |
| goto done; |
| } |
| |
| LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh); |
| |
| rv = vls_close (vlsh); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| LDBG (0, "fd %d: calling libc_close", fd); |
| rv = libc_close (fd); |
| } |
| |
| done: |
| return rv; |
| } |
| |
| ssize_t |
| read (int fd, void *buf, size_t nbytes) |
| { |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = vls_read (vlsh, buf, nbytes); |
| if (size < 0) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_read (fd, buf, nbytes); |
| } |
| |
| return size; |
| } |
| |
| ssize_t |
| readv (int fd, const struct iovec * iov, int iovcnt) |
| { |
| int rv = 0, i, total = 0; |
| vls_handle_t vlsh; |
| ssize_t size = 0; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| for (i = 0; i < iovcnt; ++i) |
| { |
| rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len); |
| if (rv <= 0) |
| break; |
| else |
| { |
| total += rv; |
| if (rv < iov[i].iov_len) |
| break; |
| } |
| } |
| if (rv < 0 && total == 0) |
| { |
| errno = -rv; |
| size = -1; |
| } |
| else |
| size = total; |
| } |
| else |
| { |
| size = libc_readv (fd, iov, iovcnt); |
| } |
| |
| return size; |
| } |
| |
| ssize_t |
| write (int fd, const void *buf, size_t nbytes) |
| { |
| vls_handle_t vlsh; |
| ssize_t size = 0; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = vls_write_msg (vlsh, (void *) buf, nbytes); |
| if (size < 0) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_write (fd, buf, nbytes); |
| } |
| |
| return size; |
| } |
| |
| ssize_t |
| writev (int fd, const struct iovec * iov, int iovcnt) |
| { |
| ssize_t size = 0, total = 0; |
| vls_handle_t vlsh; |
| int i, rv = 0; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| for (i = 0; i < iovcnt; ++i) |
| { |
| rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len); |
| if (rv < 0) |
| break; |
| else |
| { |
| total += rv; |
| if (rv < iov[i].iov_len) |
| break; |
| } |
| } |
| |
| if (rv < 0 && total == 0) |
| { |
| errno = -rv; |
| size = -1; |
| } |
| else |
| size = total; |
| } |
| else |
| { |
| size = libc_writev (fd, iov, iovcnt); |
| } |
| |
| return size; |
| } |
| |
| static int |
| fcntl_internal (int fd, int cmd, va_list ap) |
| { |
| vls_handle_t vlsh; |
| int rv = 0; |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| int flags = va_arg (ap, int); |
| u32 size; |
| |
| size = sizeof (flags); |
| rv = -EOPNOTSUPP; |
| switch (cmd) |
| { |
| case F_SETFL: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); |
| break; |
| |
| case F_GETFL: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size); |
| if (rv == VPPCOM_OK) |
| rv = flags; |
| break; |
| case F_SETFD: |
| /* TODO handle this */ |
| LDBG (0, "F_SETFD ignored flags %u", flags); |
| rv = 0; |
| break; |
| default: |
| rv = -EOPNOTSUPP; |
| break; |
| } |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| #ifdef HAVE_FCNTL64 |
| rv = libc_vfcntl64 (fd, cmd, ap); |
| #else |
| rv = libc_vfcntl (fd, cmd, ap); |
| #endif |
| } |
| |
| return rv; |
| } |
| |
/* Interposed fcntl (): packs the variadic arguments and hands them to
 * fcntl_internal (). */
int
fcntl (int fd, int cmd, ...)
{
  va_list args;
  int result;

  ldp_init_check ();

  va_start (args, cmd);
  result = fcntl_internal (fd, cmd, args);
  va_end (args);

  return result;
}
| |
/* Interposed fcntl64 (): same handling as fcntl (), via fcntl_internal (). */
int
fcntl64 (int fd, int cmd, ...)
{
  va_list args;
  int result;

  ldp_init_check ();

  va_start (args, cmd);
  result = fcntl_internal (fd, cmd, args);
  va_end (args);

  return result;
}
| |
| int |
| ioctl (int fd, unsigned long int cmd, ...) |
| { |
| vls_handle_t vlsh; |
| va_list ap; |
| int rv; |
| |
| ldp_init_check (); |
| |
| va_start (ap, cmd); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| switch (cmd) |
| { |
| case FIONREAD: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); |
| break; |
| case TIOCOUTQ: |
| { |
| u32 *buf = va_arg (ap, void *); |
| u32 *buflen = va_arg (ap, u32 *); |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITEQ, buf, buflen); |
| } |
| break; |
| case FIONBIO: |
| { |
| u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0; |
| u32 size = sizeof (flags); |
| |
| /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than |
| * non-blocking, the flags should be read here and merged |
| * with O_NONBLOCK. |
| */ |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); |
| } |
| break; |
| |
| default: |
| rv = -EOPNOTSUPP; |
| break; |
| } |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| rv = libc_vioctl (fd, cmd, ap); |
| } |
| |
| va_end (ap); |
| return rv; |
| } |
| |
| always_inline void |
| ldp_select_init_maps (fd_set * __restrict original, |
| clib_bitmap_t ** resultb, clib_bitmap_t ** libcb, |
| clib_bitmap_t ** vclb, int nfds, u32 minbits, |
| u32 n_bytes, uword * si_bits, uword * libc_bits) |
| { |
| uword si_bits_set, libc_bits_set; |
| vls_handle_t vlsh; |
| int fd; |
| |
| clib_bitmap_validate (*vclb, minbits); |
| clib_bitmap_validate (*libcb, minbits); |
| clib_bitmap_validate (*resultb, minbits); |
| clib_memcpy_fast (*resultb, original, n_bytes); |
| memset (original, 0, n_bytes); |
| |
| clib_bitmap_foreach (fd, *resultb) { |
| if (fd > nfds) |
| break; |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh == VLS_INVALID_HANDLE) |
| clib_bitmap_set_no_check (*libcb, fd, 1); |
| else if (vlsh_to_worker_index (vlsh) != vppcom_worker_index ()) |
| clib_warning ("migration currently not supported"); |
| else |
| *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1); |
| } |
| |
| si_bits_set = clib_bitmap_last_set (*vclb) + 1; |
| *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits; |
| clib_bitmap_validate (*resultb, *si_bits); |
| |
| libc_bits_set = clib_bitmap_last_set (*libcb) + 1; |
| *libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits; |
| } |
| |
| always_inline int |
| ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb) |
| { |
| vls_handle_t vlsh; |
| uword si; |
| int fd; |
| |
| if (!libcb) |
| return 0; |
| |
| clib_bitmap_foreach (si, vclb) { |
| vlsh = vls_session_index_to_vlsh (si); |
| ASSERT (vlsh != VLS_INVALID_HANDLE); |
| fd = ldp_vlsh_to_fd (vlsh); |
| if (PREDICT_FALSE (fd < 0)) |
| { |
| errno = EBADFD; |
| return -1; |
| } |
| FD_SET (fd, libcb); |
| } |
| |
| return 0; |
| } |
| |
| always_inline void |
| ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb) |
| { |
| uword fd; |
| |
| if (!libcb) |
| return; |
| |
| clib_bitmap_foreach (fd, result) |
| FD_SET ((int)fd, libcb); |
| } |
| |
| int |
| ldp_pselect (int nfds, fd_set * __restrict readfds, |
| fd_set * __restrict writefds, |
| fd_set * __restrict exceptfds, |
| const struct timespec *__restrict timeout, |
| const __sigset_t * __restrict sigmask) |
| { |
| u32 minbits = clib_max (nfds, BITS (uword)), n_bytes; |
| struct timespec libc_tspec = { 0 }; |
| f64 time_out, vcl_timeout = 0; |
| uword si_bits, libc_bits; |
| ldp_worker_ctx_t *ldpw; |
| int rv, bits_set = 0; |
| |
| if (nfds < 0) |
| { |
| errno = EINVAL; |
| return -1; |
| } |
| |
| if (PREDICT_FALSE (vppcom_worker_index () == ~0)) |
| vls_register_vcl_worker (); |
| |
| ldpw = ldp_worker_get_current (); |
| |
| if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) |
| clib_time_init (&ldpw->clib_time); |
| |
| if (timeout) |
| { |
| time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ? |
| (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9; |
| |
| time_out += clib_time_now (&ldpw->clib_time); |
| |
| /* select as fine grained sleep */ |
| if (!nfds) |
| { |
| while (clib_time_now (&ldpw->clib_time) < time_out) |
| ; |
| return 0; |
| } |
| } |
| else if (!nfds) |
| { |
| errno = EINVAL; |
| return -1; |
| } |
| else |
| time_out = -1; |
| |
| if (nfds <= ldp->vlsh_bit_val) |
| { |
| rv = libc_pselect (nfds, readfds, writefds, exceptfds, |
| timeout, sigmask); |
| goto done; |
| } |
| |
| si_bits = libc_bits = 0; |
| n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0); |
| |
| if (readfds) |
| ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap, |
| &ldpw->si_rd_bitmap, nfds, minbits, n_bytes, |
| &si_bits, &libc_bits); |
| if (writefds) |
| ldp_select_init_maps (writefds, &ldpw->wr_bitmap, |
| &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds, |
| minbits, n_bytes, &si_bits, &libc_bits); |
| if (exceptfds) |
| ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap, |
| &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds, |
| minbits, n_bytes, &si_bits, &libc_bits); |
| |
| if (PREDICT_FALSE (!si_bits && !libc_bits)) |
| { |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| |
| if (!si_bits) |
| libc_tspec = timeout ? *timeout : libc_tspec; |
| |
| do |
| { |
| if (si_bits) |
| { |
| if (readfds) |
| clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap, |
| vec_len (ldpw->si_rd_bitmap) * |
| sizeof (clib_bitmap_t)); |
| if (writefds) |
| clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap, |
| vec_len (ldpw->si_wr_bitmap) * |
| sizeof (clib_bitmap_t)); |
| if (exceptfds) |
| clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap, |
| vec_len (ldpw->si_ex_bitmap) * |
| sizeof (clib_bitmap_t)); |
| |
| rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, |
| writefds ? ldpw->wr_bitmap : NULL, |
| exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout); |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| else if (rv > 0) |
| { |
| if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds)) |
| { |
| rv = -1; |
| goto done; |
| } |
| |
| if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds)) |
| { |
| rv = -1; |
| goto done; |
| } |
| |
| if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds)) |
| { |
| rv = -1; |
| goto done; |
| } |
| bits_set = rv; |
| } |
| } |
| if (libc_bits) |
| { |
| if (readfds) |
| clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap, |
| vec_len (ldpw->libc_rd_bitmap) * |
| sizeof (clib_bitmap_t)); |
| if (writefds) |
| clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap, |
| vec_len (ldpw->libc_wr_bitmap) * |
| sizeof (clib_bitmap_t)); |
| if (exceptfds) |
| clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap, |
| vec_len (ldpw->libc_ex_bitmap) * |
| sizeof (clib_bitmap_t)); |
| |
| rv = libc_pselect (libc_bits, |
| readfds ? (fd_set *) ldpw->rd_bitmap : NULL, |
| writefds ? (fd_set *) ldpw->wr_bitmap : NULL, |
| exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL, |
| &libc_tspec, sigmask); |
| if (rv > 0) |
| { |
| ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds); |
| ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds); |
| ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds); |
| bits_set += rv; |
| } |
| } |
| |
| if (bits_set) |
| { |
| rv = bits_set; |
| goto done; |
| } |
| } |
| while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out)); |
| rv = 0; |
| |
| done: |
| /* TBD: set timeout to amount of time left */ |
| clib_bitmap_zero (ldpw->rd_bitmap); |
| clib_bitmap_zero (ldpw->si_rd_bitmap); |
| clib_bitmap_zero (ldpw->libc_rd_bitmap); |
| clib_bitmap_zero (ldpw->wr_bitmap); |
| clib_bitmap_zero (ldpw->si_wr_bitmap); |
| clib_bitmap_zero (ldpw->libc_wr_bitmap); |
| clib_bitmap_zero (ldpw->ex_bitmap); |
| clib_bitmap_zero (ldpw->si_ex_bitmap); |
| clib_bitmap_zero (ldpw->libc_ex_bitmap); |
| |
| return rv; |
| } |
| |
/*
 * Interposed select (): converts the optional timeval into a timespec and
 * delegates to ldp_pselect () without a signal mask.
 */
int
select (int nfds, fd_set * __restrict readfds,
	fd_set * __restrict writefds,
	fd_set * __restrict exceptfds, struct timeval *__restrict timeout)
{
  struct timespec ts, *ts_arg = NULL;

  if (timeout)
    {
      ts.tv_sec = timeout->tv_sec;
      ts.tv_nsec = timeout->tv_usec * 1000;	/* microseconds -> nanoseconds */
      ts_arg = &ts;
    }

  return ldp_pselect (nfds, readfds, writefds, exceptfds, ts_arg, NULL);
}
| |
#ifdef __USE_XOPEN2K
/*
 * Interposed pselect (): delegates to ldp_pselect ().
 *
 * The caller's signal mask is passed through; ldp_pselect () forwards it to
 * libc_pselect () whenever libc descriptors are involved. Previously the
 * mask was replaced by 0 and therefore silently ignored.
 */
int
pselect (int nfds, fd_set * __restrict readfds,
	 fd_set * __restrict writefds,
	 fd_set * __restrict exceptfds,
	 const struct timespec *__restrict timeout,
	 const __sigset_t * __restrict sigmask)
{
  return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask);
}
#endif
| |
| /* If transparent TLS mode is turned on, then ldp will load key and cert. |
| */ |
| static int |
| load_cert_key_pair (void) |
| { |
| char *cert_str = getenv (LDP_ENV_TLS_CERT); |
| char *key_str = getenv (LDP_ENV_TLS_KEY); |
| char cert_buf[4096], key_buf[4096]; |
| int cert_size, key_size; |
| vppcom_cert_key_pair_t crypto; |
| int ckp_index; |
| FILE *fp; |
| |
| if (!cert_str || !key_str) |
| { |
| LDBG (0, "ERROR: failed to read LDP environment %s\n", |
| LDP_ENV_TLS_CERT); |
| return -1; |
| } |
| |
| fp = fopen (cert_str, "r"); |
| if (fp == NULL) |
| { |
| LDBG (0, "ERROR: failed to open cert file %s \n", cert_str); |
| return -1; |
| } |
| cert_size = fread (cert_buf, sizeof (char), sizeof (cert_buf), fp); |
| fclose (fp); |
| |
| fp = fopen (key_str, "r"); |
| if (fp == NULL) |
| { |
| LDBG (0, "ERROR: failed to open key file %s \n", key_str); |
| return -1; |
| } |
| key_size = fread (key_buf, sizeof (char), sizeof (key_buf), fp); |
| fclose (fp); |
| |
| crypto.cert = cert_buf; |
| crypto.key = key_buf; |
| crypto.cert_len = cert_size; |
| crypto.key_len = key_size; |
| ckp_index = vppcom_add_cert_key_pair (&crypto); |
| if (ckp_index < 0) |
| { |
| LDBG (0, "ERROR: failed to add cert key pair\n"); |
| return -1; |
| } |
| |
| ldp->ckpair_index = ckp_index; |
| |
| return 0; |
| } |
| |
| static int |
| assign_cert_key_pair (vls_handle_t vlsh) |
| { |
| uint32_t ckp_len; |
| |
| if (ldp->ckpair_index == ~0 && load_cert_key_pair () < 0) |
| return -1; |
| |
| ckp_len = sizeof (ldp->ckpair_index); |
| return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len); |
| } |
| |
| int |
| socket (int domain, int type, int protocol) |
| { |
| int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); |
| u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0; |
| vls_handle_t vlsh; |
| |
| ldp_init_check (); |
| |
| if (((domain == AF_INET) || (domain == AF_INET6)) && |
| ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM))) |
| { |
| u8 proto; |
| if (ldp->transparent_tls) |
| { |
| proto = VPPCOM_PROTO_TLS; |
| } |
| else |
| proto = ((sock_type == SOCK_DGRAM) ? |
| VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP); |
| |
| LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u", |
| proto, vppcom_proto_str (proto), is_nonblocking); |
| |
| vlsh = vls_create (proto, is_nonblocking); |
| if (vlsh < 0) |
| { |
| errno = -vlsh; |
| rv = -1; |
| } |
| else |
| { |
| if (ldp->transparent_tls) |
| { |
| if (assign_cert_key_pair (vlsh) < 0) |
| return -1; |
| } |
| rv = ldp_vlsh_to_fd (vlsh); |
| } |
| } |
| else |
| { |
| LDBG (0, "calling libc_socket"); |
| rv = libc_socket (domain, type, protocol); |
| } |
| |
| return rv; |
| } |
| |
/*
 * Create two new sockets, of type TYPE in domain DOMAIN and using
 * protocol PROTOCOL, which are connected to each other, and put file
 * descriptors for them in FDS[0] and FDS[1].  If PROTOCOL is zero,
 * one will be chosen automatically.
 * Returns 0 on success, -1 for errors.
 *
 * Socket pairs are not implemented for the domain/type combinations that
 * LDP would serve through vls (AF_INET / AF_INET6 with SOCK_STREAM or
 * SOCK_DGRAM); those fail with ENOSYS. All others go to libc.
 */
int
socketpair (int domain, int type, int protocol, int fds[2])
{
  int sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
  int is_inet, is_vls_type;

  ldp_init_check ();

  is_inet = (domain == AF_INET) || (domain == AF_INET6);
  is_vls_type = (sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM);

  if (!(is_inet && is_vls_type))
    {
      LDBG (1, "calling libc_socketpair");
      return libc_socketpair (domain, type, protocol, fds);
    }

  LDBG (0, "LDP-TBD");
  errno = ENOSYS;
  return -1;
}
| |
| int |
| bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len) |
| { |
| const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_t ep; |
| |
| switch (addr->sa_family) |
| { |
| case AF_INET: |
| if (len != sizeof (struct sockaddr_in)) |
| { |
| LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!", |
| fd, vlsh, len); |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| ep.is_ip4 = VPPCOM_IS_IP4; |
| ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; |
| ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; |
| break; |
| |
| case AF_INET6: |
| if (len != sizeof (struct sockaddr_in6)) |
| { |
| LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!", |
| fd, vlsh, len); |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| ep.is_ip4 = VPPCOM_IS_IP6; |
| ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; |
| ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; |
| break; |
| |
| default: |
| LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!", |
| fd, vlsh, addr->sa_family); |
| errno = EAFNOSUPPORT; |
| rv = -1; |
| goto done; |
| } |
| LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh, |
| addr, len); |
| |
| rv = vls_bind (vlsh, &ep); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len); |
| rv = libc_bind (fd, addr, len); |
| } |
| |
| done: |
| LDBG (1, "fd %d: returning %d", fd, rv); |
| |
| return rv; |
| } |
| |
| static inline int |
| ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len, |
| vppcom_endpt_t *ep) |
| { |
| int rv = 0, sa_len, copy_len; |
| |
| ldp_init_check (); |
| |
| if (addr && len && ep) |
| { |
| addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6; |
| switch (addr->sa_family) |
| { |
| case AF_INET: |
| ((struct sockaddr_in *) addr)->sin_port = ep->port; |
| if (*len > sizeof (struct sockaddr_in)) |
| *len = sizeof (struct sockaddr_in); |
| sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr); |
| copy_len = *len - sa_len; |
| if (copy_len > 0) |
| memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip, |
| copy_len); |
| break; |
| |
| case AF_INET6: |
| ((struct sockaddr_in6 *) addr)->sin6_port = ep->port; |
| if (*len > sizeof (struct sockaddr_in6)) |
| *len = sizeof (struct sockaddr_in6); |
| sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr); |
| copy_len = *len - sa_len; |
| if (copy_len > 0) |
| memcpy (((struct sockaddr_in6 *) addr)->sin6_addr. |
| __in6_u.__u6_addr8, ep->ip, copy_len); |
| break; |
| |
| default: |
| /* Not possible */ |
| rv = -EAFNOSUPPORT; |
| break; |
| } |
| } |
| return rv; |
| } |
| |
| int |
| getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len) |
| { |
| struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_t ep; |
| u8 addr_buf[sizeof (struct in6_addr)]; |
| u32 size = sizeof (ep); |
| |
| ep.ip = addr_buf; |
| |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| else |
| { |
| rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| } |
| else |
| { |
| rv = libc_getsockname (fd, _addr, len); |
| } |
| |
| return rv; |
| } |
| |
| int |
| connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len) |
| { |
| const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| if (!addr) |
| { |
| LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len); |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_t ep; |
| |
| switch (addr->sa_family) |
| { |
| case AF_INET: |
| if (len != sizeof (struct sockaddr_in)) |
| { |
| LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!", |
| fd, vlsh, len); |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| ep.is_ip4 = VPPCOM_IS_IP4; |
| ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; |
| ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; |
| break; |
| |
| case AF_INET6: |
| if (len != sizeof (struct sockaddr_in6)) |
| { |
| LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!", |
| fd, vlsh, len); |
| errno = EINVAL; |
| rv = -1; |
| goto done; |
| } |
| ep.is_ip4 = VPPCOM_IS_IP6; |
| ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; |
| ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; |
| break; |
| |
| default: |
| LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!", |
| fd, vlsh, addr->sa_family); |
| errno = EAFNOSUPPORT; |
| rv = -1; |
| goto done; |
| } |
| LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd, |
| vlsh, addr, len); |
| |
| rv = vls_connect (vlsh, &ep); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u", |
| fd, addr, len); |
| |
| rv = libc_connect (fd, addr, len); |
| } |
| |
| done: |
| LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv); |
| return rv; |
| } |
| |
| int |
| getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len) |
| { |
| struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_t ep; |
| u8 addr_buf[sizeof (struct in6_addr)]; |
| u32 size = sizeof (ep); |
| |
| ep.ip = addr_buf; |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| else |
| { |
| rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| } |
| else |
| { |
| rv = libc_getpeername (fd, addr, len); |
| } |
| |
| return rv; |
| } |
| |
| ssize_t |
| send (int fd, const void *buf, size_t n, int flags) |
| { |
| vls_handle_t vlsh = ldp_fd_to_vlsh (fd); |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = vls_sendto (vlsh, (void *) buf, n, flags, NULL); |
| if (size < VPPCOM_OK) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_send (fd, buf, n, flags); |
| } |
| |
| return size; |
| } |
| |
| ssize_t |
| sendfile (int out_fd, int in_fd, off_t * offset, size_t len) |
| { |
| ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); |
| vls_handle_t vlsh; |
| ssize_t size = 0; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (out_fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| int rv; |
| ssize_t results = 0; |
| size_t n_bytes_left = len; |
| size_t bytes_to_read; |
| int nbytes; |
| u8 eagain = 0; |
| u32 flags, flags_len = sizeof (flags); |
| |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len); |
| if (PREDICT_FALSE (rv != VPPCOM_OK)) |
| { |
| LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!", |
| out_fd, vlsh, rv, vppcom_retval_str (rv)); |
| |
| vec_reset_length (ldpw->io_buffer); |
| errno = -rv; |
| size = -1; |
| goto done; |
| } |
| |
| if (offset) |
| { |
| off_t off = lseek (in_fd, *offset, SEEK_SET); |
| if (PREDICT_FALSE (off == -1)) |
| { |
| size = -1; |
| goto done; |
| } |
| |
| ASSERT (off == *offset); |
| } |
| |
| do |
| { |
| size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0); |
| if (size < 0) |
| { |
| LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!", |
| out_fd, vlsh, size, vppcom_retval_str (size)); |
| vec_reset_length (ldpw->io_buffer); |
| errno = -size; |
| size = -1; |
| goto done; |
| } |
| |
| bytes_to_read = size; |
| if (bytes_to_read == 0) |
| { |
| if (flags & O_NONBLOCK) |
| { |
| if (!results) |
| eagain = 1; |
| goto update_offset; |
| } |
| else |
| continue; |
| } |
| bytes_to_read = clib_min (n_bytes_left, bytes_to_read); |
| vec_validate (ldpw->io_buffer, bytes_to_read); |
| nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read); |
| if (nbytes < 0) |
| { |
| if (results == 0) |
| { |
| vec_reset_length (ldpw->io_buffer); |
| size = -1; |
| goto done; |
| } |
| goto update_offset; |
| } |
| |
| size = vls_write (vlsh, ldpw->io_buffer, nbytes); |
| if (size < 0) |
| { |
| if (size == VPPCOM_EAGAIN) |
| { |
| if (flags & O_NONBLOCK) |
| { |
| if (!results) |
| eagain = 1; |
| goto update_offset; |
| } |
| else |
| continue; |
| } |
| if (results == 0) |
| { |
| vec_reset_length (ldpw->io_buffer); |
| errno = -size; |
| size = -1; |
| goto done; |
| } |
| goto update_offset; |
| } |
| |
| results += nbytes; |
| ASSERT (n_bytes_left >= nbytes); |
| n_bytes_left = n_bytes_left - nbytes; |
| } |
| while (n_bytes_left > 0); |
| |
| update_offset: |
| vec_reset_length (ldpw->io_buffer); |
| if (offset) |
| { |
| off_t off = lseek (in_fd, *offset, SEEK_SET); |
| if (PREDICT_FALSE (off == -1)) |
| { |
| size = -1; |
| goto done; |
| } |
| |
| ASSERT (off == *offset); |
| *offset += results + 1; |
| } |
| if (eagain) |
| { |
| errno = EAGAIN; |
| size = -1; |
| } |
| else |
| size = results; |
| } |
| else |
| { |
| size = libc_sendfile (out_fd, in_fd, offset, len); |
| } |
| |
| done: |
| return size; |
| } |
| |
/*
 * sendfile64() interposer: identical contract to sendfile(), to which the
 * call is forwarded unchanged.
 */
ssize_t
sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len)
{
  ssize_t n_sent = sendfile (out_fd, in_fd, offset, len);

  return n_sent;
}
| |
| ssize_t |
| recv (int fd, void *buf, size_t n, int flags) |
| { |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = vls_recvfrom (vlsh, buf, n, flags, NULL); |
| if (size < 0) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_recv (fd, buf, n, flags); |
| } |
| |
| return size; |
| } |
| |
/*
 * Checked recv() entry point: refuses requests larger than the destination
 * buffer (n > buflen) by returning -1, otherwise behaves exactly like
 * recv (fd, buf, n, flags).
 */
ssize_t
__recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
{
  return (n <= buflen) ? recv (fd, buf, n, flags) : -1;
}
| |
| static inline int |
| ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, |
| vppcom_endpt_tlv_t *app_tlvs, int flags, |
| __CONST_SOCKADDR_ARG _addr, socklen_t addr_len) |
| { |
| const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vppcom_endpt_t *ep = 0; |
| vppcom_endpt_t _ep; |
| |
| _ep.app_tlvs = app_tlvs; |
| |
| if (addr) |
| { |
| ep = &_ep; |
| switch (addr->sa_family) |
| { |
| case AF_INET: |
| ep->is_ip4 = VPPCOM_IS_IP4; |
| ep->ip = |
| (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr; |
| ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port; |
| break; |
| |
| case AF_INET6: |
| ep->is_ip4 = VPPCOM_IS_IP6; |
| ep->ip = |
| (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; |
| ep->port = |
| (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port; |
| break; |
| |
| default: |
| return EAFNOSUPPORT; |
| } |
| } |
| |
| return vls_sendto (vlsh, (void *) buf, n, flags, ep); |
| } |
| |
| static int |
| ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags, |
| __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len) |
| { |
| u8 src_addr[sizeof (struct sockaddr_in6)]; |
| struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vppcom_endpt_t ep; |
| ssize_t size; |
| int rv; |
| |
| if (addr) |
| { |
| ep.ip = src_addr; |
| size = vls_recvfrom (vlsh, buf, n, flags, &ep); |
| |
| if (size > 0) |
| { |
| rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); |
| if (rv < 0) |
| size = rv; |
| } |
| } |
| else |
| size = vls_recvfrom (vlsh, buf, n, flags, NULL); |
| |
| return size; |
| } |
| |
| ssize_t |
| sendto (int fd, const void *buf, size_t n, int flags, |
| __CONST_SOCKADDR_ARG _addr, socklen_t addr_len) |
| { |
| const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len); |
| if (size < 0) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_sendto (fd, buf, n, flags, addr, addr_len); |
| } |
| |
| return size; |
| } |
| |
| ssize_t |
| recvfrom (int fd, void *__restrict buf, size_t n, int flags, |
| __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) |
| { |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len); |
| if (size < 0) |
| { |
| errno = -size; |
| size = -1; |
| } |
| } |
| else |
| { |
| size = libc_recvfrom (fd, buf, n, flags, addr, addr_len); |
| } |
| |
| return size; |
| } |
| |
| static int |
| ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg, |
| vppcom_endpt_tlv_t **app_tlvs) |
| { |
| uint8_t *ad, *at = (uint8_t *) *app_tlvs; |
| vppcom_endpt_tlv_t *adh; |
| struct in_pktinfo *pi; |
| struct cmsghdr *cmsg; |
| |
| cmsg = CMSG_FIRSTHDR (msg); |
| |
| while (cmsg != NULL) |
| { |
| switch (cmsg->cmsg_level) |
| { |
| case SOL_UDP: |
| switch (cmsg->cmsg_type) |
| { |
| case UDP_SEGMENT: |
| vec_add2 (at, adh, sizeof (*adh)); |
| adh->data_type = VCL_UDP_SEGMENT; |
| adh->data_len = sizeof (uint16_t); |
| vec_add2 (at, ad, sizeof (uint16_t)); |
| *(uint16_t *) ad = *(uint16_t *) CMSG_DATA (cmsg); |
| break; |
| default: |
| LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type); |
| break; |
| } |
| break; |
| case SOL_IP: |
| switch (cmsg->cmsg_type) |
| { |
| case IP_PKTINFO: |
| vec_add2 (at, adh, sizeof (*adh)); |
| adh->data_type = VCL_IP_PKTINFO; |
| adh->data_len = sizeof (struct in_addr); |
| vec_add2 (at, ad, sizeof (struct in_addr)); |
| pi = (void *) CMSG_DATA (cmsg); |
| clib_memcpy_fast (ad, &pi->ipi_spec_dst, |
| sizeof (struct in_addr)); |
| break; |
| default: |
| LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type); |
| break; |
| } |
| break; |
| default: |
| LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level); |
| break; |
| } |
| cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg); |
| } |
| *app_tlvs = (vppcom_endpt_tlv_t *) at; |
| return 0; |
| } |
| |
| static int |
| ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg) |
| { |
| u32 optval, optlen = sizeof (optval); |
| struct cmsghdr *cmsg; |
| |
| cmsg = CMSG_FIRSTHDR (msg); |
| memset (cmsg, 0, sizeof (*cmsg)); |
| |
| if (!vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen)) |
| return 0; |
| |
| if (optval) |
| { |
| vppcom_endpt_t ep; |
| u8 addr_buf[sizeof (struct in_addr)]; |
| u32 size = sizeof (ep); |
| |
| ep.ip = addr_buf; |
| |
| if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size)) |
| { |
| struct in_pktinfo pi = {}; |
| |
| clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr)); |
| cmsg->cmsg_level = SOL_IP; |
| cmsg->cmsg_type = IP_PKTINFO; |
| cmsg->cmsg_len = CMSG_LEN (sizeof (pi)); |
| clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi)); |
| } |
| } |
| |
| return 0; |
| } |
| |
| ssize_t |
| sendmsg (int fd, const struct msghdr * msg, int flags) |
| { |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_tlv_t *app_tlvs = 0; |
| struct iovec *iov = msg->msg_iov; |
| ssize_t total = 0; |
| int i, rv = 0; |
| |
| ldp_parse_cmsg (vlsh, msg, &app_tlvs); |
| |
| for (i = 0; i < msg->msg_iovlen; ++i) |
| { |
| rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs, |
| flags, msg->msg_name, msg->msg_namelen); |
| if (rv < 0) |
| break; |
| else |
| { |
| total += rv; |
| if (rv < iov[i].iov_len) |
| break; |
| } |
| } |
| |
| vec_free (app_tlvs); |
| |
| if (rv < 0 && total == 0) |
| { |
| errno = -rv; |
| size = -1; |
| } |
| else |
| size = total; |
| } |
| else |
| { |
| size = libc_sendmsg (fd, msg, flags); |
| } |
| |
| return size; |
| } |
| |
#ifdef _GNU_SOURCE
/*
 * sendmmsg() interposer.
 *
 * Not implemented for VLS fds: a warning is logged and the call fails with
 * -1/ENOSYS.  Any other fd is forwarded to libc_sendmmsg().
 */
int
sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
{
  /* Always valid so the failure trace below never prints garbage */
  const char *func_str = __func__;
  ssize_t size;
  u32 sh;

  ldp_init_check ();

  sh = ldp_fd_to_vlsh (fd);
  if (sh != VLS_INVALID_HANDLE)
    {
      clib_warning ("LDP<%d>: LDP-TBD", getpid ());
      errno = ENOSYS;
      size = -1;
    }
  else
    {
      func_str = "libc_sendmmsg";

      if (LDP_DEBUG > 2)
        clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
                      "vmessages %p, vlen %u, flags 0x%x",
                      getpid (), fd, fd, func_str, vmessages, vlen, flags);

      size = libc_sendmmsg (fd, vmessages, vlen, flags);
    }

  if (LDP_DEBUG > 2)
    {
      if (size < 0)
        {
          /* Logging may clobber errno; keep the caller-visible value */
          int errno_val = errno;
          clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
                        "rv %ld, errno = %d", getpid (), fd, fd,
                        func_str, (long) size, errno_val);
          errno = errno_val;
        }
      else
        clib_warning ("LDP<%d>: fd %d (0x%x): returning %ld (0x%lx)",
                      getpid (), fd, fd, (long) size, (long) size);
    }
  return size;
}
#endif
| |
| ssize_t |
| recvmsg (int fd, struct msghdr * msg, int flags) |
| { |
| vls_handle_t vlsh; |
| ssize_t size; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| struct iovec *iov = msg->msg_iov; |
| ssize_t max_deq, total = 0; |
| int i, rv = 0; |
| |
| max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); |
| if (!max_deq) |
| return 0; |
| |
| for (i = 0; i < msg->msg_iovlen; i++) |
| { |
| rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags, |
| (i == 0 ? msg->msg_name : NULL), |
| (i == 0 ? &msg->msg_namelen : NULL)); |
| if (rv <= 0) |
| break; |
| else |
| { |
| total += rv; |
| if (rv < iov[i].iov_len) |
| break; |
| } |
| if (total >= max_deq) |
| break; |
| } |
| |
| if (rv < 0 && total == 0) |
| { |
| errno = -rv; |
| size = -1; |
| } |
| else |
| { |
| if (msg->msg_controllen) |
| ldp_make_cmsg (vlsh, msg); |
| size = total; |
| } |
| } |
| else |
| { |
| size = libc_recvmsg (fd, msg, flags); |
| } |
| |
| return size; |
| } |
| |
#ifdef _GNU_SOURCE
/*
 * recvmmsg() interposer.
 *
 * For a VLS fd, recvmsg() is called repeatedly to fill up to vlen entries of
 * vmessages; msg_len of each filled entry is set to the byte count.  When a
 * recvmsg() attempt yields nothing (<= 0) the loop either gives up (deadline
 * reached) or sleeps 1us and retries.  Other fds are forwarded to
 * libc_recvmmsg().
 *
 * Returns the number of messages received if any, otherwise the result of the
 * last recvmsg() attempt.
 */
int
recvmmsg (int fd, struct mmsghdr *vmessages,
          unsigned int vlen, int flags, struct timespec *tmo)
{
  ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
  struct mmsghdr *cur;
  ssize_t rv = 0;
  u32 n_msgs = 0;
  f64 deadline;
  u32 sh;

  ldp_init_check ();

  sh = ldp_fd_to_vlsh (fd);
  if (sh == VLS_INVALID_HANDLE)
    return libc_recvmmsg (fd, vmessages, vlen, flags, tmo);

  if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
    clib_time_init (&ldpw->clib_time);

  if (tmo)
    {
      /* Relative timeout in seconds, turned into an absolute deadline */
      deadline = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9;
      deadline += clib_time_now (&ldpw->clib_time);
    }
  else
    {
      /* NOTE(review): ~0 is the int value -1, so this stores -1.0 rather
       * than a large sentinel.  With tmo == NULL the loop below therefore
       * stops after the first recvmsg() that returns <= 0 (assuming
       * clib_time_now() never reports less than -1.0) -- confirm this is
       * the intended behavior. */
      deadline = (f64) ~0;
    }

  while (n_msgs < vlen)
    {
      cur = &vmessages[n_msgs];
      rv = recvmsg (fd, &cur->msg_hdr, flags);
      if (rv > 0)
        {
          cur->msg_len = rv;
          n_msgs += 1;
          continue;
        }

      if (!deadline || clib_time_now (&ldpw->clib_time) >= deadline)
        break;

      usleep (1);
    }

  return n_msgs > 0 ? n_msgs : rv;
}
#endif
| |
| int |
| getsockopt (int fd, int level, int optname, |
| void *__restrict optval, socklen_t * __restrict optlen) |
| { |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| rv = -EOPNOTSUPP; |
| |
| switch (level) |
| { |
| case SOL_TCP: |
| switch (optname) |
| { |
| case TCP_NODELAY: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY, |
| optval, optlen); |
| break; |
| case TCP_MAXSEG: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS, |
| optval, optlen); |
| break; |
| case TCP_KEEPIDLE: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE, |
| optval, optlen); |
| break; |
| case TCP_KEEPINTVL: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL, |
| optval, optlen); |
| break; |
| case TCP_INFO: |
| if (optval && optlen && (*optlen == sizeof (struct tcp_info))) |
| { |
| LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, " |
| "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen); |
| memset (optval, 0, *optlen); |
| rv = VPPCOM_OK; |
| } |
| else |
| rv = -EFAULT; |
| break; |
| case TCP_CONGESTION: |
| *optlen = strlen ("cubic"); |
| strncpy (optval, "cubic", *optlen + 1); |
| rv = 0; |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, " |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_IP: |
| switch (optname) |
| { |
| case SO_ORIGINAL_DST: |
| rv = |
| vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen); |
| break; |
| default: |
| LDBG (0, |
| "ERROR: fd %d: getsockopt SOL_IP: vlsh %u " |
| "optname %d unsupported!", |
| fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_IPV6: |
| switch (optname) |
| { |
| case IPV6_V6ONLY: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen); |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u " |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_SOCKET: |
| switch (optname) |
| { |
| case SO_ACCEPTCONN: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen); |
| break; |
| case SO_KEEPALIVE: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen); |
| break; |
| case SO_PROTOCOL: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen); |
| *(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM; |
| break; |
| case SO_SNDBUF: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN, |
| optval, optlen); |
| break; |
| case SO_RCVBUF: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN, |
| optval, optlen); |
| break; |
| case SO_REUSEADDR: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen); |
| break; |
| case SO_REUSEPORT: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEPORT, optval, optlen); |
| break; |
| case SO_BROADCAST: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen); |
| break; |
| case SO_DOMAIN: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_DOMAIN, optval, optlen); |
| break; |
| case SO_ERROR: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen); |
| break; |
| case SO_BINDTODEVICE: |
| rv = 0; |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u " |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| rv = libc_getsockopt (fd, level, optname, optval, optlen); |
| } |
| |
| return rv; |
| } |
| |
| int |
| setsockopt (int fd, int level, int optname, |
| const void *optval, socklen_t optlen) |
| { |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| rv = -EOPNOTSUPP; |
| |
| switch (level) |
| { |
| case SOL_TCP: |
| switch (optname) |
| { |
| case TCP_NODELAY: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY, |
| (void *) optval, &optlen); |
| break; |
| case TCP_MAXSEG: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS, |
| (void *) optval, &optlen); |
| break; |
| case TCP_KEEPIDLE: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE, |
| (void *) optval, &optlen); |
| break; |
| case TCP_KEEPINTVL: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL, |
| (void *) optval, &optlen); |
| break; |
| case TCP_CONGESTION: |
| case TCP_CORK: |
| /* Ignore */ |
| rv = 0; |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u" |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_IPV6: |
| switch (optname) |
| { |
| case IPV6_V6ONLY: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY, |
| (void *) optval, &optlen); |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u" |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_SOCKET: |
| switch (optname) |
| { |
| case SO_KEEPALIVE: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE, |
| (void *) optval, &optlen); |
| break; |
| case SO_REUSEADDR: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR, |
| (void *) optval, &optlen); |
| break; |
| case SO_REUSEPORT: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEPORT, (void *) optval, |
| &optlen); |
| break; |
| case SO_BROADCAST: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST, |
| (void *) optval, &optlen); |
| break; |
| case SO_LINGER: |
| rv = 0; |
| break; |
| default: |
| LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u " |
| "optname %d unsupported!", fd, vlsh, optname); |
| break; |
| } |
| break; |
| case SOL_IP: |
| switch (optname) |
| { |
| case IP_PKTINFO: |
| rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval, |
| &optlen); |
| break; |
| default: |
| LDBG (0, |
| "ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d" |
| "unsupported!", |
| fd, vlsh, optname); |
| break; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| rv = libc_setsockopt (fd, level, optname, optval, optlen); |
| } |
| |
| return rv; |
| } |
| |
| int |
| listen (int fd, int n) |
| { |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n); |
| |
| rv = vls_listen (vlsh, n); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n); |
| rv = libc_listen (fd, n); |
| } |
| |
| LDBG (1, "fd %d: returning %d", fd, rv); |
| return rv; |
| } |
| |
| static inline int |
| ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr, |
| socklen_t *__restrict addr_len, int flags) |
| { |
| struct sockaddr *addr = SOCKADDR_GET_SA (_addr); |
| vls_handle_t listen_vlsh, accept_vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| listen_vlsh = ldp_fd_to_vlsh (listen_fd); |
| if (listen_vlsh != VLS_INVALID_HANDLE) |
| { |
| vppcom_endpt_t ep; |
| u8 src_addr[sizeof (struct sockaddr_in6)]; |
| memset (&ep, 0, sizeof (ep)); |
| ep.ip = src_addr; |
| |
| LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u," |
| " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags); |
| |
| accept_vlsh = vls_accept (listen_vlsh, &ep, flags); |
| if (accept_vlsh < 0) |
| { |
| errno = -accept_vlsh; |
| rv = -1; |
| } |
| else |
| { |
| rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); |
| if (rv != VPPCOM_OK) |
| { |
| (void) vls_close (accept_vlsh); |
| errno = -rv; |
| rv = -1; |
| } |
| else |
| { |
| rv = ldp_vlsh_to_fd (accept_vlsh); |
| } |
| } |
| } |
| else |
| { |
| LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p," |
| " flags 0x%x", listen_fd, addr, addr_len, flags); |
| |
| rv = libc_accept4 (listen_fd, addr, addr_len, flags); |
| } |
| |
| LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv); |
| |
| return rv; |
| } |
| |
/*
 * accept4() interposer: thin wrapper that hands all arguments, including
 * flags, to ldp_accept4().
 */
int
accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len,
         int flags)
{
  int new_fd = ldp_accept4 (fd, addr, addr_len, flags);

  return new_fd;
}
| |
/*
 * accept() interposer: same as accept4() with no flags set.
 */
int
accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
{
  int new_fd = ldp_accept4 (fd, addr, addr_len, 0);

  return new_fd;
}
| |
| int |
| shutdown (int fd, int how) |
| { |
| vls_handle_t vlsh; |
| int rv = 0; |
| |
| ldp_init_check (); |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how); |
| rv = vls_shutdown (vlsh, how); |
| } |
| else |
| { |
| LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how); |
| rv = libc_shutdown (fd, how); |
| } |
| |
| return rv; |
| } |
| |
| int |
| epoll_create1 (int flags) |
| { |
| ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); |
| vls_handle_t vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ()) |
| { |
| rv = libc_epoll_create1 (flags); |
| ldp->vcl_needs_real_epoll = 0; |
| /* Assume this is a request to create the mq epfd */ |
| ldpw->vcl_mq_epfd = rv; |
| LDBG (0, "created vcl epfd %u", rv); |
| return rv; |
| } |
| |
| vlsh = vls_epoll_create (); |
| if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE)) |
| { |
| errno = -vlsh; |
| rv = -1; |
| } |
| else |
| { |
| rv = ldp_vlsh_to_fd (vlsh); |
| } |
| LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh); |
| return rv; |
| } |
| |
/*
 * epoll_create() interposer: the size argument is ignored and the call is
 * served by epoll_create1() with no flags.
 */
int
epoll_create (int size)
{
  int epfd = epoll_create1 (0);

  return epfd;
}
| |
| int |
| epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) |
| { |
| vls_handle_t vep_vlsh, vlsh; |
| int rv; |
| |
| ldp_init_check (); |
| |
| vep_vlsh = ldp_fd_to_vlsh (epfd); |
| if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE)) |
| { |
| /* The LDP epoll_create1 always creates VCL epfd's. |
| * The app should never have a kernel base epoll fd unless it |
| * was acquired outside of the LD_PRELOAD process context. |
| * In any case, if we get one, punt it to libc_epoll_ctl. |
| */ |
| LDBG (1, |
| "epfd %d: calling libc_epoll_ctl: op %d, fd %d" |
| " events 0x%x", |
| epfd, op, fd, event ? event->events : 0); |
| |
| rv = libc_epoll_ctl (epfd, op, fd, event); |
| goto done; |
| } |
| |
| vlsh = ldp_fd_to_vlsh (fd); |
| |
| LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd, |
| vlsh, op); |
| |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| LDBG (1, |
| "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u," |
| " events 0x%x", |
| epfd, vep_vlsh, op, vlsh, event ? event->events : 0); |
| |
| rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event); |
| if (rv != VPPCOM_OK) |
| { |
| errno = -rv; |
| rv = -1; |
| } |
| } |
| else |
| { |
| int libc_epfd; |
| |
| libc_epfd = vls_get_libc_epfd (vep_vlsh); |
| if (!libc_epfd) |
| { |
| LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " |
| "EPOLL_CLOEXEC", epfd, vep_vlsh); |
| |
| libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); |
| if (libc_epfd < 0) |
| { |
| rv = libc_epfd; |
| goto done; |
| } |
| |
| rv = vls_set_libc_epfd (vep_vlsh, libc_epfd); |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| } |
| else if (PREDICT_FALSE (libc_epfd < 0)) |
| { |
| errno = -epfd; |
| rv = -1; |
| goto done; |
| } |
| |
| LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d," |
| " event %p", epfd, libc_epfd, op, fd, event); |
| |
| rv = libc_epoll_ctl (libc_epfd, op, fd, event); |
| } |
| |
| done: |
| return rv; |
| } |
| |
| static inline int |
| ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, |
| int timeout, const sigset_t * sigmask) |
| { |
| ldp_worker_ctx_t *ldpw; |
| double time_to_wait = (double) 0, max_time; |
| int libc_epfd, rv = 0; |
| vls_handle_t ep_vlsh; |
| |
| ldp_init_check (); |
| |
| if (PREDICT_FALSE (!events || (timeout < -1))) |
| { |
| errno = EFAULT; |
| return -1; |
| } |
| |
| if (PREDICT_FALSE (vppcom_worker_index () == ~0)) |
| vls_register_vcl_worker (); |
| |
| ldpw = ldp_worker_get_current (); |
| if (epfd == ldpw->vcl_mq_epfd) |
| return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); |
| |
| ep_vlsh = ldp_fd_to_vlsh (epfd); |
| if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) |
| { |
| LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); |
| errno = EBADFD; |
| return -1; |
| } |
| |
| if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) |
| clib_time_init (&ldpw->clib_time); |
| time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0); |
| max_time = clib_time_now (&ldpw->clib_time) + time_to_wait; |
| |
| libc_epfd = vls_get_libc_epfd (ep_vlsh); |
| if (PREDICT_FALSE (libc_epfd < 0)) |
| { |
| errno = -libc_epfd; |
| rv = -1; |
| goto done; |
| } |
| |
| LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, " |
| "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh, |
| libc_epfd, events, maxevents, timeout, sigmask, time_to_wait); |
| do |
| { |
| if (!ldpw->epoll_wait_vcl) |
| { |
| rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); |
| if (rv > 0) |
| { |
| ldpw->epoll_wait_vcl = 1; |
| goto done; |
| } |
| else if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| } |
| else |
| ldpw->epoll_wait_vcl = 0; |
| |
| if (libc_epfd > 0) |
| { |
| rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask); |
| if (rv != 0) |
| goto done; |
| } |
| } |
| while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time)); |
| |
| done: |
| return rv; |
| } |
| |
| static inline int |
| ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, |
| int maxevents, int timeout, const sigset_t * sigmask) |
| { |
| int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0; |
| struct epoll_event *libc_evts; |
| ldp_worker_ctx_t *ldpw; |
| vls_handle_t ep_vlsh; |
| |
| ldp_init_check (); |
| |
| if (PREDICT_FALSE (!events || (timeout < -1))) |
| { |
| errno = EFAULT; |
| return -1; |
| } |
| |
| /* Make sure the vcl worker is valid. Could be that epoll fd was created on |
| * one thread but it is now used on another */ |
| if (PREDICT_FALSE (vppcom_worker_index () == ~0)) |
| vls_register_vcl_worker (); |
| |
| ldpw = ldp_worker_get_current (); |
| if (epfd == ldpw->vcl_mq_epfd) |
| return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); |
| |
| ep_vlsh = ldp_fd_to_vlsh (epfd); |
| if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) |
| { |
| LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); |
| errno = EBADFD; |
| return -1; |
| } |
| |
| libc_epfd = vls_get_libc_epfd (ep_vlsh); |
| if (PREDICT_FALSE (!libc_epfd)) |
| { |
| LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " |
| "EPOLL_CLOEXEC", epfd, ep_vlsh); |
| libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); |
| if (libc_epfd < 0) |
| { |
| rv = libc_epfd; |
| goto done; |
| } |
| |
| rv = vls_set_libc_epfd (ep_vlsh, libc_epfd); |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| } |
| if (PREDICT_FALSE (libc_epfd <= 0)) |
| { |
| errno = -libc_epfd; |
| rv = -1; |
| goto done; |
| } |
| |
| if (PREDICT_FALSE (!ldpw->mq_epfd_added)) |
| { |
| struct epoll_event e = { 0 }; |
| ldpw->vcl_mq_epfd = vppcom_mq_epoll_fd (); |
| e.events = EPOLLIN; |
| e.data.fd = ldpw->vcl_mq_epfd; |
| if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) < |
| 0) |
| { |
| LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d", |
| epfd, ldpw->vcl_mq_epfd, libc_epfd); |
| rv = -1; |
| goto done; |
| } |
| ldpw->mq_epfd_added = 1; |
| } |
| |
| /* Request to only drain unhandled to prevent libc_epoll_wait starved */ |
| rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2); |
| if (rv > 0) |
| { |
| timeout = 0; |
| if (rv >= maxevents) |
| goto done; |
| maxevents -= rv; |
| } |
| else if (PREDICT_FALSE (rv < 0)) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| |
| epoll_again: |
| |
| libc_evts = &events[rv]; |
| libc_num_ev = |
| libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask); |
| if (libc_num_ev <= 0) |
| { |
| rv = rv >= 0 ? rv : -1; |
| goto done; |
| } |
| |
| for (int i = 0; i < libc_num_ev; i++) |
| { |
| if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd) |
| { |
| /* We should remove mq epoll fd from events. */ |
| libc_num_ev--; |
| if (i != libc_num_ev) |
| { |
| libc_evts[i].events = libc_evts[libc_num_ev].events; |
| libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64; |
| } |
| num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev], |
| maxevents - libc_num_ev, 0); |
| if (PREDICT_TRUE (num_ev > 0)) |
| rv += num_ev; |
| /* Woken up by vcl but no events generated. Accept it once */ |
| if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1) |
| goto epoll_again; |
| break; |
| } |
| } |
| |
| rv += libc_num_ev; |
| |
| done: |
| return rv; |
| } |
| |
/*
 * epoll_pwait() interposer: picks the eventfd-based implementation when
 * vls_use_eventfd() reports it is in use, the polling implementation
 * otherwise.
 */
int
epoll_pwait (int epfd, struct epoll_event *events,
             int maxevents, int timeout, const sigset_t * sigmask)
{
  return vls_use_eventfd () ?
    ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, sigmask) :
    ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
}
| |
/*
 * epoll_wait() interposer: same dispatch as epoll_pwait(), with no signal
 * mask.
 */
int
epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
{
  return vls_use_eventfd () ?
    ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL) :
    ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
}
| |
| int |
| poll (struct pollfd *fds, nfds_t nfds, int timeout) |
| { |
| ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); |
| int rv, i, n_revents = 0; |
| vls_handle_t vlsh; |
| vcl_poll_t *vp; |
| double max_time; |
| |
| LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout); |
| |
| if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) |
| clib_time_init (&ldpw->clib_time); |
| |
| max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0; |
| max_time += clib_time_now (&ldpw->clib_time); |
| |
| for (i = 0; i < nfds; i++) |
| { |
| if (fds[i].fd < 0) |
| continue; |
| |
| vlsh = ldp_fd_to_vlsh (fds[i].fd); |
| if (vlsh != VLS_INVALID_HANDLE) |
| { |
| fds[i].fd = -fds[i].fd; |
| vec_add2 (ldpw->vcl_poll, vp, 1); |
| vp->fds_ndx = i; |
| vp->sh = vlsh_to_sh (vlsh); |
| vp->events = fds[i].events; |
| #ifdef __USE_XOPEN2K |
| if (fds[i].events & POLLRDNORM) |
| vp->events |= POLLIN; |
| if (fds[i].events & POLLWRNORM) |
| vp->events |= POLLOUT; |
| #endif |
| vp->revents = fds[i].revents; |
| } |
| else |
| { |
| vec_add1 (ldpw->libc_poll, fds[i]); |
| vec_add1 (ldpw->libc_poll_idxs, i); |
| } |
| } |
| |
| do |
| { |
| if (vec_len (ldpw->vcl_poll)) |
| { |
| rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0); |
| if (rv < 0) |
| { |
| errno = -rv; |
| rv = -1; |
| goto done; |
| } |
| else |
| n_revents += rv; |
| } |
| |
| if (vec_len (ldpw->libc_poll)) |
| { |
| rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0); |
| if (rv < 0) |
| goto done; |
| else |
| n_revents += rv; |
| } |
| |
| if (n_revents) |
| { |
| rv = n_revents; |
| goto done; |
| } |
| } |
| while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time)); |
| rv = 0; |
| |
| done: |
| vec_foreach (vp, ldpw->vcl_poll) |
| { |
| fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd; |
| fds[vp->fds_ndx].revents = vp->revents; |
| #ifdef __USE_XOPEN2K |
| if ((fds[vp->fds_ndx].revents & POLLIN) && |
| (fds[vp->fds_ndx].events & POLLRDNORM)) |
| fds[vp->fds_ndx].revents |= POLLRDNORM; |
| if ((fds[vp->fds_ndx].revents & POLLOUT) && |
| (fds[vp->fds_ndx].events & POLLWRNORM)) |
| fds[vp->fds_ndx].revents |= POLLWRNORM; |
| #endif |
| } |
| vec_reset_length (ldpw->vcl_poll); |
| |
| for (i = 0; i < vec_len (ldpw->libc_poll); i++) |
| { |
| fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents; |
| } |
| vec_reset_length (ldpw->libc_poll_idxs); |
| vec_reset_length (ldpw->libc_poll); |
| |
| return rv; |
| } |
| |
#ifdef _GNU_SOURCE
/*
 * ppoll() interposer: not implemented.  After the LDP init check a warning
 * is logged and the call fails with -1/ENOSYS for every fd.
 */
int
ppoll (struct pollfd *fds, nfds_t nfds,
       const struct timespec *timeout, const sigset_t * sigmask)
{
  ldp_init_check ();

  clib_warning ("LDP<%d>: LDP-TBD", getpid ());

  errno = ENOSYS;
  return -1;
}
#endif
| |
| void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void); |
| |
| void DESTRUCTOR_ATTRIBUTE ldp_destructor (void); |
| |
| /* |
| * This function is called when the library is loaded |
| */ |
| void |
| ldp_constructor (void) |
| { |
| swrap_constructor (); |
| if (ldp_init () != 0) |
| { |
| fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n", |
| getpid ()); |
| _exit (1); |
| } |
| else if (LDP_DEBUG > 0) |
| clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ()); |
| } |
| |
| /* |
| * This function is called when the library is unloaded |
| */ |
| void |
| ldp_destructor (void) |
| { |
| /* |
| swrap_destructor (); |
| if (ldp->init) |
| ldp->init = 0; |
| */ |
| |
| /* Don't use clib_warning() here because that calls writev() |
| * which will call ldp_init(). |
| */ |
| if (LDP_DEBUG > 0) |
| fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n", |
| __func__, __LINE__, getpid ()); |
| } |
| |
| |
| /* |
| * fd.io coding-style-patch-verification: ON |
| * |
| * Local Variables: |
| * eval: (c-set-style "gnu") |
| * End: |
| */ |