LD_PRELOAD poll implementation
- add poll implementation
- implementation based on vppcom_poll
- implementation based on vppcom_select
- currently vppcom_select implementation is picked
Change-Id: If6c2862ae72e9969335aca5b8085957c98287dc0
Signed-off-by: shrinivasan ganapathy <shrinivasanganapathy@gmail.com>
Signed-off-by: Dave Wallace <dwallacelf@gmail.com>
diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c
index d5b3e12..35dbf57 100644
--- a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c
+++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c
@@ -19,6 +19,7 @@
#include <pthread.h>
#include <time.h>
#include <stdarg.h>
+#include <sys/resource.h>
#include <libvcl-ldpreload/vcom_socket_wrapper.h>
#include <libvcl-ldpreload/vcom.h>
@@ -2940,24 +2941,319 @@
This function is a cancellation point and therefore not marked with
__THROW. */
+
int
vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
{
- if (vcom_init () != 0)
+ int rv = 0;
+ pid_t pid = getpid ();
+
+ struct rlimit nofile_limit;
+ struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT];
+ nfds_t fds_idx = 0;
+
+ /* actual set of file descriptors to be monitored */
+ nfds_t libc_nfds = 0;
+ nfds_t vcom_nfds = 0;
+
+ /* ready file descriptors
+ *
+ * number of structures which have nonzero revents fields
+ * in other words, descriptors with events or errors reported.
+ * */
+ /* after call to libc_poll () */
+ int rlibc_nfds = 0;
+ /* after call to vcom_socket_poll () */
+ int rvcom_nfds = 0;
+
+
+ /* timeout value in units of timespec */
+ struct timespec timeout_ts;
+ struct timespec start_time, now, end_time;
+
+
+ /* get start_time */
+ rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
+ if (rv == -1)
{
- return -1;
+ rv = -errno;
+ goto poll_done;
}
- return -EOPNOTSUPP;
+ /* set timeout_ts & end_time */
+ if (__timeout >= 0)
+ {
+ /* set timeout_ts */
+ timeout_ts.tv_sec = __timeout / MSEC_PER_SEC;
+ timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
+ set_normalized_timespec (&timeout_ts,
+ timeout_ts.tv_sec, timeout_ts.tv_nsec);
+ /* set end_time */
+ if (__timeout)
+ {
+ end_time = timespec_add (start_time, timeout_ts);
+ }
+ else
+ {
+ end_time = start_time;
+ }
+ }
+
+ if (vcom_init () != 0)
+ {
+ rv = -1;
+ goto poll_done;
+ }
+
+ /* validate __fds */
+ if (!__fds)
+ {
+ rv = -EFAULT;
+ goto poll_done;
+ }
+
+ /* validate __nfds */
+ /*TBD: call getrlimit once when vcl-ldpreload library is init */
+ rv = getrlimit (RLIMIT_NOFILE, &nofile_limit);
+ if (rv != 0)
+ {
+ rv = -errno;
+ goto poll_done;
+ }
+ if (__nfds >= nofile_limit.rlim_cur || __nfds < 0)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /*
+ * for the POC, it's fair to assume that nfds is less than 1024
+ * */
+ if (__nfds >= MAX_POLL_NFDS_DEFAULT)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /* set revents field (output parameter)
+ * to zero
+ * */
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ __fds[fds_idx].revents = 0;
+ }
+
+#if 0
+ /* set revents field (output parameter)
+ * to zero for user ignored fds
+ * */
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /*
+ * if negative fd, ignore events field
+ * and set output parameter (revents field) to zero */
+ if (__fds[fds_idx].fd < 0)
+ {
+ __fds[fds_idx].revents = 0;
+ }
+ }
+#endif
+
+ /*
+ * 00. prepare __fds and vcom_fds for polling
+ * copy __fds to vcom_fds
+ * 01. negate all except libc fds in __fds,
+ * ignore user negated fds
+ * 02. negate all except vcom_fds in vocm fds,
+ * ignore user negated fds
+ * ignore fd 0 by setting it to negative number
+ * */
+ memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds);
+ libc_nfds = 0;
+ vcom_nfds = 0;
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /*
+ * 00. ignore vcom fds in __fds
+ * 01. ignore libc fds in vcom_fds,
+ * ignore fd 0 by setting it to negative number.
+ * as fd 0 cannot be ignored.
+ */
+ if (is_vcom_socket_fd (__fds[fds_idx].fd) ||
+ is_vcom_epfd (__fds[fds_idx].fd))
+ {
+ __fds[fds_idx].fd = -__fds[fds_idx].fd;
+ vcom_nfds++;
+ }
+ else
+ {
+ libc_nfds++;
+ /* ignore fd 0 by setting it to negative number */
+ if (!vcom_fds[fds_idx].fd)
+ {
+ vcom_fds[fds_idx].fd = -1;
+ }
+ vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd;
+ }
+ }
+
+ /*
+ * polling loop
+ *
+ * poll on libc fds and vcom fds
+ *
+ * specifying a timeout of zero causes libc_poll() and
+ * vcom_socket_poll() to return immediately, even if no
+ * file descriptors are ready
+ * */
+ do
+ {
+ rlibc_nfds = 0;
+ rvcom_nfds = 0;
+
+ /*
+ * timeout parameter for libc_poll () set to zero
+ * to poll on libc fds
+ * */
+
+ /* poll on libc fds */
+ if (libc_nfds)
+ {
+ /*
+ * a timeout of zero causes libc_poll()
+ * to return immediately
+ * */
+ rlibc_nfds = libc_poll (__fds, __nfds, 0);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] poll libc: "
+ "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds);
+
+ if (rlibc_nfds < 0)
+ {
+ rv = -errno;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /*
+ * timeout parameter for vcom_socket_poll () set to zero
+ * to poll on vcom fds
+ * */
+
+ /* poll on vcom fds */
+ if (vcom_nfds)
+ {
+ /*
+ * a timeout of zero causes vcom_socket_poll()
+ * to return immediately
+ * */
+ rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] poll vcom: "
+ "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds);
+ if (rvcom_nfds < 0)
+ {
+ rv = rvcom_nfds;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /* check if any file descriptors changed status */
+ if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0))
+ {
+ /* something interesting happened */
+ rv = rlibc_nfds + rvcom_nfds;
+ goto poll_done_update_nfds;
+ }
+
+ rv = clock_gettime (CLOCK_MONOTONIC, &now);
+ if (rv == -1)
+ {
+ rv = -errno;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /* block indefinitely || timeout elapsed */
+ while ((__timeout < 0) || timespec_compare (&now, &end_time) < 0);
+
+ /* timeout expired before anything interesting happened */
+ rv = 0;
+
+poll_done_update_nfds:
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds in vcom_fds
+ * 00. user negated fds
+ * 01. libc fds
+ * */
+ if (vcom_fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /* from here on handle positive vcom fds */
+ /*
+ * restore vcom fds to positive number in __fds
+ * and update revents in __fds with the events
+ * that actually occurred in vcom fds
+ * */
+ __fds[fds_idx].fd = -__fds[fds_idx].fd;
+ if (rvcom_nfds)
+ {
+ __fds[fds_idx].revents = vcom_fds[fds_idx].revents;
+ }
+ }
+
+poll_done:
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds);
+ return rv;
}
+/*
+ * 00. The field __fds[i].fd contains a file descriptor for an
+ * open file.
+ * If this field is negative, then the corresponding
+ * events field is ignored and the revents field returns zero.
+ * The field __fds[i].events is an input parameter.
+ * The field __fds[i].revents is an output parameter.
+ * 01. Specifying a negative value in timeout
+ * means an infinite timeout.
+ * Specifying a timeout of zero causes poll() to return
+ * immediately, even if no file descriptors are ready.
+ *
+ * NOTE: observed __nfds is less than 128 from kubecon strace files
+ */
+
+
int
poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
{
int rv = 0;
+ pid_t pid = getpid ();
- errno = EOPNOTSUPP;
- rv = -1;
+
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n",
+ pid, rv, __nfds, __fds[0].fd, __fds[0].events);
+ rv = vcom_poll (__fds, __nfds, __timeout);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n",
+ pid, rv, __nfds, __fds[0].fd, __fds[0].revents);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
return rv;
}