Add binary API documentation

Change-Id: Id1a5da12b13d87bacfa81094f471b95db40c39be
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md
index 952a72f..65b09f9 100644
--- a/doxygen/user_doc.md
+++ b/doxygen/user_doc.md
@@ -18,3 +18,4 @@
 - @subpage srmpls_doc
 - @subpage sample_plugin_doc
 - @subpage nat64_doc
+- @subpage api_doc
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
index dc6761b..770cf47 100644
--- a/src/vlibapi/api_common.h
+++ b/src/vlibapi/api_common.h
@@ -20,71 +20,74 @@
 #ifndef included_api_common_h
 #define included_api_common_h
 
+/** \file
+ * API common definitions. See api_doc.md for more info.
+ */
+
 #include <vppinfra/clib_error.h>
 #include <svm/svm_common.h>
 #include <vlibmemory/unix_shared_memory_queue.h>
 
+/** API registration types
+ */
 typedef enum
 {
   REGISTRATION_TYPE_FREE = 0,
-  REGISTRATION_TYPE_SHMEM,
-  REGISTRATION_TYPE_SOCKET_LISTEN,
-  REGISTRATION_TYPE_SOCKET_SERVER,
-  REGISTRATION_TYPE_SOCKET_CLIENT,
+  REGISTRATION_TYPE_SHMEM,	/**< Shared memory connection */
+  REGISTRATION_TYPE_SOCKET_LISTEN, /**< Socket listener  */
+  REGISTRATION_TYPE_SOCKET_SERVER, /**< Socket server */
+  REGISTRATION_TYPE_SOCKET_CLIENT, /**< Socket client */
 } vl_registration_type_t;
 
+/** An API client registration, only in vpp/vlib */
+
 typedef struct vl_api_registration_
 {
-  vl_registration_type_t registration_type;
+  vl_registration_type_t registration_type; /**< type */
 
-  /* Index in VLIB's brain (not shared memory). */
+  /** Index in VLIB's brain (not shared memory). */
   u32 vl_api_registration_pool_index;
 
-  u8 *name;
+  u8 *name;			/**< Client name */
 
-  /*
-   * The following groups of data could be unioned, but my fingers are
-   * going to be sore enough.
-   */
-
-  /* shared memory only */
+  /** shared memory only: pointer to client input queue */
   unix_shared_memory_queue_t *vl_input_queue;
 
   /* socket server and client */
-  u32 clib_file_index;
-  i8 *unprocessed_input;
-  u32 unprocessed_msg_length;
-  u8 *output_vector;
+  u32 clib_file_index;		/**< Socket only: file index */
+  i8 *unprocessed_input;	/**< Socket only: pending input */
+  u32 unprocessed_msg_length;	/**< Socket only: unprocessed length */
+  u8 *output_vector;		/**< Socket only: output vector */
 
   /* socket client only */
-  u32 server_handle;
-  u32 server_index;
-
+  u32 server_handle;		/**< Socket client only: server handle */
+  u32 server_index;		/**< Socket client only: server index */
 } vl_api_registration_t;
 
 
-/* Trace configuration for a single message */
+/** Trace configuration for a single message */
 typedef struct
 {
-  int size;
-  int trace_enable;
-  int replay_enable;
+  int size;			/**< for sanity checking */
+  int trace_enable;		/**< trace this message  */
+  int replay_enable;		/**< This message can be replayed  */
 } trace_cfg_t;
 
-/*
- * API recording
+/**
+ * API trace state
  */
 typedef struct
 {
-  u8 endian;
-  u8 enabled;
-  u8 wrapped;
+  u8 endian;			/**< trace endianness */
+  u8 enabled;			/**< trace is enabled  */
+  u8 wrapped;			/**< trace has wrapped */
   u8 pad;
-  u32 nitems;
-  u32 curindex;
-  u8 **traces;
+  u32 nitems;			/**< Number of trace records */
+  u32 curindex;			/**< Current index in circular buffer  */
+  u8 **traces;			/**< Trace ring */
 } vl_api_trace_t;
 
+/** Trace RX / TX enum */
 typedef enum
 {
   VL_API_TRACE_TX,
@@ -94,35 +97,38 @@
 #define VL_API_LITTLE_ENDIAN 0x00
 #define VL_API_BIG_ENDIAN 0x01
 
+/** Message range (belonging to a plugin) */
 typedef struct
 {
-  u8 *name;
-  u16 first_msg_id;
-  u16 last_msg_id;
+  u8 *name;			/**< name of the plugin  */
+  u16 first_msg_id;		/**< first assigned message ID */
+  u16 last_msg_id;		/**< last assigned message ID */
 } vl_api_msg_range_t;
 
+/** Message configuration definition */
 typedef struct
 {
-  int id;
-  char *name;
-  u32 crc;
-  void *handler;
-  void *cleanup;
-  void *endian;
-  void *print;
-  int size;
-  int traced;
-  int replay;
-  int message_bounce;
-  int is_mp_safe;
+  int id;			/**< the message ID */
+  char *name;			/**< the message name */
+  u32 crc;			/**< message definition CRC  */
+  void *handler;		/**< the message handler  */
+  void *cleanup;		/**< non-default message cleanup handler */
+  void *endian;			/**< message endian function  */
+  void *print;			/**< message print function  */
+  int size;			/**< message size  */
+  int traced;			/**< is this message to be traced?  */
+  int replay;			/**< is this message to be replayed?  */
+  int message_bounce;		/**< do not free message after processing */
+  int is_mp_safe;		/**< handler is thread-safe: no worker barrier needed */
 } vl_msg_api_msg_config_t;
 
+/** Message header structure */
 typedef struct msgbuf_
 {
-  unix_shared_memory_queue_t *q;
-  u32 data_len;
-  u32 gc_mark_timestamp;
-  u8 data[0];
+  unix_shared_memory_queue_t *q; /**< message allocated in this shmem ring  */
+  u32 data_len;			 /**< message length not including header  */
+  u32 gc_mark_timestamp;	 /**< message garbage collector mark TS  */
+  u8 data[0];			 /**< actual message begins here  */
 } msgbuf_t;
 
 /* api_shared.c prototypes */
@@ -171,101 +177,147 @@
   vl_msg_api_init_function_t *f;
 } _vl_msg_api_function_list_elt_t;
 
+/** API main structure, used by both vpp and binary API clients */
 typedef struct
 {
+  /** Message handler vector  */
   void (**msg_handlers) (void *);
+  /** Platform-dependent (aka hardware) message handler vector */
   int (**pd_msg_handlers) (void *, int);
+
+  /** non-default message cleanup handler vector */
   void (**msg_cleanup_handlers) (void *);
+
+  /** Message endian handler vector */
   void (**msg_endian_handlers) (void *);
+
+  /** Message print function vector */
   void (**msg_print_handlers) (void *, void *);
+
+  /** Message name vector */
   const char **msg_names;
+
+  /** Don't automatically free message buffer vector */
   u8 *message_bounce;
+
+  /** Message is mp safe vector */
   u8 *is_mp_safe;
+
+  /** Allocator ring vectors (in shared memory) */
   struct ring_alloc_ *arings;
+
+  /** Number of times that the ring allocator failed */
   u32 ring_misses;
+
+  /** Number of garbage-collected message buffers */
   u32 garbage_collects;
+
+  /** Number of missing clients / failed message sends */
   u32 missing_clients;
+
+  /** Received message trace configuration */
   vl_api_trace_t *rx_trace;
+
+  /** Sent message trace configuration */
   vl_api_trace_t *tx_trace;
+
+  /** Print every received message */
   int msg_print_flag;
+
+  /** Current trace configuration */
   trace_cfg_t *api_trace_cfg;
+
+  /** Current process PID */
   int our_pid;
+
+  /** Binary api segment descriptor */
   svm_region_t *vlib_rp;
+
+  /** Vector of all mapped shared-VM segments */
   svm_region_t **mapped_shmem_regions;
+
+  /** Binary API shared-memory segment header pointer */
   struct vl_shmem_hdr_ *shmem_hdr;
+
+  /** vlib/vpp only: vector of client registrations */
   vl_api_registration_t **vl_clients;
 
+  /** vlib/vpp only: serialized (message, name, crc) table */
   u8 *serialized_message_table_in_shmem;
 
-  /* For plugin msg allocator */
+  /** First available message ID, for the plugin msg allocator */
   u16 first_available_msg_id;
 
-  /* message range by name hash */
+  /** Message range by name hash */
   uword *msg_range_by_name;
 
-  /* vector of message ranges */
+  /** vector of message ranges */
   vl_api_msg_range_t *msg_ranges;
 
-  /* uid for the api shared memory region */
+  /** uid for the api shared memory region */
   int api_uid;
-  /* gid for the api shared memory region */
+
+  /** gid for the api shared memory region */
   int api_gid;
 
-  /* base virtual address for global VM region */
+  /** base virtual address for global VM region */
   u64 global_baseva;
 
-  /* size of the global VM region */
+  /** size of the global VM region */
   u64 global_size;
 
-  /* size of the API region */
+  /** size of the API region */
   u64 api_size;
 
-  /* size of the global VM private mheap */
+  /** size of the global VM private mheap */
   u64 global_pvt_heap_size;
 
-  /* size of the api private mheap */
+  /** size of the api private mheap */
   u64 api_pvt_heap_size;
 
-  /* Client-only data structures */
+  /** Peer input queue pointer */
   unix_shared_memory_queue_t *vl_input_queue;
 
-  /*
+  /**
    * All VLIB-side message handlers use my_client_index to identify
    * the queue / client. This works in sim replay.
    */
   int my_client_index;
-  /*
+  /**
    * This is the (shared VM) address of the registration,
    * don't use it to id the connection since it can't possibly
    * work in simulator replay.
    */
   vl_api_registration_t *my_registration;
 
+  /** (Historical) signal-based queue non-empty signal, to be removed */
   i32 vlib_signal;
 
-  /* vlib input queue length */
+  /** vpp/vlib input queue length */
   u32 vlib_input_queue_length;
 
-  /* client side message index hash table */
+  /** client message index hash table */
   uword *msg_index_by_name_and_crc;
 
+  /** Shared VM binary API region name */
   const char *region_name;
+
+  /** Chroot path to the shared memory API files */
   const char *root_path;
 
-  /* Replay in progress? */
+  /** Replay in progress? */
   int replay_in_progress;
 
-  /* Dump (msg-name, crc) snapshot here at startup */
+  /** Dump (msg-name, crc) snapshot here at startup */
   u8 *save_msg_table_filename;
 
-  /* List of API client reaper functions */
+  /** List of API client reaper functions */
   _vl_msg_api_function_list_elt_t *reaper_function_registrations;
 
 } api_main_t;
 
 extern api_main_t api_main;
 
-
 #endif /* included_api_common_h */
 
 /*
diff --git a/src/vlibapi/api_doc.md b/src/vlibapi/api_doc.md
new file mode 100644
index 0000000..e620ee1
--- /dev/null
+++ b/src/vlibapi/api_doc.md
@@ -0,0 +1,349 @@
+# Binary API support    {#api_doc}
+
+VPP provides a binary API scheme to allow a wide variety of client codes to
+program data-plane tables. As of this writing, there are hundreds of binary
+APIs.
+
+Messages are defined in `*.api` files. Today, there are about 50 api files,
+with more arriving as folks add programmable features.  The API file compiler
+sources reside in @ref src/tools/vppapigen .
+
+Here's a typical request/response message definition, from
+@ref src/vnet/interface.api :
+
+```
+     autoreply define sw_interface_set_flags
+     {
+       u32 client_index;
+       u32 context;
+       u32 sw_if_index;
+       /* 1 = up, 0 = down */
+       u8 admin_up_down;
+     };
+```
+
+To a first approximation, the API compiler renders this definition  as
+follows:
+
+```
+    /****** Message ID / handler enum ******/
+    #ifdef vl_msg_id
+    vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler)
+    vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler)
+    #endif	
+
+    /****** Message names ******/
+    #ifdef vl_msg_name
+    vl_msg_name(vl_api_sw_interface_set_flags_t, 1)
+    vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1)
+    #endif	
+
+    /****** Message name, crc list ******/
+    #ifdef vl_msg_name_crc_list
+    #define foreach_vl_msg_name_crc_interface \
+    _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \
+    _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply, dfbf3afa)
+    #endif	
+
+    /****** Typedefs *****/
+    #ifdef vl_typedefs
+    typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags {
+        u16 _vl_msg_id;
+        u32 client_index;
+        u32 context;
+        u32 sw_if_index;
+        u8 admin_up_down;
+    }) vl_api_sw_interface_set_flags_t;
+
+    typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply {
+        u16 _vl_msg_id;
+        u32 context;
+        i32 retval;
+    }) vl_api_sw_interface_set_flags_reply_t;
+```
+
+To change the admin state of an interface, a binary api client sends a
+@ref vl_api_sw_interface_set_flags_t to vpp, which will respond  with a
+@ref vl_api_sw_interface_set_flags_reply_t message.
+
+Multiple layers of software, transport types, and shared libraries
+implement a variety of features:
+
+* API message allocation, tracing, pretty-printing, and replay.
+* Message transport via global shared memory, pairwise/private shared
+  memory, and sockets.
+* Barrier synchronization of worker threads across thread-unsafe
+  message handlers.
+    
+Correctly-coded message handlers know nothing about the transport used to
+deliver messages to/from vpp. It's reasonably straightforward to use multiple
+API message transport types simultaneously.
+
+For historical reasons, binary api messages are (putatively) sent in network
+byte order. As of this writing, we're seriously considering whether that
+choice makes sense.
+
+
+## Message Allocation
+
+Since binary API messages are always processed in order, we allocate messages
+using a ring allocator whenever possible. This scheme is extremely fast when
+compared with a traditional memory allocator, and doesn't cause heap
+fragmentation. See
+@ref src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal() .
+
+Regardless of transport, binary api messages always follow a @ref msgbuf_t
+header:
+
+```
+    typedef struct msgbuf_
+    {
+      unix_shared_memory_queue_t *q;
+      u32 data_len;
+      u32 gc_mark_timestamp;
+      u8 data[0];
+    } msgbuf_t;
+```
+
+This structure makes it easy to trace messages without having to
+decode them - simply save data_len bytes - and allows
+@ref vl_msg_api_free() to rapidly dispose of message buffers:
+
+```
+    void
+    vl_msg_api_free (void *a)
+    {
+      msgbuf_t *rv;
+      api_main_t *am = &api_main;
+
+      rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
+
+      /*
+       * Here's the beauty of the scheme.  Only one proc/thread has
+       * control of a given message buffer. To free a buffer, we just 
+       * clear the queue field, and leave. No locks, no hits, no errors...
+       */
+      if (rv->q)
+        {
+          rv->q = 0;
+          rv->gc_mark_timestamp = 0;
+          return;
+        }
+      <snip>
+     }
+```
+
+## Message Tracing and Replay
+
+It's extremely important that vpp can capture and replay sizeable binary API
+traces. System-level issues involving hundreds of thousands of API
+transactions can be re-run in a second or less. Partial replay allows one to
+binary-search for the point where the wheels fall off. One can add scaffolding
+to the data plane, to trigger when complex conditions obtain.
+
+With binary API trace, print, and replay, system-level bug reports of the form
+"after 300,000 API transactions, the vpp data-plane stopped forwarding
+traffic, FIX IT!" can be solved offline.
+
+More often than not, one discovers that a control-plane client
+misprograms the data plane after a long time or under complex
+circumstances. Without direct evidence, "it's a data-plane problem!"
+
+See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file() ,
+and @ref src/vlibapi/api_shared.c . See also the debug CLI command "api trace"
+
+## Client connection details
+
+Establishing a binary API connection to vpp from a C-language client
+is easy:
+
+```
+        int
+        connect_to_vpe (char *client_name, int client_message_queue_length)
+        {
+          vat_main_t *vam = &vat_main;
+          api_main_t *am = &api_main;
+
+          if (vl_client_connect_to_vlib ("/vpe-api", client_name, 
+                                    	client_message_queue_length) < 0)
+            return -1;
+
+          /* Memorize vpp's binary API message input queue address */
+          vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
+          /* And our client index */
+          vam->my_client_index = am->my_client_index;
+          return 0;
+        }       
+```
+
+32 is a typical value for client_message_queue_length. Vpp cannot
+block when it needs to send an API message to a binary API client, and
+the vpp-side binary API message handlers are very fast. When sending
+asynchronous messages, make sure to scrape the binary API rx ring with
+some enthusiasm.
+
+### binary API message RX pthread
+
+Calling @ref vl_client_connect_to_vlib spins up a binary API message RX
+pthread:
+
+```
+        static void *
+        rx_thread_fn (void *arg)
+        {
+          unix_shared_memory_queue_t *q;
+          memory_client_main_t *mm = &memory_client_main;
+          api_main_t *am = &api_main;
+
+          q = am->vl_input_queue;
+
+          /* So we can make the rx thread terminate cleanly */
+          if (setjmp (mm->rx_thread_jmpbuf) == 0)
+            {
+              mm->rx_thread_jmpbuf_valid = 1;
+              while (1)
+        	{
+        	  vl_msg_api_queue_handler (q);
+        	}
+            }
+          pthread_exit (0);
+        }       
+```
+
+To handle the binary API message queue yourself, use
+@ref vl_client_connect_to_vlib_no_rx_pthread.
+
+In turn, vl_msg_api_queue_handler(...) uses mutex/condvar signalling
+to wake up, process vpp -> client traffic, then sleep. Vpp supplies a
+condvar broadcast when the vpp -> client API message queue transitions
+from empty to nonempty.
+
+Vpp checks its own binary API input queue at a very high rate.  Vpp
+invokes message handlers in "process" context [aka cooperative
+multitasking thread context] at a variable rate, depending on
+data-plane packet processing requirements.
+
+## Client disconnection details
+
+To disconnect from vpp, call @ref vl_client_disconnect_from_vlib
+. Please arrange to call this function if the client application
+terminates abnormally. Vpp makes every effort to hold a decent funeral
+for dead clients, but vpp can't guarantee to free leaked memory in the
+shared binary API segment.
+
+## Sending binary API messages to vpp
+
+The point of the exercise is to send binary API messages to vpp, and
+to receive replies from vpp. Many vpp binary APIs comprise a client
+request message, and a simple status reply. For example, to
+set the admin status of an interface, one codes:
+
+```
+    vl_api_sw_interface_set_flags_t *mp;
+
+    mp = vl_msg_api_alloc (sizeof (*mp));
+    memset (mp, 0, sizeof (*mp));
+    mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS);
+    mp->client_index = api_main.my_client_index;
+    mp->sw_if_index = clib_host_to_net_u32 (<interface-sw-if-index>);
+    vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp);
+```
+
+Key points:
+
+* Use @ref vl_msg_api_alloc to allocate message buffers
+
+* Allocated message buffers are not initialized, and must be presumed
+  to contain trash.
+
+* Don't forget to set the _vl_msg_id field!
+
+* As of this writing, binary API message IDs and data are sent in
+  network byte order
+
+* The client-library global data structure @ref api_main keeps track
+  of sufficient pointers and handles used to communicate with vpp
+
+## Receiving binary API messages from vpp
+
+Unless you've made other arrangements (see @ref
+vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a
+separate rx pthread*. Synchronization with the client application main
+thread is the responsibility of the application!
+
+Set up message handlers about as follows:
+
+```
+    #define vl_typedefs		/* define message structures */
+    #include <vpp/api/vpe_all_api_h.h>
+    #undef vl_typedefs
+
+    /* declare message handlers for each api */
+
+    #define vl_endianfun		/* define message endian functions */
+    #include <vpp/api/vpe_all_api_h.h>
+    #undef vl_endianfun
+
+    /* instantiate all the print functions we know about */
+    #define vl_print(handle, ...)
+    #define vl_printfun
+    #include <vpp/api/vpe_all_api_h.h>
+    #undef vl_printfun
+
+    /* Define a list of all messages that the client handles */
+    #define foreach_vpe_api_reply_msg                            \
+       _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply)           
+
+       static clib_error_t *
+       my_api_hookup (vlib_main_t * vm)
+       {
+         api_main_t *am = &api_main;
+
+       #define _(N,n)                                                  \
+           vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                                  vl_api_##n##_t_handler,              \
+                                  vl_noop_handler,                     \
+                                  vl_api_##n##_t_endian,               \
+                                  vl_api_##n##_t_print,                \
+                                  sizeof(vl_api_##n##_t), 1);
+         foreach_vpe_api_reply_msg;
+       #undef _
+
+         return 0;
+        }
+```
+
+The key API used to establish message handlers is @ref
+vl_msg_api_set_handlers , which sets values in multiple parallel
+vectors in the @ref api_main_t structure. As of this writing, not all
+vector element values can be set through the API. You'll see sporadic
+API message registrations followed by minor adjustments of this form:
+
+```
+    /*
+     * Thread-safe API messages
+     */
+    am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1;
+    am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
+```
+
+
+
+
+              
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 401f388..b6b8752 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -2348,7 +2348,9 @@
 ?*/
 
 /*?
- * Display a serialized API message decode table
+ * Display a serialized API message decode table, compare a saved
+ * decode table with the current image, to establish API differences.
+ *
 ?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (dump_api_table_file, static) =