Improve performance for some platforms

* Add an option to use packed network headers for situations in which they
  may not be 32-bit aligned.  Revise comments accordingly.

Change-Id: I89585360793fb37348dc814d3acfbc522d1e2ed5
Signed-off-by: Dave Hudson <dhudson@codeaurora.org>
diff --git a/shortcut-fe/sfe_ipv4.c b/shortcut-fe/sfe_ipv4.c
index 75c9078..cf64fdd 100644
--- a/shortcut-fe/sfe_ipv4.c
+++ b/shortcut-fe/sfe_ipv4.c
@@ -15,6 +15,23 @@
 #include "sfe_ipv4.h"
 
 /*
+ * By default Linux IP header and transport layer header structures are
+ * unpacked, assuming that such headers should be 32-bit aligned.
+ * Unfortunately some wireless adaptors can't cope with this requirement and
+ * some CPUs can't handle misaligned accesses.  For those platforms we
+ * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
+ * When we do this the compiler will generate slightly worse code than for the
+ * aligned case (on most platforms), but that code will still be much quicker
+ * than fixing things up in an unaligned-access trap handler.
+ */
+#define SFE_IPV4_UNALIGNED_IP_HEADER 1
+#if SFE_IPV4_UNALIGNED_IP_HEADER
+#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
+#else
+#define SFE_IPV4_UNALIGNED_STRUCT
+#endif
+
+/*
  * The default Linux ethhdr structure is "packed".  It also has byte aligned
  * MAC addresses and this leads to poor performance.  This version is not
  * packed and has better alignment for the MAC addresses.
@@ -26,8 +43,9 @@
 };
 
 /*
- * The default Linux iphdr structure is "packed".  This really hurts performance
- * on many CPUs.  Here's an aligned and "unpacked" version of the same thing.
+ * Based on the Linux IPv4 header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT).
  */
 struct sfe_ipv4_iphdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -52,22 +70,24 @@
 	/*
 	 * The options start here.
 	 */
-};
+} SFE_IPV4_UNALIGNED_STRUCT;
 
 /*
- * The default Linux udphdr structure is "packed".  This really hurts performance
- * on many CPUs.  Here's an aligned and "unpacked" version of the same thing.
+ * Based on the Linux UDP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT).
  */
 struct sfe_ipv4_udphdr {
 	__be16 source;
 	__be16 dest;
 	__be16 len;
 	__sum16 check;
-};
+} SFE_IPV4_UNALIGNED_STRUCT;
 
 /*
- * The default Linux tcphdr structure is "packed".  This really hurts performance
- * on many CPUs.  Here's an aligned and "unpacked" version of the same thing.
+ * Based on the Linux TCP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT).
  */
 struct sfe_ipv4_tcphdr {
 	__be16 source;
@@ -102,7 +122,7 @@
 	__be16 window;
 	__sum16	check;
 	__be16 urg_ptr;
-};
+} SFE_IPV4_UNALIGNED_STRUCT;
 
 /*
  * Specifies the lower bound on ACK numbers carried in the TCP header