Add SWAP_LE?? and SWAP_BE?? macros, and make things use them.  These macros
convert values to/from little endian or big endian, which is a no-op if that
is what the current platform already is.
diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c
index 7ae343f..fa7b37c 100644
--- a/archival/libunarchive/decompress_unlzma.c
+++ b/archival/libunarchive/decompress_unlzma.c
@@ -121,10 +121,8 @@
 	pos_state_mask = (1 << pb) - 1;
 	literal_pos_mask = (1 << lp) - 1;
 
-#if BB_BIG_ENDIAN
-	header.dict_size = bswap_32(header.dict_size);
-	header.dst_size = bswap_64(header.dst_size);
-#endif
+	header.dict_size = SWAP_LE32(header.dict_size);
+	header.dst_size = SWAP_LE64(header.dst_size);
 
 	if (header.dict_size == 0)
 		header.dict_size = 1;
diff --git a/include/platform.h b/include/platform.h
index d684c2d..10c1d7d 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -48,11 +48,6 @@
 # define __const const
 #endif
 
-#ifndef __THROW
-# define __THROW
-#endif
-
-
 #ifndef ATTRIBUTE_UNUSED
 # define ATTRIBUTE_UNUSED __attribute__ ((__unused__))
 #endif /* ATTRIBUTE_UNUSED */
@@ -118,6 +113,22 @@
 # define BB_LITTLE_ENDIAN 1
 #endif
 
+#if BB_BIG_ENDIAN	/* SWAP_BEnn is the identity here; SWAP_LEnn byte-swaps */
+#define SWAP_BE16(x) (x)
+#define SWAP_BE32(x) (x)
+#define SWAP_BE64(x) (x)
+#define SWAP_LE16(x) bswap_16(x)
+#define SWAP_LE32(x) bswap_32(x)
+#define SWAP_LE64(x) bswap_64(x)
+#else			/* SWAP_LEnn is the identity here; SWAP_BEnn byte-swaps */
+#define SWAP_BE16(x) bswap_16(x)
+#define SWAP_BE32(x) bswap_32(x)
+#define SWAP_BE64(x) bswap_64(x)
+#define SWAP_LE16(x) (x)
+#define SWAP_LE32(x) (x)
+#define SWAP_LE64(x) (x)
+#endif			/* identity forms are parenthesized so both branches parse alike */
+
 /* ---- Networking ------------------------------------------ */
 #ifndef __APPLE__
 # include <arpa/inet.h>
diff --git a/libbb/md5.c b/libbb/md5.c
index 584f5fe..58be40b 100644
--- a/libbb/md5.c
+++ b/libbb/md5.c
@@ -27,15 +27,6 @@
 # define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED
 # endif
 
-/* Handle endian-ness */
-# if !BB_BIG_ENDIAN
-#  define SWAP(n) (n)
-# elif defined(bswap_32)
-#  define SWAP(n) bswap_32(n)
-# else
-#  define SWAP(n) ((n << 24) | ((n&0xFF00)<<8) | ((n&0xFF0000)>>8) | (n>>24))
-# endif
-
 /* Initialize structure containing state of computation.
  * (RFC 1321, 3.3: Step 3)
  */
@@ -132,7 +123,7 @@
 		uint32_t temp;
 
 		for (i = 0; i < 16; i++) {
-			cwp[i] = SWAP(words[i]);
+			cwp[i] = SWAP_LE32(words[i]);
 		}
 		words += 16;
 
@@ -224,7 +215,7 @@
 #  define OP(a, b, c, d, s, T)	\
       do	\
 	{	\
-	  a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T;	\
+	  a += FF (b, c, d) + (*cwp++ = SWAP_LE32(*words)) + T; \
 	  ++words;	\
 	  CYCLIC (a, s);	\
 	  a += b;	\
@@ -455,10 +446,10 @@
 	 * IMPORTANT: On some systems it is required that RESBUF is correctly
 	 * aligned for a 32 bits value.
 	 */
-	((uint32_t *) resbuf)[0] = SWAP(ctx->A);
-	((uint32_t *) resbuf)[1] = SWAP(ctx->B);
-	((uint32_t *) resbuf)[2] = SWAP(ctx->C);
-	((uint32_t *) resbuf)[3] = SWAP(ctx->D);
+	((uint32_t *) resbuf)[0] = SWAP_LE32(ctx->A);
+	((uint32_t *) resbuf)[1] = SWAP_LE32(ctx->B);
+	((uint32_t *) resbuf)[2] = SWAP_LE32(ctx->C);
+	((uint32_t *) resbuf)[3] = SWAP_LE32(ctx->D);
 
 	return resbuf;
 }
diff --git a/miscutils/hdparm.c b/miscutils/hdparm.c
index ea73701..55ed2f6 100644
--- a/miscutils/hdparm.c
+++ b/miscutils/hdparm.c
@@ -26,7 +26,6 @@
 #include <getopt.h>
 #include <linux/types.h>
 #include <linux/hdreg.h>
-#include <asm/byteorder.h>
 
 #if BB_BIG_ENDIAN && !defined(__USE_XOPEN)
 # define __USE_XOPEN
@@ -2007,9 +2006,9 @@
 		args1[0] = WIN_IDENTIFY;
 		args1[3] = 1;
 		if (!bb_ioctl_alt(fd, HDIO_DRIVE_CMD, args1, WIN_PIDENTIFY, "HDIO_DRIVE_CMD(identify)")) {
-			for (i=0; i<(sizeof args1)/2; i+=2)
-				__le16_to_cpus((uint16_t *)(&args1[i]));
-			identify((void *)&args1[4]);
+			uint16_t *ptr = (uint16_t *)args1;
+			for (i=0; i<sizeof(args1)/2; i++) ptr[i] = SWAP_LE16(ptr[i]);
+			identify((void *)(ptr+2));
 		}
 	}
 #endif
@@ -2057,7 +2056,7 @@
 
 static void identify_from_stdin(void)
 {
-	unsigned short sbuf[800];
+	uint16_t sbuf[800];
 	unsigned char  buf[1600], *b = (unsigned char *)buf;
 	int i, count = read(0, buf, 1280);
 
@@ -2066,8 +2065,7 @@
 
 	for (i = 0; count >= 4; ++i)
 	{
-		sbuf[i] = (fromhex(b[0]) << 12) | (fromhex(b[1]) << 8) | (fromhex(b[2]) << 4) | fromhex(b[3]);
-		__le16_to_cpus((uint16_t *)(&sbuf[i]));
+		sbuf[i] = SWAP_LE16((fromhex(b[0]) << 12) | (fromhex(b[1]) << 8) | (fromhex(b[2]) << 4) | fromhex(b[3]));
 		b += 5;
 		count -= 5;
 	}