bzip2: ~1% speedup by special-casing "store 1 bit" function

function                                             old     new   delta
bsW1                                                   -      52     +52
BZ2_compressBlock                                    230     225      -5
BZ2_blockSort                                        125     118      -7
sendMTFValues                                       2070    2051     -19
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 52/-31)             Total: 21 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c
index e600cb7..a3b099f 100644
--- a/archival/libarchive/bz/blocksort.c
+++ b/archival/libarchive/bz/blocksort.c
@@ -1056,7 +1056,9 @@
 		}
 	}
 
+#if BZ_LIGHT_DEBUG
 	s->origPtr = -1;
+#endif
 	for (i = 0; i < s->nblock; i++)
 		if (ptr[i] == 0) {
 			s->origPtr = i;
diff --git a/archival/libarchive/bz/bzlib.c b/archival/libarchive/bz/bzlib.c
index 3572474..ef98bb2 100644
--- a/archival/libarchive/bz/bzlib.c
+++ b/archival/libarchive/bz/bzlib.c
@@ -55,7 +55,7 @@
 {
 	int i;
 	s->nblock = 0;
-	//indexes inot s->zbits[], initialzation moved to init of s->zbits
+	//indexes into s->zbits[], initialization moved to init of s->zbits
 	//s->posZ = s->zbits; // was: s->numZ = 0;
 	//s->state_out_pos = s->zbits;
 	BZ_INITIALISE_CRC(s->blockCRC);
diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c
index 271982c..4d0f775 100644
--- a/archival/libarchive/bz/compress.c
+++ b/archival/libarchive/bz/compress.c
@@ -88,6 +88,22 @@
 	s->bsBuff |= (v << (16 - s->bsLive));
 	s->bsLive += 16;
 }
+/* Same as bsW(s, 1, v): append one bit to the output bit buffer. Callers pass v = 0 or 1. */
+static
+#if CONFIG_BZIP2_FAST >= 5
+ALWAYS_INLINE
+#endif
+void bsW1(EState* s, uint32_t v)
+{
+	/* Only 1 free bit is needed, so a single byte flush is enough; no loop freeing > 8 bits is required */
+	if (s->bsLive >= 8) {
+		*s->posZ++ = (uint8_t)(s->bsBuff >> 24); /* emit bits 31..24 of bsBuff and advance posZ */
+		s->bsBuff <<= 8; /* move the remaining queued bits up into the vacated byte */
+		s->bsLive -= 8;
+	}
+	s->bsBuff |= (v << (31 - s->bsLive)); /* place v at bit (31 - bsLive), just below the bits already queued */
+	s->bsLive += 1; /* one more bit is now queued */
+}
 
 
 /*---------------------------------------------------*/
@@ -557,8 +573,8 @@
 	for (i = 0; i < nSelectors; i++) {
 		unsigned j;
 		for (j = 0; j < s->selectorMtf[i]; j++)
-			bsW(s, 1, 1);
-		bsW(s, 1, 0);
+			bsW1(s, 1);
+		bsW1(s, 0);
 	}
 
 	/*--- Now the coding tables. ---*/
@@ -568,7 +584,7 @@
 		for (i = 0; i < alphaSize; i++) {
 			while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ };
 			while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ };
-			bsW(s, 1, 0);
+			bsW1(s, 0);
 		}
 	}
 
@@ -682,7 +698,7 @@
 		 * so as to maintain backwards compatibility with
 		 * older versions of bzip2.
 		 */
-		bsW(s, 1, 0);
+		bsW1(s, 0);
 
 		bsW(s, 24, s->origPtr);
 		generateMTFValues(s);