decompress_bunzip2: keep bd->writeCRC in CPU reg in the hot loop

-5 bytes on 64-bit, +7 bytes on 32-bit.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 3a5d233..8d7746a 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -492,15 +492,20 @@
 int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 {
 	const uint32_t *dbuf;
-	int pos, current, previous, gotcount;
+	int pos, current, previous, out_count;
+	uint32_t CRC;
 
-	/* If last read was short due to end of file, return last block now */
-	if (bd->writeCount < 0) return bd->writeCount;
+	/* If we already have error/end indicator, return it */
+	if (bd->writeCount < 0)
+		return bd->writeCount;
 
-	gotcount = 0;
+	out_count = 0;
 	dbuf = bd->dbuf;
+
+	/* Register-cached state (hopefully): */
 	pos = bd->writePos;
 	current = bd->writeCurrent;
+	CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
 
 	/* We will always have pending decoded data to write into the output
 	   buffer unless this is the very first call (in which case we haven't
@@ -514,8 +519,8 @@
 		/* Loop outputting bytes */
 		for (;;) {
 
-			/* If the output buffer is full, snapshot state and return */
-			if (gotcount >= len) {
+			/* If the output buffer is full, save cached state and return */
+			if (out_count >= len) {
 				/* Unlikely branch.
 				 * Use of "goto" instead of keeping code here
 				 * helps compiler to realize this. */
@@ -523,17 +528,16 @@
 			}
 
 			/* Write next byte into output buffer, updating CRC */
-			outbuf[gotcount++] = current;
-			bd->writeCRC = (bd->writeCRC << 8)
-				^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
+			outbuf[out_count++] = current;
+			CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
 
 			/* Loop now if we're outputting multiple copies of this byte */
 			if (bd->writeCopies) {
 				/* Unlikely branch */
 				/*--bd->writeCopies;*/
 				/*continue;*/
-				/* Same, but (ab)using other existing --writeCopies operation.
-				 * Luckily, this also compiles into just one branch insn: */
+				/* Same, but (ab)using other existing --writeCopies operation
+				 * (and this if() compiles into just test+branch pair): */
 				goto dec_writeCopies;
 			}
  decode_next_byte:
@@ -549,7 +553,7 @@
 			/* After 3 consecutive copies of the same byte, the 4th
 			 * is a repeat count.  We count down from 4 instead
 			 * of counting up because testing for non-zero is faster */
-			if (--bd->writeRunCountdown) {
+			if (--bd->writeRunCountdown != 0) {
 				if (current != previous)
 					bd->writeRunCountdown = 4;
 			} else {
@@ -568,11 +572,11 @@
 		} /* for(;;) */
 
 		/* Decompression of this input block completed successfully */
-		bd->writeCRC = ~bd->writeCRC;
-		bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
+		bd->writeCRC = CRC = ~CRC;
+		bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
 
-		/* If this block had a CRC error, force file level CRC error. */
-		if (bd->writeCRC != bd->headerCRC) {
+		/* If this block had a CRC error, force file level CRC error */
+		if (CRC != bd->headerCRC) {
 			bd->totalCRC = bd->headerCRC + 1;
 			return RETVAL_LAST_BLOCK;
 		}
@@ -581,23 +585,26 @@
 	/* Refill the intermediate buffer by Huffman-decoding next block of input */
 	{
 		int r = get_next_block(bd);
-		if (r) {
+		if (r) { /* error/end */
 			bd->writeCount = r;
-			return (r != RETVAL_LAST_BLOCK) ? r : gotcount;
+			return (r != RETVAL_LAST_BLOCK) ? r : out_count;
 		}
 	}
 
-	bd->writeCRC = ~0;
+	CRC = ~0;
 	pos = bd->writePos;
 	current = bd->writeCurrent;
 	goto decode_next_byte;
 
  outbuf_full:
-	/* Output buffer is full, snapshot state and return */
+	/* Output buffer is full, save cached state and return */
 	bd->writePos = pos;
 	bd->writeCurrent = current;
+	bd->writeCRC = CRC;
+
 	bd->writeCopies++;
-	return gotcount;
+
+	return out_count;
 }
 
 /* Allocate the structure, read file header.  If in_fd==-1, inbuf must contain