libbb/sha256: optional x86 hardware accelerated hashing

64 bit:
function                                             old     new   delta
sha256_process_block64_shaNI                           -     730    +730
.rodata                                           108314  108586    +272
sha256_begin                                          31      83     +52
------------------------------------------------------------------------------
(add/remove: 5/1 grow/shrink: 2/0 up/down: 1055/-1)          Total: 1054 bytes

32 bit:
function                                             old     new   delta
sha256_process_block64_shaNI                           -     747    +747
.rodata                                           104318  104590    +272
sha256_begin                                          29      84     +55
------------------------------------------------------------------------------
(add/remove: 5/1 grow/shrink: 2/0 up/down: 1075/-1)          Total: 1074 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
index 47c40af..656fb54 100755
--- a/libbb/hash_md5_sha_x86-64.S.sh
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -433,7 +433,7 @@
 	.size	sha1_process_block64, .-sha1_process_block64
 
 	.section	.rodata.cst16.sha1const, \"aM\", @progbits, 16
-	.align	16
+	.balign	16
 rconst0x5A827999:
 	.long	0x5A827999
 	.long	0x5A827999