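tls: speed up xor'ing of aligned 16-byte buffers

Add xorbuf_aligned_AES_BLOCK_SIZE(), a counterpart of xorbuf() that takes
no count argument, and declare it in networking/tls.h together with the
ALIGNED_long macro (ALIGNED(sizeof(long))). Size impact: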

function                                             old     new   delta
xorbuf_aligned_AES_BLOCK_SIZE                          -      23     +23
xwrite_encrypted                                     585     580      -5
aesgcm_GHASH                                         233     228      -5
GMULT                                                192     187      -5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 23/-15)              Total: 8 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/networking/tls.h b/networking/tls.h
index 4b0dc74..494ed78 100644
--- a/networking/tls.h
+++ b/networking/tls.h
@@ -81,8 +81,12 @@
 #define AES_BLOCK_SIZE  16
 
 void tls_get_random(void *buf, unsigned len) FAST_FUNC;
+
 void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC;
 
+#define ALIGNED_long ALIGNED(sizeof(long))
+void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC;
+
 #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS)
 
 #define psFree(p, pool)    free(p)