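tls: P256: 64-bit optimizations

Add move_from_unaligned64() to include/platform.h, in both the
aliased-pointer and the memcpy-based variants of the unaligned-access macros.
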
function                                             old     new   delta
sp_256_proj_point_dbl_8                              421     428      +7
sp_256_point_from_bin2x32                             78      84      +6
sp_256_cmp_8                                          38      42      +4
sp_256_to_bin_8                                       28      31      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 20/0)               Total: 20 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/include/platform.h b/include/platform.h
index 9e1fb04..ad27bb3 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -239,6 +239,7 @@
 # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
 # define move_from_unaligned32(v, u32p) ((v) = *(bb__aliased_uint32_t*)(u32p))
+# define move_from_unaligned64(v, u64p) ((v) = *(bb__aliased_uint64_t*)(u64p))
 # define move_to_unaligned16(u16p, v)   (*(bb__aliased_uint16_t*)(u16p) = (v))
 # define move_to_unaligned32(u32p, v)   (*(bb__aliased_uint32_t*)(u32p) = (v))
 # define move_to_unaligned64(u64p, v)   (*(bb__aliased_uint64_t*)(u64p) = (v))
@@ -250,6 +251,7 @@
 # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
 # define move_from_unaligned16(v, u16p) (memcpy(&(v), (u16p), 2))
 # define move_from_unaligned32(v, u32p) (memcpy(&(v), (u32p), 4))
+# define move_from_unaligned64(v, u64p) (memcpy(&(v), (u64p), 8))
 # define move_to_unaligned16(u16p, v) do { \
 	uint16_t __t = (v); \
 	memcpy((u16p), &__t, 2); \