aarch64 CPU arch / ThunderX platform initial support

Change-Id: Ia2edd3cee2c25c26c7c47a9023744b97226434c7
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/vppinfra/vppinfra/bihash_24_8.h b/vppinfra/vppinfra/bihash_24_8.h
index c789c98..c0dff8c 100644
--- a/vppinfra/vppinfra/bihash_24_8.h
+++ b/vppinfra/vppinfra/bihash_24_8.h
@@ -50,14 +50,6 @@
 
 static inline u64 clib_bihash_hash_24_8  (clib_bihash_kv_24_8_t *v)
 {
-#if 0
-  u64 * dp = (u64 *) &v->key[0];
-  u64 value = 0;
-
-  value __builtin_ia32_crc32di (dp[0], value);
-  value __builtin_ia32_crc32di (dp[1], value);
-  value __builtin_ia32_crc32di (dp[2], value);
-#endif
   u32 * dp = (u32 *) &v->key[0];
   u32 value = 0;
 
@@ -70,8 +62,7 @@
 
   return value;
 }
-
-#else
+#else 
 static inline u64 clib_bihash_hash_24_8  (clib_bihash_kv_24_8_t *v)
 {
   u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2];
diff --git a/vppinfra/vppinfra/byte_order.h b/vppinfra/vppinfra/byte_order.h
index 9c55632..4f385f1 100644
--- a/vppinfra/vppinfra/byte_order.h
+++ b/vppinfra/vppinfra/byte_order.h
@@ -40,7 +40,7 @@
 
 #include <vppinfra/clib.h>
 
-#if defined(__x86_64__) || defined(i386)
+#if defined(__x86_64__) || defined(i386) || defined(__aarch64__)
 #define CLIB_ARCH_IS_BIG_ENDIAN (0)
 #define CLIB_ARCH_IS_LITTLE_ENDIAN (1)
 #else
diff --git a/vppinfra/vppinfra/longjmp.S b/vppinfra/vppinfra/longjmp.S
index ac138a9..9ba237d 100644
--- a/vppinfra/vppinfra/longjmp.S
+++ b/vppinfra/vppinfra/longjmp.S
@@ -580,6 +580,109 @@
 clib_calljmp:	
 1:	B	.S1     1b
 	
+#elif defined (__aarch64__)
+/*
+   Copyright (c) 2011, 2012 ARM Ltd
+   All rights reserved.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+   3. The name of the company may not be used to endorse or promote
+      products derived from this software without specific prior written
+      permission.
+   THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define GPR_LAYOUT	/* x19-x30 as pairs at offsets 0..80, then x16 at 96 */	\
+	REG_PAIR (x19, x20,  0);	\
+	REG_PAIR (x21, x22, 16);	\
+	REG_PAIR (x23, x24, 32);	\
+	REG_PAIR (x25, x26, 48);	\
+	REG_PAIR (x27, x28, 64);	\
+	REG_PAIR (x29, x30, 80);	\
+	REG_ONE (x16,      96)	/* x16 holds sp: see the mov to/from sp in clib_setjmp and clib_longjmp */
+#define FPR_LAYOUT	/* d8-d15 as pairs at offsets 112..160 */	\
+	REG_PAIR ( d8,  d9, 112);	\
+	REG_PAIR (d10, d11, 128);	\
+	REG_PAIR (d12, d13, 144);	\
+	REG_PAIR (d14, d15, 160);	/* last pair ends at byte 176 of the save area */
+// clib_setjmp (x0=save area, x1=value to return): record state in [x0], then return x1
+	.global	clib_setjmp
+	.type	clib_setjmp, %function
+clib_setjmp:
+	mov	x16, sp	// stage sp in x16 so REG_ONE in GPR_LAYOUT can store it
+#define REG_PAIR(REG1, REG2, OFFS)	stp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS)		str REG1, [x0, OFFS]
+	GPR_LAYOUT	// store x19-x30 and the staged sp into the save area
+	FPR_LAYOUT	// store d8-d15 into the save area
+#undef REG_PAIR
+#undef REG_ONE
+	mov	x0, x1	// the direct call returns the second argument
+	ret
+	.size	clib_setjmp, .-clib_setjmp
+// void clib_longjmp (x0=save area, x1=value): resume at the saved x30 with x0=x1; does not return to its caller
+	.global	clib_longjmp
+	.type	clib_longjmp, %function
+clib_longjmp:
+#define REG_PAIR(REG1, REG2, OFFS)	ldp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS)		ldr REG1, [x0, OFFS]
+	GPR_LAYOUT	// reload x19-x30, and the saved sp into x16
+	FPR_LAYOUT	// reload d8-d15
+#undef REG_PAIR
+#undef REG_ONE
+	mov	sp, x16	// switch back to the saved stack pointer
+	mov     x0, x1	// value handed to the resumed code
+	// a zero value is passed through as-is; the fixup below stays disabled:
+	//   cmp w1, #0 ; cinc w0, w1, eq
+	// use br not ret, as ret is guaranteed to mispredict
+	br	x30
+	.size	clib_longjmp, .-clib_longjmp
+
+
+// clib_calljmp (x0=function, x1=arg, x2=new_stack): run function(arg) with sp on new_stack
+	.global	clib_calljmp
+	.type	clib_calljmp, %function
+clib_calljmp:
+	// save fn ptr
+	mov     x3, x0
+	// set up fn arg
+	mov     x0, x1
+	// remember the caller sp before switching stacks
+	mov     x4, sp
+	
+	// space for saved sp, lr on new stack (NOTE(review): new_stack presumably 16-byte aligned - confirm)
+	sub     x2, x2, #16
+	mov     sp, x2
+	
+	// save old sp and link register on new stack
+        str     x4, [sp]
+	str     x30,[sp,#8]
+        mov     x4, sp	// NOTE(review): x4 is reloaded from sp after blr; this copy looks unused
+
+	// call the function; x0 is not modified afterwards, so its result reaches our caller
+        blr     x3
+	
+	// restore old sp and link register
+	mov     x4, sp	// relies on the callee returning with sp where we left it
+        
+	ldr     x3, [x4]
+	ldr     x30,[x4, #8]
+        mov     sp, x3
+	ret
+	.size	clib_calljmp, .-clib_calljmp
 #else
 #error "unknown machine"
 #endif	
diff --git a/vppinfra/vppinfra/longjmp.h b/vppinfra/vppinfra/longjmp.h
index a28b20c..7252aa3 100644
--- a/vppinfra/vppinfra/longjmp.h
+++ b/vppinfra/vppinfra/longjmp.h
@@ -91,6 +91,8 @@
 /* setjmp/longjmp not supported for the moment. */
 #define CLIB_ARCH_LONGJMP_REGS 0
 
+#elif defined(__aarch64__)
+#define CLIB_ARCH_LONGJMP_REGS (22)	/* presumably 8-byte slots: the longjmp.S save layout ends at byte 176 = 22 * 8 - confirm */
 #else
 #error "unknown machine"
 #endif
diff --git a/vppinfra/vppinfra/test_longjmp.c b/vppinfra/vppinfra/test_longjmp.c
index 40b1440..299de25 100644
--- a/vppinfra/vppinfra/test_longjmp.c
+++ b/vppinfra/vppinfra/test_longjmp.c
@@ -92,7 +92,7 @@
 
 static void test_calljmp (unformat_input_t * input)
 {
-  static u8 stack[32*1024];
+  static u8 stack[32*1024] __attribute__((aligned(16)));
   uword v;
 
   v = clib_calljmp (f3, 0, stack + sizeof (stack));
diff --git a/vppinfra/vppinfra/time.c b/vppinfra/vppinfra/time.c
index 0da469e..9af599a 100644
--- a/vppinfra/vppinfra/time.c
+++ b/vppinfra/vppinfra/time.c
@@ -78,6 +78,11 @@
   int fd;
   unformat_input_t input;
 
+/* $$$$ aarch64 kernel doesn't report "cpu MHz" */
+#if defined(__aarch64__)
+  return 0.0;
+#endif
+  
   cpu_freq = 0;
   fd = open ("/proc/cpuinfo", 0);
   if (fd < 0)
diff --git a/vppinfra/vppinfra/time.h b/vppinfra/vppinfra/time.h
index 524eff6..3c48108 100644
--- a/vppinfra/vppinfra/time.h
+++ b/vppinfra/vppinfra/time.h
@@ -143,8 +143,18 @@
   return ((u64)h << 32) | l;
 }
 
-#else
+#elif defined (__aarch64__)
+always_inline u64 clib_cpu_time_now (void)
+{
+  u64 tsc;
 
+  /* Read the cntvct_el0 counter register. Works on Cavium ThunderX. Other platforms: YMMV */
+  asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
+
+  return tsc;
+}
+
+#else
 #error "don't know how to read CPU time stamp"
 
 #endif
diff --git a/vppinfra/vppinfra/types.h b/vppinfra/vppinfra/types.h
index d43e961..d5ad6ba 100644
--- a/vppinfra/vppinfra/types.h
+++ b/vppinfra/vppinfra/types.h
@@ -77,7 +77,7 @@
 #define log2_uword_bits 6
 #define clib_address_bits _MIPS_SZPTR
 
-#elif defined(alpha) || defined(__x86_64__) || defined (__powerpc64__)
+#elif defined(alpha) || defined(__x86_64__) || defined (__powerpc64__) || defined (__aarch64__)
 typedef int i32;
 typedef long i64;
 
diff --git a/vppinfra/vppinfra/vector.h b/vppinfra/vppinfra/vector.h
index b301933..84c52a2 100644
--- a/vppinfra/vppinfra/vector.h
+++ b/vppinfra/vppinfra/vector.h
@@ -61,6 +61,10 @@
 
 #define _vector_size(n) __attribute__ ((vector_size (n)))
 
+#if defined (__aarch64__)
+typedef unsigned int u32x4 _vector_size (16);	/* 16-byte vector of unsigned int lanes */
+#endif
+
 #ifdef CLIB_HAVE_VEC64
 /* Signed 64 bit. */
 typedef char i8x8 _vector_size (8);