blob: a593c5c40aab7f7c117d3ef836ac3ebd5eae970f [file] [log] [blame]
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001/*
2 * Copyright (C) 2021 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
/* Set to 1 to compile in the dbg()/dump_hex()/dump_256()/dump_512()
 * tracing helpers; with 0 they expand to no-ops.
 */
#define SP_DEBUG 0
/* NOTE(review): FIXED_SECRET and FIXED_PEER_PUBKEY are not referenced in
 * this part of the file; presumably they pin key material for reproducible
 * debugging - confirm against their users further down.
 */
#define FIXED_SECRET 0
#define FIXED_PEER_PUBKEY 0

/* When nonzero, the i386/x86_64 inline-asm variants of the add/sub/mul
 * primitives are used (GCC-compatible compilers only); set to 0 to force
 * the portable C fallbacks.
 */
#define ALLOW_ASM 1
13
#if SP_DEBUG
# define dbg(...) fprintf(stderr, __VA_ARGS__)
/* Debug helper: hex-encode len bytes at vp and print them through dbg(),
 * using fmt as the format string (it receives the hex text as its only
 * argument).
 *
 * fmt  printf-style format, must be a trusted literal supplied by the caller.
 * vp   Bytes to dump.
 * len  Number of bytes; clamped to what fits into the local buffer so an
 *      oversized (or negative) length cannot overrun the stack.
 */
static void dump_hex(const char *fmt, const void *vp, int len)
{
	char hexbuf[32 * 1024 + 4];
	/* assumes bin2hex() writes two characters per input byte and returns
	 * a pointer just past the last one written; keep room for the NUL */
	const int max_len = (int)((sizeof(hexbuf) - 1) / 2);

	if (len < 0)
		len = 0;
	if (len > max_len)
		len = max_len;
	bin2hex(hexbuf, vp, len)[0] = '\0';
	dbg(fmt, hexbuf);
}
#else
# define dbg(...) ((void)0)
# define dump_hex(...) ((void)0)
#endif
28
/* One 32-bit word ("digit") of a multi-word number. 256-bit values in this
 * file are sp_digit[8] arrays with the least significant word first.
 */
typedef uint32_t sp_digit;
/* Signed counterpart; used as the return type of sp_256_cmp_8() */
typedef int32_t signed_sp_digit;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020031
/* 64-bit optimizations:
 * if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff,
 * then loads and stores can be done in 64-bit chunks.
 *
 * A narrower case is when arch is also little-endian (such as x86_64),
 * then "LSW first", uint32[8] and uint64[4] representations are equivalent,
 * and arithmetic can be done in 64 bits too.
 *
 * UNALIGNED_LE_64BIT selects that narrower case. It is only switched on
 * for GCC-compatible compilers targeting x86_64; everything else gets the
 * generic 32-bit code paths.
 */
#if defined(__GNUC__) && defined(__x86_64__)
# define UNALIGNED_LE_64BIT 1
#else
# define UNALIGNED_LE_64BIT 0
#endif
45
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020046/* The code below is taken from parts of
47 * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c
48 * and heavily modified.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020049 */
50
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020051typedef struct sp_point {
Denys Vlasenko1b93c7c2021-11-28 02:56:02 +010052 sp_digit x[8]
53#if ULONG_MAX > 0xffffffff
54 /* Make sp_point[] arrays to not be 64-bit misaligned */
55 ALIGNED(8)
56#endif
57 ;
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +010058 sp_digit y[8];
59 sp_digit z[8];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020060 int infinity;
61} sp_point;
62
/* The modulus (prime) of the curve P256.
 * Stored least significant word first: p256_mod[0] is bits 0..31,
 * p256_mod[7] is bits 224..255.
 */
static const sp_digit p256_mod[8] ALIGNED(8) = {
	0xffffffff,0xffffffff,0xffffffff,0x00000000,
	0x00000000,0x00000000,0x00000001,0xffffffff,
};

/* NOTE(review): presumably the Montgomery multiplier for p256_mod
 * (-1/p mod 2^32, which is 1 because p256_mod[0] is ffffffff) - confirm
 * against its users; it is not referenced in this part of the file.
 */
#define p256_mp_mod ((sp_digit)0x000001)
70
Denys Vlasenkoe7305052021-10-05 13:30:48 +020071/* Write r as big endian to byte array.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020072 * Fixed length number of bytes written: 32
73 *
74 * r A single precision integer.
75 * a Byte array.
76 */
Denys Vlasenko4bc9da12021-11-27 11:28:11 +010077#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
78static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a)
79{
80 int i;
81 const uint64_t* r = (void*)rr;
82
Denys Vlasenko4bc9da12021-11-27 11:28:11 +010083 r += 4;
84 for (i = 0; i < 4; i++) {
85 r--;
86 move_to_unaligned64(a, SWAP_BE64(*r));
87 a += 8;
88 }
89}
90#else
Denys Vlasenko3b411eb2021-10-05 20:00:50 +020091static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020092{
Denys Vlasenko3b411eb2021-10-05 20:00:50 +020093 int i;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +020094
Denys Vlasenko3b411eb2021-10-05 20:00:50 +020095 r += 8;
96 for (i = 0; i < 8; i++) {
97 r--;
98 move_to_unaligned32(a, SWAP_BE32(*r));
99 a += 4;
Denys Vlasenko12040122021-04-26 20:24:34 +0200100 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200101}
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100102#endif
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200103
/* Read big endian unsigned byte array into r.
 * Fixed length number of bytes read: 32
 *
 * r  A single precision integer.
 * a  Byte array.
 */
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100110#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
111static void sp_256_from_bin_8(sp_digit* rr, const uint8_t* a)
112{
113 int i;
114 uint64_t* r = (void*)rr;
115
116 r += 4;
117 for (i = 0; i < 4; i++) {
118 uint64_t v;
119 move_from_unaligned64(v, a);
120 *--r = SWAP_BE64(v);
121 a += 8;
122 }
123}
124#else
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200125static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200126{
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200127 int i;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200128
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200129 r += 8;
130 for (i = 0; i < 8; i++) {
131 sp_digit v;
132 move_from_unaligned32(v, a);
133 *--r = SWAP_BE32(v);
134 a += 4;
Denys Vlasenko12040122021-04-26 20:24:34 +0200135 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200136}
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100137#endif
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200138
#if SP_DEBUG
/* Debug: print a 256-bit number (8 digits) as big-endian hex */
static void dump_256(const char *fmt, const sp_digit* r)
{
	uint8_t be_bytes[32];

	sp_256_to_bin_8(r, be_bytes);
	dump_hex(fmt, be_bytes, 32);
}
/* Debug: print a 512-bit number (16 digits) as big-endian hex */
static void dump_512(const char *fmt, const sp_digit* r)
{
	uint8_t be_bytes[64];

	/* upper 256 bits are printed first, so they go to the front */
	sp_256_to_bin_8(r + 8, &be_bytes[0]);
	sp_256_to_bin_8(r, &be_bytes[32]);
	dump_hex(fmt, be_bytes, 64);
}
#else
# define dump_256(...) ((void)0)
# define dump_512(...) ((void)0)
#endif
157
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200158/* Convert a point of big-endian 32-byte x,y pair to type sp_point. */
159static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
160{
Denys Vlasenko12040122021-04-26 20:24:34 +0200161 memset(p, 0, sizeof(*p));
162 /*p->infinity = 0;*/
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200163 sp_256_from_bin_8(p->x, bin2x32);
164 sp_256_from_bin_8(p->y, bin2x32 + 32);
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200165 p->z[0] = 1; /* p->z = 1 */
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200166}
167
Denys Vlasenkob3b17132021-04-26 16:53:53 +0200168/* Compare a with b.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200169 *
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200170 * return -ve, 0 or +ve if a is less than, equal to or greater than b
171 * respectively.
172 */
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100173#if UNALIGNED_LE_64BIT
174static signed_sp_digit sp_256_cmp_8(const sp_digit* aa, const sp_digit* bb)
175{
176 const uint64_t* a = (void*)aa;
177 const uint64_t* b = (void*)bb;
178 int i;
179 for (i = 3; i >= 0; i--) {
180 if (a[i] == b[i])
181 continue;
182 return (a[i] > b[i]) * 2 - 1;
183 }
184 return 0;
185}
186#else
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200187static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200188{
Denys Vlasenko12040122021-04-26 20:24:34 +0200189 int i;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200190 for (i = 7; i >= 0; i--) {
191/* signed_sp_digit r = a[i] - b[i];
192 * if (r != 0)
193 * return r;
194 * does not work: think about a[i]=0, b[i]=0xffffffff
195 */
196 if (a[i] == b[i])
197 continue;
198 return (a[i] > b[i]) * 2 - 1;
Denys Vlasenko12040122021-04-26 20:24:34 +0200199 }
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200200 return 0;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200201}
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100202#endif
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200203
204/* Compare two numbers to determine if they are equal.
205 *
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200206 * return 1 when equal and 0 otherwise.
207 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200208static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200209{
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200210 return sp_256_cmp_8(a, b) == 0;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +0200211}
212
/* Add b to a into r. (r = a + b). Return !0 on overflow
 *
 * r  Result, 8 digits.
 * a  First operand, 8 digits.
 * b  Second operand, 8 digits.
 *
 * The return value is nonzero when the addition carried out of the top
 * word: the asm variants yield an all-ones value (sbb of a register with
 * itself), the C fallback yields 1. Callers should only test for != 0.
 * In every variant word k of a and b is read before word k of r is
 * written, so r may be the same array as a (sp_256_div2_8() relies on it).
 */
static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
	sp_digit reg;
	/* %0 = a, %1 = b, %2 = r, %3 = scratch / result.
	 * One add followed by seven adc: the carry flag links the words,
	 * so no flag-modifying instruction may be inserted in between.
	 */
	asm volatile (
"\n movl (%0), %3"
"\n addl (%1), %3"
"\n movl %3, (%2)"
"\n"
"\n movl 1*4(%0), %3"
"\n adcl 1*4(%1), %3"
"\n movl %3, 1*4(%2)"
"\n"
"\n movl 2*4(%0), %3"
"\n adcl 2*4(%1), %3"
"\n movl %3, 2*4(%2)"
"\n"
"\n movl 3*4(%0), %3"
"\n adcl 3*4(%1), %3"
"\n movl %3, 3*4(%2)"
"\n"
"\n movl 4*4(%0), %3"
"\n adcl 4*4(%1), %3"
"\n movl %3, 4*4(%2)"
"\n"
"\n movl 5*4(%0), %3"
"\n adcl 5*4(%1), %3"
"\n movl %3, 5*4(%2)"
"\n"
"\n movl 6*4(%0), %3"
"\n adcl 6*4(%1), %3"
"\n movl %3, 6*4(%2)"
"\n"
"\n movl 7*4(%0), %3"
"\n adcl 7*4(%1), %3"
"\n movl %3, 7*4(%2)"
"\n"
/* turn the final carry flag into 0 / ffffffff */
"\n sbbl %3, %3"
"\n"
	: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
	: "0" (a), "1" (b), "2" (r)
	: "memory"
	);
	return reg;
#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
	uint64_t reg;
	/* Same scheme with four 64-bit words (the digit array is treated as
	 * uint64[4], which is equivalent on little-endian x86_64).
	 */
	asm volatile (
"\n movq (%0), %3"
"\n addq (%1), %3"
"\n movq %3, (%2)"
"\n"
"\n movq 1*8(%0), %3"
"\n adcq 1*8(%1), %3"
"\n movq %3, 1*8(%2)"
"\n"
"\n movq 2*8(%0), %3"
"\n adcq 2*8(%1), %3"
"\n movq %3, 2*8(%2)"
"\n"
"\n movq 3*8(%0), %3"
"\n adcq 3*8(%1), %3"
"\n movq %3, 3*8(%2)"
"\n"
/* turn the final carry flag into 0 / all-ones */
"\n sbbq %3, %3"
"\n"
	: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
	: "0" (a), "1" (b), "2" (r)
	: "memory"
	);
	/* reg is 0 or all-ones; the conversion to int keeps it nonzero */
	return reg;
#else
	int i;
	sp_digit carry;

	carry = 0;
	for (i = 0; i < 8; i++) {
		sp_digit w, v;
		w = b[i] + carry;
		v = a[i];
		if (w != 0) {
			v = a[i] + w;
			carry = (v < a[i]);
			/* hope compiler detects above as "carry flag set" */
		}
		/* else: b + carry == 0, two cases:
		 * b:ffffffff, carry:1
		 * b:00000000, carry:0
		 * in either case, r[i] = a[i] and carry remains unchanged
		 */
		r[i] = v;
	}
	return carry;
#endif
}
308
/* Sub b from a into r. (r = a - b). Return !0 on underflow
 *
 * r  Result, 8 digits.
 * a  Minuend, 8 digits.
 * b  Subtrahend, 8 digits.
 *
 * The return value is nonzero when the subtraction borrowed out of the top
 * word: the asm variants yield an all-ones value (sbb of a register with
 * itself), the C fallback yields 1. Callers should only test for != 0.
 * In every variant word k of a and b is read before word k of r is
 * written, so r may be the same array as a.
 */
static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
	sp_digit reg;
	/* %0 = a, %1 = b, %2 = r, %3 = scratch / result.
	 * One sub followed by seven sbb: the carry (borrow) flag links the
	 * words, so no flag-modifying instruction may be inserted in between.
	 */
	asm volatile (
"\n movl (%0), %3"
"\n subl (%1), %3"
"\n movl %3, (%2)"
"\n"
"\n movl 1*4(%0), %3"
"\n sbbl 1*4(%1), %3"
"\n movl %3, 1*4(%2)"
"\n"
"\n movl 2*4(%0), %3"
"\n sbbl 2*4(%1), %3"
"\n movl %3, 2*4(%2)"
"\n"
"\n movl 3*4(%0), %3"
"\n sbbl 3*4(%1), %3"
"\n movl %3, 3*4(%2)"
"\n"
"\n movl 4*4(%0), %3"
"\n sbbl 4*4(%1), %3"
"\n movl %3, 4*4(%2)"
"\n"
"\n movl 5*4(%0), %3"
"\n sbbl 5*4(%1), %3"
"\n movl %3, 5*4(%2)"
"\n"
"\n movl 6*4(%0), %3"
"\n sbbl 6*4(%1), %3"
"\n movl %3, 6*4(%2)"
"\n"
"\n movl 7*4(%0), %3"
"\n sbbl 7*4(%1), %3"
"\n movl %3, 7*4(%2)"
"\n"
/* turn the final borrow flag into 0 / ffffffff */
"\n sbbl %3, %3"
"\n"
	: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
	: "0" (a), "1" (b), "2" (r)
	: "memory"
	);
	return reg;
#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
	uint64_t reg;
	/* Same scheme with four 64-bit words (the digit array is treated as
	 * uint64[4], which is equivalent on little-endian x86_64).
	 */
	asm volatile (
"\n movq (%0), %3"
"\n subq (%1), %3"
"\n movq %3, (%2)"
"\n"
"\n movq 1*8(%0), %3"
"\n sbbq 1*8(%1), %3"
"\n movq %3, 1*8(%2)"
"\n"
"\n movq 2*8(%0), %3"
"\n sbbq 2*8(%1), %3"
"\n movq %3, 2*8(%2)"
"\n"
"\n movq 3*8(%0), %3"
"\n sbbq 3*8(%1), %3"
"\n movq %3, 3*8(%2)"
"\n"
/* turn the final borrow flag into 0 / all-ones */
"\n sbbq %3, %3"
"\n"
	: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
	: "0" (a), "1" (b), "2" (r)
	: "memory"
	);
	/* reg is 0 or all-ones; the conversion to int keeps it nonzero */
	return reg;
#else
	int i;
	sp_digit borrow;

	borrow = 0;
	for (i = 0; i < 8; i++) {
		sp_digit w, v;
		w = b[i] + borrow;
		v = a[i];
		if (w != 0) {
			v = a[i] - w;
			borrow = (v > a[i]);
			/* hope compiler detects above as "carry flag set" */
		}
		/* else: b + borrow == 0, two cases:
		 * b:ffffffff, borrow:1
		 * b:00000000, borrow:0
		 * in either case, r[i] = a[i] and borrow remains unchanged
		 */
		r[i] = v;
	}
	return borrow;
#endif
}
404
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200405/* Sub p256_mod from r. (r = r - p256_mod). */
Denys Vlasenko87e3f2e2021-10-06 19:59:39 +0200406#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
/* i386 variant: subtracts the P256 prime from r in place, with the words
 * of the prime encoded as immediates. The final borrow is discarded.
 *
 * r  Number to reduce, 8 digits, modified in place.
 */
static void sp_256_sub_8_p256_mod(sp_digit* r)
{
//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
	/* One sub followed by seven sbb directly on memory; the borrow flag
	 * chains the words, so the instruction order must not change.
	 */
	asm volatile (
"\n subl $0xffffffff, (%0)"
"\n sbbl $0xffffffff, 1*4(%0)"
"\n sbbl $0xffffffff, 2*4(%0)"
"\n sbbl $0, 3*4(%0)"
"\n sbbl $0, 4*4(%0)"
"\n sbbl $0, 5*4(%0)"
"\n sbbl $1, 6*4(%0)"
"\n sbbl $0xffffffff, 7*4(%0)"
"\n"
	: "=r" (r)
	: "0" (r)
	: "memory"
	);
}
Denys Vlasenko87e3f2e2021-10-06 19:59:39 +0200425#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
/* x86_64 variant: subtracts the P256 prime from r in place, working on
 * four 64-bit words. The final borrow is discarded.
 *
 * r  Number to reduce, 8 digits (accessed as uint64[4]), modified in place.
 */
static void sp_256_sub_8_p256_mod(sp_digit* r)
{
	uint64_t reg;  /* scratch register for the top word (%2) */
	uint64_t ooff; /* register preloaded with 00000000ffffffff (%1) */
//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
	/* NOTE(review): the literal tied to the 64-bit "ooff" operand has a
	 * 32-bit type; this relies on the register being loaded zero-extended.
	 * Consider an explicit (uint64_t) cast - confirm with the GCC docs.
	 */
	asm volatile (
"\n addq $1, (%0)" // adding 1 is the same as subtracting ffffffffffffffff
"\n cmc" // only carry bit needs inverting
"\n"
"\n sbbq %1, 1*8(%0)" // %1 holds 00000000ffffffff
"\n"
"\n sbbq $0, 2*8(%0)"
"\n"
"\n movq 3*8(%0), %2"
"\n sbbq $0, %2" // adding 00000000ffffffff (in %1)
"\n addq %1, %2" // is the same as subtracting ffffffff00000001
"\n movq %2, 3*8(%0)"
"\n"
	: "=r" (r), "=r" (ooff), "=r" (reg)
	: "0" (r), "1" (0x00000000ffffffff)
	: "memory"
	);
}
Denys Vlasenko567eefc2021-10-06 14:25:10 +0200449#else
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200450static void sp_256_sub_8_p256_mod(sp_digit* r)
451{
452 sp_256_sub_8(r, r, p256_mod);
453}
Denys Vlasenko567eefc2021-10-06 14:25:10 +0200454#endif
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200455
/* Multiply a and b into r. (r = a * b)
 * r should be [16] array (512 bits), and must not coincide with a or b.
 *
 * r  Result, 16 digits.
 * a  First factor, 8 digits.
 * b  Second factor, 8 digits.
 *
 * All variants compute the product one result word ("column" k) at a time:
 * every a[i] * b[j] with i + j == k is added into a three-word accumulator,
 * the lowest accumulator word is stored as r[k], and the accumulator is
 * shifted down by one word for the next column. Because r[k] is stored
 * while later columns still read a[] and b[], r must not alias the inputs.
 */
static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
	int k;
	uint32_t accl;
	uint32_t acch;

	acch = accl = 0;
	for (k = 0; k < 15; k++) {
		int i, j;
		uint32_t acc_hi;
		/* first index pair of this column: i = max(0, k-7), j = k - i */
		i = k - 7;
		if (i < 0)
			i = 0;
		j = k - i;
		acc_hi = 0;
		do {
////////////////////////
//			uint64_t m = ((uint64_t)a[i]) * b[j];
//			acc_hi:acch:accl += m;
			/* %0..%2 = accl, acch, acc_hi (in/out), %7 = b[j] in memory;
			 * mull leaves the 64-bit product in edx:eax
			 */
			asm volatile (
			// a[i] is already loaded in %%eax
"\n mull %7"
"\n addl %%eax, %0"
"\n adcl %%edx, %1"
"\n adcl $0, %2"
			: "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
			: "0" (accl), "1" (acch), "2" (acc_hi), "a" (a[i]), "m" (b[j])
			: "cc", "dx"
			);
////////////////////////
			j--;
			i++;
		} while (i != 8 && i <= k);
		r[k] = accl;
		/* shift the accumulator down by one word */
		accl = acch;
		acch = acc_hi;
	}
	r[15] = accl;
#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
	/* Same algorithm on 64-bit words: 4 x 4 words giving 8 result words */
	const uint64_t* aa = (const void*)a;
	const uint64_t* bb = (const void*)b;
	uint64_t* rr = (void*)r;
	int k;
	uint64_t accl;
	uint64_t acch;

	acch = accl = 0;
	for (k = 0; k < 7; k++) {
		int i, j;
		uint64_t acc_hi;
		/* first index pair of this column: i = max(0, k-3), j = k - i */
		i = k - 3;
		if (i < 0)
			i = 0;
		j = k - i;
		acc_hi = 0;
		do {
////////////////////////
//			uint128_t m = ((uint128_t)a[i]) * b[j];
//			acc_hi:acch:accl += m;
			/* %0..%2 = accl, acch, acc_hi (in/out), %7 = bb[j] in memory;
			 * mulq leaves the 128-bit product in rdx:rax
			 */
			asm volatile (
			// aa[i] is already loaded in %%rax
"\n mulq %7"
"\n addq %%rax, %0"
"\n adcq %%rdx, %1"
"\n adcq $0, %2"
			: "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
			: "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j])
			: "cc", "dx"
			);
////////////////////////
			j--;
			i++;
		} while (i != 4 && i <= k);
		rr[k] = accl;
		/* shift the accumulator down by one word */
		accl = acch;
		acch = acc_hi;
	}
	rr[7] = accl;
#elif 0
	//TODO: arm assembly (untested)
	/* NOTE(review): this disabled branch lists [r], [a], [b] right after
	 * the first ':' (the output position) and the clobber names after the
	 * second one; it looks like the empty output section is missing.
	 * Verify before ever enabling this code.
	 */
	asm volatile (
"\n mov r5, #0"
"\n mov r6, #0"
"\n mov r7, #0"
"\n mov r8, #0"
"\n 1:"
"\n subs r3, r5, #28"
"\n movcc r3, #0"
"\n sub r4, r5, r3"
"\n 2:"
"\n ldr r14, [%[a], r3]"
"\n ldr r12, [%[b], r4]"
"\n umull r9, r10, r14, r12"
"\n adds r6, r6, r9"
"\n adcs r7, r7, r10"
"\n adc r8, r8, #0"
"\n add r3, r3, #4"
"\n sub r4, r4, #4"
"\n cmp r3, #32"
"\n beq 3f"
"\n cmp r3, r5"
"\n ble 2b"
"\n 3:"
"\n str r6, [%[r], r5]"
"\n mov r6, r7"
"\n mov r7, r8"
"\n mov r8, #0"
"\n add r5, r5, #4"
"\n cmp r5, #56"
"\n ble 1b"
"\n str r6, [%[r], r5]"
	: [r] "r" (r), [a] "r" (a), [b] "r" (b)
	: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
	);
#else
	/* Portable C: a 64-bit accumulator plus acc_hi as the third word */
	int i, j, k;
	uint64_t acc;

	acc = 0;
	for (k = 0; k < 15; k++) {
		uint32_t acc_hi;
		/* first index pair of this column: i = max(0, k-7), j = k - i */
		i = k - 7;
		if (i < 0)
			i = 0;
		j = k - i;
		acc_hi = 0;
		do {
			uint64_t m = ((uint64_t)a[i]) * b[j];
			acc += m;
			/* unsigned wrap-around means a carry into the third word */
			if (acc < m)
				acc_hi++;
			j--;
			i++;
		} while (i != 8 && i <= k);
		r[k] = acc;
		/* shift the 96-bit accumulator down by 32 bits */
		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
	}
	r[15] = acc;
#endif
}
600
/* Shift number right one bit. Bottom bit is lost; a nonzero carry is shifted in as the new top bit. */
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100602#if UNALIGNED_LE_64BIT
603static void sp_256_rshift1_8(sp_digit* rr, uint64_t carry)
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200604{
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100605 uint64_t *r = (void*)rr;
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200606 int i;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200607
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100608 carry = (((uint64_t)!!carry) << 63);
609 for (i = 3; i >= 0; i--) {
610 uint64_t c = r[i] << 63;
611 r[i] = (r[i] >> 1) | carry;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200612 carry = c;
613 }
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200614}
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100615#else
616static void sp_256_rshift1_8(sp_digit* r, sp_digit carry)
617{
618 int i;
619
620 carry = (((sp_digit)!!carry) << 31);
621 for (i = 7; i >= 0; i--) {
622 sp_digit c = r[i] << 31;
623 r[i] = (r[i] >> 1) | carry;
624 carry = c;
625 }
626}
627#endif
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200628
Denys Vlasenkodcfd8d32021-11-27 16:07:42 +0100629/* Divide the number by 2 mod the modulus (prime). (r = (r / 2) % m) */
630static void sp_256_div2_8(sp_digit* r /*, const sp_digit* m*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200631{
Denys Vlasenkodcfd8d32021-11-27 16:07:42 +0100632 const sp_digit* m = p256_mod;
633
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200634 int carry = 0;
Denys Vlasenkodcfd8d32021-11-27 16:07:42 +0100635 if (r[0] & 1)
636 carry = sp_256_add_8(r, r, m);
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100637 sp_256_rshift1_8(r, carry);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200638}
639
640/* Add two Montgomery form numbers (r = a + b % m) */
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200641static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
642 /*, const sp_digit* m*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200643{
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200644// const sp_digit* m = p256_mod;
645
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200646 int carry = sp_256_add_8(r, a, b);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200647 if (carry) {
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200648 sp_256_sub_8_p256_mod(r);
Denys Vlasenko55578f22021-10-05 19:45:56 +0200649 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200650}
651
652/* Subtract two Montgomery form numbers (r = a - b % m) */
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200653static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b
654 /*, const sp_digit* m*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200655{
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200656 const sp_digit* m = p256_mod;
657
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200658 int borrow;
659 borrow = sp_256_sub_8(r, a, b);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200660 if (borrow) {
661 sp_256_add_8(r, r, m);
Denys Vlasenko55578f22021-10-05 19:45:56 +0200662 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200663}
664
665/* Double a Montgomery form number (r = a + a % m) */
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200666static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200667{
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200668// const sp_digit* m = p256_mod;
669
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200670 int carry = sp_256_add_8(r, a, a);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200671 if (carry)
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200672 sp_256_sub_8_p256_mod(r);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200673}
674
675/* Triple a Montgomery form number (r = a + a + a % m) */
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200676static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200677{
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200678// const sp_digit* m = p256_mod;
679
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200680 int carry = sp_256_add_8(r, a, a);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200681 if (carry) {
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200682 sp_256_sub_8_p256_mod(r);
Denys Vlasenko55578f22021-10-05 19:45:56 +0200683 }
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200684 carry = sp_256_add_8(r, r, a);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200685 if (carry) {
Denys Vlasenko5e9c6172021-10-06 20:14:49 +0200686 sp_256_sub_8_p256_mod(r);
Denys Vlasenko55578f22021-10-05 19:45:56 +0200687 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200688}
689
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100690/* Shift the result in the high 256 bits down to the bottom. */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +0100691static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a)
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100692{
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +0100693 memcpy(r, a + 8, sizeof(*r) * 8);
Denys Vlasenko4bc9da12021-11-27 11:28:11 +0100694}
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200695
Denys Vlasenko8514b412021-11-28 21:40:23 +0100696#if UNALIGNED_LE_64BIT
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100697/* 64-bit little-endian optimized version.
698 * See generic 32-bit version below for explanation.
699 * The benefit of this version is: even though r[3] calculation is atrocious,
700 * we call sp_256_mul_add_4() four times, not 8.
Denys Vlasenko8514b412021-11-28 21:40:23 +0100701 * Measured run time improvement of curve_P256_compute_pubkey_and_premaster()
702 * call on x86-64: from ~1500us to ~900us. Code size +32 bytes.
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100703 */
/* r[0..4] += p256_mod * r[0], on 64-bit words, specialised for the P256
 * prime. Returns 1 if the final addition into r[4] overflowed, else 0.
 *
 * r  Five 64-bit words, updated in place. r[0] is NOT rewritten: its new
 *    value would be 0, and the caller won't look at it.
 *
 * This is the unrolled form of the generic loop
 *
 *     t = 0;
 *     for (i = 0; i < 4; i++) {
 *         m = (uint128_t)b * a[i] + r[i];
 *         t += m;  t_hi = (t < m);
 *         r[i] = (uint64_t)t;
 *         t = (t >> 64) | ((uint128_t)t_hi << 64);
 *     }
 *     r[4] += (uint64_t)t;
 *     return r[4] < (uint64_t)t;
 *
 * with b = r[0] and
 * a[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
 * and every 128-bit operation replaced by 64-bit ones.
 */
static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/)
{
	const uint64_t mu = r[0];
	const uint64_t mu_shl32 = mu << 32;
	uint64_t word; /* running low 64 bits: carry in, then new r[i] */
	uint64_t top;  /* carry out of word 3, finally added to r[4] */
	uint64_t diff_lo, diff_hi;
	uint64_t add_lo, add_hi;

	/* Word 0: mu * ffffffffffffffff + r[0] == mu << 64.
	 * The low word is 0 (store skipped), the carry into word 1 is mu.
	 */

	/* Word 1: mu (carry) + mu * 00000000ffffffff + r[1] == (mu << 32) + r[1] */
	r[1] += mu_shl32;
	word = (r[1] < mu_shl32); /* wrapped? */
	word += (mu >> 32);       /* bits of (mu << 32) above bit 63 */

	/* Word 2: the prime's word is 0, only the carry is added */
	r[2] += word;
	word = (r[2] < word);

	/* Word 3: carry + mu * ffffffff00000001 + r[3], where
	 * mu * ffffffff00000001 == mu + (((mu << 32) - mu) << 32)
	 */
	word += mu;
	top = (word < mu);
	word += r[3];
	top += (word < r[3]);

	/* 128-bit value (mu << 32) - mu, as diff_hi:diff_lo */
	diff_lo = mu_shl32 - mu;
	diff_hi = (mu >> 32) - (mu_shl32 < mu); /* minus the borrow */
	/* ...shifted left by 32 bits, as add_hi:add_lo */
	add_lo = diff_lo << 32;
	add_hi = (diff_lo >> 32) | (diff_hi << 32);

	word += add_lo;
	top += (word < add_lo);
	top += add_hi;
	r[3] = word;

	r[4] += top;
	return (r[4] < top); /* 1 if addition overflowed */
}
805
806static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* aa/*, const sp_digit* m, sp_digit mp*/)
807{
808// const sp_digit* m = p256_mod;
809 int i;
810 uint64_t *a = (void*)aa;
811
812 sp_digit carry = 0;
813 for (i = 0; i < 4; i++) {
814// mu = a[i];
815 if (sp_256_mul_add_4(a+i /*, m, mu*/)) {
816 int j = i + 4;
817 inc_next_word:
818 if (++j > 7) { /* a[8] array has no more words? */
819 carry++;
820 continue;
821 }
822 if (++a[j] == 0) /* did this overflow too? */
823 goto inc_next_word;
824 }
825 }
826 sp_512to256_mont_shift_8(r, aa);
827 if (carry != 0)
828 sp_256_sub_8_p256_mod(r);
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100829}
830
831#else /* Generic 32-bit version */
832
Denys Vlasenko4415f7b2021-11-27 15:47:26 +0100833/* Mul a by scalar b and add into r. (r += a * b)
834 * a = p256_mod
835 * b = r[0]
836 */
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200837static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200838{
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200839 sp_digit b = r[0];
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200840 uint64_t t;
841
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100842# if 0
843 const sp_digit* a = p256_mod;
844//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
845 int i;
846 t = 0;
847 for (i = 0; i < 8; i++) {
848 uint32_t t_hi;
849 uint64_t m = ((uint64_t)b * a[i]) + r[i];
850 t += m;
851 t_hi = (t < m);
852 r[i] = (sp_digit)t;
853 t = (t >> 32) | ((uint64_t)t_hi << 32);
854 }
855 r[8] += (sp_digit)t;
856 return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
857# else
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200858 // Unroll, then optimize the above loop:
859 //uint32_t t_hi;
860 uint64_t m;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200861 uint32_t t32;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200862
863 //m = ((uint64_t)b * a[0]) + r[0];
864 // Since b is r[0] and a[0] is ffffffff, the above optimizes to:
865 // m = r[0] * ffffffff + r[0] = (r[0] * 100000000 - r[0]) + r[0] = r[0] << 32;
866 //t += m;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200867 // t = r[0] << 32 = b << 32;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200868 //t_hi = (t < m);
869 // t_hi = 0;
870 //r[0] = (sp_digit)t;
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100871// r[0] = 0;
872//the store can be eliminated since caller won't look at lower 256 bits of the result
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200873 //t = (t >> 32) | ((uint64_t)t_hi << 32);
874 // t = b;
875
876 //m = ((uint64_t)b * a[1]) + r[1];
877 // Since a[1] is ffffffff, the above optimizes to:
878 // m = b * ffffffff + r[1] = (b * 100000000 - b) + r[1] = (b << 32) - b + r[1];
879 //t += m;
880 // t = b + (b << 32) - b + r[1] = (b << 32) + r[1];
881 //t_hi = (t < m);
882 // t_hi = 0;
883 //r[1] = (sp_digit)t;
884 // r[1] = r[1];
885 //t = (t >> 32) | ((uint64_t)t_hi << 32);
886 // t = b;
887
888 //m = ((uint64_t)b * a[2]) + r[2];
889 // Since a[2] is ffffffff, the above optimizes to:
890 // m = b * ffffffff + r[2] = (b * 100000000 - b) + r[2] = (b << 32) - b + r[2];
891 //t += m;
892 // t = b + (b << 32) - b + r[2] = (b << 32) + r[2]
893 //t_hi = (t < m);
894 // t_hi = 0;
895 //r[2] = (sp_digit)t;
896 // r[2] = r[2];
897 //t = (t >> 32) | ((uint64_t)t_hi << 32);
898 // t = b;
899
900 //m = ((uint64_t)b * a[3]) + r[3];
901 // Since a[3] is 00000000, the above optimizes to:
902 // m = b * 0 + r[3] = r[3];
903 //t += m;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200904 // t = b + r[3];
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200905 //t_hi = (t < m);
906 // t_hi = 0;
907 //r[3] = (sp_digit)t;
908 r[3] = r[3] + b;
909 //t = (t >> 32) | ((uint64_t)t_hi << 32);
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200910 t32 = (r[3] < b); // 0 or 1
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200911
912 //m = ((uint64_t)b * a[4]) + r[4];
913 // Since a[4] is 00000000, the above optimizes to:
914 // m = b * 0 + r[4] = r[4];
915 //t += m;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200916 // t = t32 + r[4];
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200917 //t_hi = (t < m);
918 // t_hi = 0;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200919 //r[4] = (sp_digit)t;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200920 //t = (t >> 32) | ((uint64_t)t_hi << 32);
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200921 if (t32 != 0) {
922 r[4]++;
923 t32 = (r[4] == 0); // 0 or 1
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200924
925 //m = ((uint64_t)b * a[5]) + r[5];
926 // Since a[5] is 00000000, the above optimizes to:
927 // m = b * 0 + r[5] = r[5];
928 //t += m;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200929 // t = t32 + r[5]; (t32 is 0 or 1)
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200930 //t_hi = (t < m);
931 // t_hi = 0;
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200932 //r[5] = (sp_digit)t;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200933 //t = (t >> 32) | ((uint64_t)t_hi << 32);
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200934 if (t32 != 0) {
935 r[5]++;
936 t32 = (r[5] == 0); // 0 or 1
937 }
938 }
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200939
940 //m = ((uint64_t)b * a[6]) + r[6];
941 // Since a[6] is 00000001, the above optimizes to:
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200942 // m = (uint64_t)b + r[6]; // 33 bits at most
943 //t += m;
944 t = t32 + (uint64_t)b + r[6];
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200945 //t_hi = (t < m);
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200946 // t_hi = 0;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200947 r[6] = (sp_digit)t;
948 //t = (t >> 32) | ((uint64_t)t_hi << 32);
949 t = (t >> 32);
950
951 //m = ((uint64_t)b * a[7]) + r[7];
952 // Since a[7] is ffffffff, the above optimizes to:
953 // m = b * ffffffff + r[7] = (b * 100000000 - b) + r[7]
954 m = ((uint64_t)b << 32) - b + r[7];
955 t += m;
956 //t_hi = (t < m);
Denys Vlasenko00f2cce2021-10-06 10:15:29 +0200957 // t_hi in fact is always 0 here (256bit * 32bit can't have more than 32 bits of overflow)
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200958 r[7] = (sp_digit)t;
959 //t = (t >> 32) | ((uint64_t)t_hi << 32);
960 t = (t >> 32);
961
Denys Vlasenko3b411eb2021-10-05 20:00:50 +0200962 r[8] += (sp_digit)t;
963 return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100964# endif
Denys Vlasenkoe7305052021-10-05 13:30:48 +0200965}
966
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200967/* Reduce the number back to 256 bits using Montgomery reduction.
Denys Vlasenko9c671fe2021-11-27 18:42:27 +0100968 * Note: the result is NOT guaranteed to be less than p256_mod!
969 * (it is only guaranteed to fit into 256 bits).
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200970 *
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +0100971 * r Result.
972 * a Double-wide number to reduce. Clobbered.
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200973 * m The single precision number representing the modulus.
974 * mp The digit representing the negative inverse of m mod 2^n.
Denys Vlasenko83262622021-11-28 12:55:20 +0100975 *
976 * Montgomery reduction on multiprecision integers:
977 * Montgomery reduction requires products modulo R.
978 * When R is a power of B [in our case R=2^128, B=2^32], there is a variant
979 * of Montgomery reduction which requires products only of machine word sized
980 * integers. T is stored as an little-endian word array a[0..n]. The algorithm
981 * reduces it one word at a time. First an appropriate multiple of modulus
982 * is added to make T divisible by B. [In our case, it is p256_mp_mod * a[0].]
983 * Then a multiple of modulus is added to make T divisible by B^2.
984 * [In our case, it is (p256_mp_mod * a[1]) << 32.]
985 * And so on. Eventually T is divisible by R, and after division by R
Denys Vlasenko90b0d332021-11-28 15:38:51 +0100986 * the algorithm is in the same place as the usual Montgomery reduction.
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200987 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +0100988static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200989{
Denys Vlasenkoc7842842021-10-06 01:09:37 +0200990// const sp_digit* m = p256_mod;
Denys Vlasenko389329e2021-10-05 13:39:33 +0200991 sp_digit mp = p256_mp_mod;
992
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200993 int i;
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200994// sp_digit mu;
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +0200995
996 if (mp != 1) {
Denys Vlasenko2430fcf2021-10-06 00:19:30 +0200997 sp_digit word16th = 0;
998 for (i = 0; i < 8; i++) {
999// mu = (sp_digit)(a[i] * mp);
1000 if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
1001 int j = i + 8;
1002 inc_next_word0:
1003 if (++j > 15) { /* a[16] array has no more words? */
1004 word16th++;
1005 continue;
1006 }
1007 if (++a[j] == 0) /* did this overflow too? */
1008 goto inc_next_word0;
1009 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001010 }
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001011 sp_512to256_mont_shift_8(r, a);
Denys Vlasenko2430fcf2021-10-06 00:19:30 +02001012 if (word16th != 0)
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001013 sp_256_sub_8_p256_mod(r);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001014 }
Denys Vlasenko389329e2021-10-05 13:39:33 +02001015 else { /* Same code for explicit mp == 1 (which is always the case for P256) */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001016 sp_digit word16th = 0;
1017 for (i = 0; i < 8; i++) {
Denys Vlasenko4415f7b2021-11-27 15:47:26 +01001018// mu = a[i];
Denys Vlasenko2430fcf2021-10-06 00:19:30 +02001019 if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001020 int j = i + 8;
1021 inc_next_word:
1022 if (++j > 15) { /* a[16] array has no more words? */
1023 word16th++;
1024 continue;
1025 }
1026 if (++a[j] == 0) /* did this overflow too? */
1027 goto inc_next_word;
1028 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001029 }
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001030 sp_512to256_mont_shift_8(r, a);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001031 if (word16th != 0)
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001032 sp_256_sub_8_p256_mod(r);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001033 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001034}
Denys Vlasenko90b0d332021-11-28 15:38:51 +01001035#endif
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001036
1037/* Multiply two Montogmery form numbers mod the modulus (prime).
1038 * (r = a * b mod m)
1039 *
1040 * r Result of multiplication.
1041 * a First number to multiply in Montogmery form.
1042 * b Second number to multiply in Montogmery form.
1043 * m Modulus (prime).
Denys Vlasenko1b93c7c2021-11-28 02:56:02 +01001044 * mp Montogmery multiplier.
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001045 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001046static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
Denys Vlasenko389329e2021-10-05 13:39:33 +02001047 /*, const sp_digit* m, sp_digit mp*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001048{
Denys Vlasenko389329e2021-10-05 13:39:33 +02001049 //const sp_digit* m = p256_mod;
1050 //sp_digit mp = p256_mp_mod;
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001051 sp_digit t[2 * 8];
1052 sp_256to512_mul_8(t, a, b);
1053 sp_512to256_mont_reduce_8(r, t /*, m, mp*/);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001054}
1055
1056/* Square the Montgomery form number. (r = a * a mod m)
1057 *
1058 * r Result of squaring.
1059 * a Number to square in Montogmery form.
1060 * m Modulus (prime).
Denys Vlasenko1b93c7c2021-11-28 02:56:02 +01001061 * mp Montogmery multiplier.
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001062 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001063static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
Denys Vlasenko389329e2021-10-05 13:39:33 +02001064 /*, const sp_digit* m, sp_digit mp*/)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001065{
Denys Vlasenko389329e2021-10-05 13:39:33 +02001066 //const sp_digit* m = p256_mod;
1067 //sp_digit mp = p256_mp_mod;
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001068 sp_256_mont_mul_8(r, a, a /*, m, mp*/);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001069}
1070
Denys Vlasenko27df6ae2021-12-11 23:27:40 +01001071static NOINLINE void sp_256_mont_mul_and_reduce_8(sp_digit* r,
1072 const sp_digit* a, const sp_digit* b
1073 /*, const sp_digit* m, sp_digit mp*/)
1074{
1075 sp_digit rr[2 * 8];
1076
1077 sp_256_mont_mul_8(rr, a, b /*, p256_mod, p256_mp_mod*/);
1078 memset(rr + 8, 0, sizeof(rr) / 2);
1079 sp_512to256_mont_reduce_8(r, rr /*, p256_mod, p256_mp_mod*/);
1080}
1081
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001082/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
1083 * P256 curve. (r = 1 / a mod m)
1084 *
Denys Vlasenkocfb61572021-11-28 11:10:00 +01001085 * r Inverse result. Must not coincide with a.
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001086 * a Number to invert.
1087 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001088static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001089{
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001090 int i;
1091
Denys Vlasenkocfb61572021-11-28 11:10:00 +01001092 memcpy(r, a, sizeof(sp_digit) * 8);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001093 for (i = 254; i >= 0; i--) {
Denys Vlasenkocfb61572021-11-28 11:10:00 +01001094 sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko83262622021-11-28 12:55:20 +01001095/* p256_mod - 2:
1096 * ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2
1097 * Bit pattern:
1098 * 2 2 2 2 2 2 2 1...1
1099 * 5 5 4 3 2 1 0 9...0 9...1
1100 * 543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210
1101 * 111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101
1102 */
1103 /*if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001104 if (i >= 224 || i == 192 || (i <= 95 && i != 1))
Denys Vlasenkocfb61572021-11-28 11:10:00 +01001105 sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001106 }
Denys Vlasenkoa2bc52d2021-04-27 01:21:26 +02001107}
1108
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001109/* Multiply a number by Montogmery normalizer mod modulus (prime).
1110 *
1111 * r The resulting Montgomery form number.
1112 * a The number to convert.
1113 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001114static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001115{
Denys Vlasenko12040122021-04-26 20:24:34 +02001116 int64_t t[8];
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001117 int32_t o;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001118
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001119#define A(n) ((uint64_t)a[n])
Denys Vlasenko12040122021-04-26 20:24:34 +02001120 /* 1 1 0 -1 -1 -1 -1 0 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001121 t[0] = 0 + A(0) + A(1) - A(3) - A(4) - A(5) - A(6);
Denys Vlasenko12040122021-04-26 20:24:34 +02001122 /* 0 1 1 0 -1 -1 -1 -1 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001123 t[1] = 0 + A(1) + A(2) - A(4) - A(5) - A(6) - A(7);
Denys Vlasenko12040122021-04-26 20:24:34 +02001124 /* 0 0 1 1 0 -1 -1 -1 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001125 t[2] = 0 + A(2) + A(3) - A(5) - A(6) - A(7);
Denys Vlasenko12040122021-04-26 20:24:34 +02001126 /* -1 -1 0 2 2 1 0 -1 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001127 t[3] = 0 - A(0) - A(1) + 2 * A(3) + 2 * A(4) + A(5) - A(7);
Denys Vlasenko12040122021-04-26 20:24:34 +02001128 /* 0 -1 -1 0 2 2 1 0 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001129 t[4] = 0 - A(1) - A(2) + 2 * A(4) + 2 * A(5) + A(6);
Denys Vlasenko12040122021-04-26 20:24:34 +02001130 /* 0 0 -1 -1 0 2 2 1 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001131 t[5] = 0 - A(2) - A(3) + 2 * A(5) + 2 * A(6) + A(7);
Denys Vlasenko12040122021-04-26 20:24:34 +02001132 /* -1 -1 0 0 0 1 3 2 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001133 t[6] = 0 - A(0) - A(1) + A(5) + 3 * A(6) + 2 * A(7);
Denys Vlasenko12040122021-04-26 20:24:34 +02001134 /* 1 0 -1 -1 -1 -1 0 3 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001135 t[7] = 0 + A(0) - A(2) - A(3) - A(4) - A(5) + 3 * A(7);
1136#undef A
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001137
Denys Vlasenko12040122021-04-26 20:24:34 +02001138 t[1] += t[0] >> 32; t[0] &= 0xffffffff;
1139 t[2] += t[1] >> 32; t[1] &= 0xffffffff;
1140 t[3] += t[2] >> 32; t[2] &= 0xffffffff;
1141 t[4] += t[3] >> 32; t[3] &= 0xffffffff;
1142 t[5] += t[4] >> 32; t[4] &= 0xffffffff;
1143 t[6] += t[5] >> 32; t[5] &= 0xffffffff;
1144 t[7] += t[6] >> 32; t[6] &= 0xffffffff;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001145 o = t[7] >> 32; //t[7] &= 0xffffffff;
Denys Vlasenko12040122021-04-26 20:24:34 +02001146 t[0] += o;
1147 t[3] -= o;
1148 t[6] -= o;
1149 t[7] += o;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001150 r[0] = (sp_digit)t[0];
1151 t[1] += t[0] >> 32;
1152 r[1] = (sp_digit)t[1];
1153 t[2] += t[1] >> 32;
1154 r[2] = (sp_digit)t[2];
1155 t[3] += t[2] >> 32;
1156 r[3] = (sp_digit)t[3];
1157 t[4] += t[3] >> 32;
1158 r[4] = (sp_digit)t[4];
1159 t[5] += t[4] >> 32;
1160 r[5] = (sp_digit)t[5];
1161 t[6] += t[5] >> 32;
1162 r[6] = (sp_digit)t[6];
1163// t[7] += t[6] >> 32;
1164// r[7] = (sp_digit)t[7];
1165 r[7] = (sp_digit)t[7] + (sp_digit)(t[6] >> 32);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001166}
1167
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001168/* Map the Montgomery form projective co-ordinate point to an affine point.
1169 *
1170 * r Resulting affine co-ordinate point.
1171 * p Montgomery form projective co-ordinate point.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001172 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001173static void sp_256_map_8(sp_point* r, sp_point* p)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001174{
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001175 sp_digit t1[8];
1176 sp_digit t2[8];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001177
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001178 sp_256_mont_inv_8(t1, p->z);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001179
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001180 sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
1181 sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001182
Denys Vlasenko12040122021-04-26 20:24:34 +02001183 /* x /= z^2 */
Denys Vlasenko27df6ae2021-12-11 23:27:40 +01001184 sp_256_mont_mul_and_reduce_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001185 /* Reduce x to less than modulus */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001186 if (sp_256_cmp_8(r->x, p256_mod) >= 0)
Denys Vlasenko5e9c6172021-10-06 20:14:49 +02001187 sp_256_sub_8_p256_mod(r->x);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001188
Denys Vlasenko12040122021-04-26 20:24:34 +02001189 /* y /= z^3 */
Denys Vlasenko27df6ae2021-12-11 23:27:40 +01001190 sp_256_mont_mul_and_reduce_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001191 /* Reduce y to less than modulus */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001192 if (sp_256_cmp_8(r->y, p256_mod) >= 0)
Denys Vlasenko5e9c6172021-10-06 20:14:49 +02001193 sp_256_sub_8_p256_mod(r->y);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001194
Denys Vlasenko12040122021-04-26 20:24:34 +02001195 memset(r->z, 0, sizeof(r->z));
1196 r->z[0] = 1;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001197}
1198
1199/* Double the Montgomery form projective point p.
1200 *
1201 * r Result of doubling point.
1202 * p Point to double.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001203 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001204static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001205{
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001206 sp_digit t1[8];
1207 sp_digit t2[8];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001208
Denys Vlasenko12040122021-04-26 20:24:34 +02001209 /* Put point to double into result */
1210 if (r != p)
1211 *r = *p; /* struct copy */
Denys Vlasenko4d3a5c12021-04-26 15:21:38 +02001212
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001213 if (r->infinity)
Denys Vlasenkoe7305052021-10-05 13:30:48 +02001214 return;
1215
Denys Vlasenko12040122021-04-26 20:24:34 +02001216 /* T1 = Z * Z */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001217 sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001218 /* Z = Y * Z */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001219 sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001220 /* Z = 2Z */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001221 sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001222 /* T2 = X - T1 */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001223 sp_256_mont_sub_8(t2, r->x, t1 /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001224 /* T1 = X + T1 */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001225 sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001226 /* T2 = T1 * T2 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001227 sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001228 /* T1 = 3T2 */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001229 sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001230 /* Y = 2Y */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001231 sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001232 /* Y = Y * Y */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001233 sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001234 /* T2 = Y * Y */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001235 sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001236 /* T2 = T2/2 */
Denys Vlasenkodcfd8d32021-11-27 16:07:42 +01001237 sp_256_div2_8(t2 /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001238 /* Y = Y * X */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001239 sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001240 /* X = T1 * T1 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001241 sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001242 /* X = X - Y */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001243 sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001244 /* X = X - Y */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001245 sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001246 /* Y = Y - X */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001247 sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001248 /* Y = Y * T1 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001249 sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenko12040122021-04-26 20:24:34 +02001250 /* Y = Y - T2 */
Denys Vlasenkoc7842842021-10-06 01:09:37 +02001251 sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001252 dump_512("y2 %s\n", r->y);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001253}
1254
1255/* Add two Montgomery form projective points.
1256 *
1257 * r Result of addition.
1258 * p Frist point to add.
1259 * q Second point to add.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001260 */
Denys Vlasenko53b2fdc2021-10-10 13:50:53 +02001261static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001262{
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001263 sp_digit t1[8];
1264 sp_digit t2[8];
1265 sp_digit t3[8];
1266 sp_digit t4[8];
1267 sp_digit t5[8];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001268
Denys Vlasenko12040122021-04-26 20:24:34 +02001269 /* Ensure only the first point is the same as the result. */
1270 if (q == r) {
1271 sp_point* a = p;
1272 p = q;
1273 q = a;
1274 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001275
Denys Vlasenko12040122021-04-26 20:24:34 +02001276 /* Check double */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001277 sp_256_sub_8(t1, p256_mod, q->y);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001278 if (sp_256_cmp_equal_8(p->x, q->x)
1279 && sp_256_cmp_equal_8(p->z, q->z)
1280 && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1))
Denys Vlasenko12040122021-04-26 20:24:34 +02001281 ) {
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001282 sp_256_proj_point_dbl_8(r, p);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001283 return;
Denys Vlasenko12040122021-04-26 20:24:34 +02001284 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001285
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001286 if (p->infinity || q->infinity) {
Denys Vlasenko12040122021-04-26 20:24:34 +02001287 *r = p->infinity ? *q : *p; /* struct copy */
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001288 return;
Denys Vlasenko12040122021-04-26 20:24:34 +02001289 }
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001290
1291 /* U1 = X1*Z2^2 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001292 sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
1293 sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
1294 sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001295 /* U2 = X2*Z1^2 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001296 sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
1297 sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
1298 sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001299 /* S1 = Y1*Z2^3 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001300 sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001301 /* S2 = Y2*Z1^3 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001302 sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001303 /* H = U2 - U1 */
1304 sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
1305 /* R = S2 - S1 */
1306 sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
1307 /* Z3 = H*Z1*Z2 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001308 sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
1309 sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001310 /* X3 = R^2 - H^3 - 2*U1*H^2 */
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001311 sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
1312 sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
1313 sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
1314 sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001315 sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/);
1316 sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/);
1317 sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/);
1318 /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
1319 sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
Denys Vlasenkof92ae1d2021-11-27 19:15:43 +01001320 sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
1321 sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
Denys Vlasenkobbda85c2021-11-27 15:06:57 +01001322 sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001323}
1324
1325/* Multiply the point by the scalar and return the result.
1326 * If map is true then convert result to affine co-ordinates.
1327 *
1328 * r Resulting point.
1329 * g Point to multiply.
1330 * k Scalar to multiply by.
Denys Vlasenko03ab2a92021-04-26 14:55:46 +02001331 * map Indicates whether to convert result to affine.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001332 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001333static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* k /*, int map*/)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001334{
Denys Vlasenko12040122021-04-26 20:24:34 +02001335 enum { map = 1 }; /* we always convert result to affine coordinates */
1336 sp_point t[3];
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001337 sp_digit n = n; /* for compiler */
Denys Vlasenko12040122021-04-26 20:24:34 +02001338 int c, y;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001339
Denys Vlasenko12040122021-04-26 20:24:34 +02001340 memset(t, 0, sizeof(t));
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001341
Denys Vlasenko12040122021-04-26 20:24:34 +02001342 /* t[0] = {0, 0, 1} * norm */
1343 t[0].infinity = 1;
1344 /* t[1] = {g->x, g->y, g->z} * norm */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001345 sp_256_mod_mul_norm_8(t[1].x, g->x);
1346 sp_256_mod_mul_norm_8(t[1].y, g->y);
1347 sp_256_mod_mul_norm_8(t[1].z, g->z);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001348
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001349 /* For every bit, starting from most significant... */
1350 k += 7;
1351 c = 256;
1352 for (;;) {
1353 if ((c & 0x1f) == 0) {
1354 if (c == 0)
Denys Vlasenko12040122021-04-26 20:24:34 +02001355 break;
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001356 n = *k--;
Denys Vlasenko12040122021-04-26 20:24:34 +02001357 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001358
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001359 y = (n >> 31);
1360 dbg("y:%d t[%d] = t[0]+t[1]\n", y, y^1);
1361 sp_256_proj_point_add_8(&t[y^1], &t[0], &t[1]);
1362 dump_512("t[0].x %s\n", t[0].x);
1363 dump_512("t[0].y %s\n", t[0].y);
1364 dump_512("t[0].z %s\n", t[0].z);
1365 dump_512("t[1].x %s\n", t[1].x);
1366 dump_512("t[1].y %s\n", t[1].y);
1367 dump_512("t[1].z %s\n", t[1].z);
1368 dbg("t[2] = t[%d]\n", y);
Denys Vlasenko26c85222021-11-27 15:00:14 +01001369 t[2] = t[y]; /* struct copy */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001370 dbg("t[2] *= 2\n");
1371 sp_256_proj_point_dbl_8(&t[2], &t[2]);
1372 dump_512("t[2].x %s\n", t[2].x);
1373 dump_512("t[2].y %s\n", t[2].y);
1374 dump_512("t[2].z %s\n", t[2].z);
Denys Vlasenko26c85222021-11-27 15:00:14 +01001375 t[y] = t[2]; /* struct copy */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001376
1377 n <<= 1;
1378 c--;
Denys Vlasenko12040122021-04-26 20:24:34 +02001379 }
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001380
Denys Vlasenko12040122021-04-26 20:24:34 +02001381 if (map)
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001382 sp_256_map_8(r, &t[0]);
Denys Vlasenko12040122021-04-26 20:24:34 +02001383 else
Denys Vlasenko9c671fe2021-11-27 18:42:27 +01001384 *r = t[0]; /* struct copy */
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001385
Denys Vlasenko12040122021-04-26 20:24:34 +02001386 memset(t, 0, sizeof(t)); //paranoia
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001387}
1388
1389/* Multiply the base point of P256 by the scalar and return the result.
1390 * If map is true then convert result to affine co-ordinates.
1391 *
1392 * r Resulting point.
1393 * k Scalar to multiply by.
Denys Vlasenko03ab2a92021-04-26 14:55:46 +02001394 * map Indicates whether to convert result to affine.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001395 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001396static void sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k /*, int map*/)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001397{
Denys Vlasenko39a3ef52021-04-27 01:31:51 +02001398 /* Since this function is called only once, save space:
Denys Vlasenko7b969bb2022-07-13 16:11:17 +02001399 * don't have "static const sp_point p256_base = {...}".
Denys Vlasenko39a3ef52021-04-27 01:31:51 +02001400 */
Denys Vlasenko48a18d12021-04-27 12:24:21 +02001401 static const uint8_t p256_base_bin[] = {
1402 /* x (big-endian) */
Denys Vlasenko7b969bb2022-07-13 16:11:17 +02001403 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,
1404 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96,
Denys Vlasenko48a18d12021-04-27 12:24:21 +02001405 /* y */
Denys Vlasenko7b969bb2022-07-13 16:11:17 +02001406 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,
1407 0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5,
Denys Vlasenko646e8562021-04-27 13:09:44 +02001408 /* z will be set to 1, infinity flag to "false" */
Denys Vlasenko39a3ef52021-04-27 01:31:51 +02001409 };
1410 sp_point p256_base;
1411
Denys Vlasenko48a18d12021-04-27 12:24:21 +02001412 sp_256_point_from_bin2x32(&p256_base, p256_base_bin);
Denys Vlasenko39a3ef52021-04-27 01:31:51 +02001413
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001414 sp_256_ecc_mulmod_8(r, &p256_base, k /*, map*/);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001415}
1416
1417/* Multiply the point by the scalar and serialize the X ordinate.
1418 * The number is 0 padded to maximum size on output.
1419 *
1420 * priv Scalar to multiply the point by.
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001421 * pub2x32 Point to multiply.
1422 * out32 Buffer to hold X ordinate.
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001423 */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001424static void sp_ecc_secret_gen_256(const sp_digit priv[8], const uint8_t *pub2x32, uint8_t* out32)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001425{
Denys Vlasenko12040122021-04-26 20:24:34 +02001426 sp_point point[1];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001427
1428#if FIXED_PEER_PUBKEY
Denys Vlasenko12040122021-04-26 20:24:34 +02001429 memset((void*)pub2x32, 0x55, 64);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001430#endif
Denys Vlasenko12040122021-04-26 20:24:34 +02001431 dump_hex("peerkey %s\n", pub2x32, 32); /* in TLS, this is peer's public key */
1432 dump_hex(" %s\n", pub2x32 + 32, 32);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001433
Denys Vlasenko12040122021-04-26 20:24:34 +02001434 sp_256_point_from_bin2x32(point, pub2x32);
Denys Vlasenko81d8af12021-10-05 17:31:33 +02001435 dump_512("point->x %s\n", point->x);
1436 dump_512("point->y %s\n", point->y);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001437
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001438 sp_256_ecc_mulmod_8(point, point, priv);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001439
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001440 sp_256_to_bin_8(point->x, out32);
Denys Vlasenko12040122021-04-26 20:24:34 +02001441 dump_hex("out32: %s\n", out32, 32);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001442}
1443
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001444/* Generates a random scalar in [1..order-1] range. */
1445static void sp_256_ecc_gen_k_8(sp_digit k[8])
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001446{
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001447 /* Since 32-bit words are "dense", no need to use
1448 * sp_256_from_bin_8(k, buf) to convert random stream
1449 * to sp_digit array - just store random bits there directly.
1450 */
1451 tls_get_random(k, 8 * sizeof(k[0]));
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001452#if FIXED_SECRET
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001453 memset(k, 0x77, 8 * sizeof(k[0]));
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001454#endif
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001455
1456// If scalar is too large, try again (pseudo-code)
1457// if (k >= 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 - 1) // order of P256
1458// goto pick_another_random;
1459// k++; // ensure non-zero
1460 /* Simpler alternative, at the cost of not choosing some valid
1461 * random values, and slightly non-uniform distribution */
1462 if (k[0] == 0)
1463 k[0] = 1;
1464 if (k[7] >= 0xffffffff)
1465 k[7] = 0xfffffffe;
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001466}
1467
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001468/* Makes a random EC key pair. */
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001469static void sp_ecc_make_key_256(sp_digit privkey[8], uint8_t *pubkey)
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001470{
1471 sp_point point[1];
1472
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001473 sp_256_ecc_gen_k_8(privkey);
Denys Vlasenko137864f2021-10-05 13:47:42 +02001474 dump_256("privkey %s\n", privkey);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001475 sp_256_ecc_mulmod_base_8(point, privkey);
Denys Vlasenko137864f2021-10-05 13:47:42 +02001476 dump_512("point->x %s\n", point->x);
1477 dump_512("point->y %s\n", point->y);
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001478 sp_256_to_bin_8(point->x, pubkey);
1479 sp_256_to_bin_8(point->y, pubkey + 32);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001480
1481 memset(point, 0, sizeof(point)); //paranoia
1482}
1483
1484void FAST_FUNC curve_P256_compute_pubkey_and_premaster(
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001485 uint8_t *pubkey2x32, uint8_t *premaster32,
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001486 const uint8_t *peerkey2x32)
1487{
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001488 sp_digit privkey[8];
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001489
Denys Vlasenko3b411eb2021-10-05 20:00:50 +02001490 dump_hex("peerkey2x32: %s\n", peerkey2x32, 64);
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001491 sp_ecc_make_key_256(privkey, pubkey2x32);
1492 dump_hex("pubkey: %s\n", pubkey2x32, 32);
1493 dump_hex(" %s\n", pubkey2x32 + 32, 32);
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001494
Denys Vlasenko074b33b2021-04-26 14:33:38 +02001495 /* Combine our privkey and peer's public key to generate premaster */
Denys Vlasenkof18a1fd2021-04-26 13:25:56 +02001496 sp_ecc_secret_gen_256(privkey, /*x,y:*/peerkey2x32, premaster32);
1497 dump_hex("premaster: %s\n", premaster32, 32);
1498}