blob: f810e112a3ee6e2ff08ca5e30a4a6638187d96d4 [file] [log] [blame]
Denys Vlasenkobddb6542018-11-13 02:16:24 +01001/*
2 * Copyright (C) 2018 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8typedef uint8_t byte;
9typedef uint16_t word16;
10typedef uint32_t word32;
11#define XMEMSET memset
12
13#define F25519_SIZE CURVE25519_KEYSIZE
14
15/* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
16 * Header comment is kept intact:
17 */
18
19/* fe_low_mem.c
20 *
21 * Copyright (C) 2006-2017 wolfSSL Inc.
22 *
23 * This file is part of wolfSSL.
24 *
25 * wolfSSL is free software; you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License as published by
27 * the Free Software Foundation; either version 2 of the License, or
28 * (at your option) any later version.
29 *
30 * wolfSSL is distributed in the hope that it will be useful,
31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 * GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
38 */
39
40
/* Based on Daniel Beer's public domain work. */
42
43#if 0 //UNUSED
44static void fprime_copy(byte *x, const byte *a)
45{
Denys Vlasenko3109d1f2019-01-10 20:18:02 +010046 memcpy(x, a, F25519_SIZE);
Denys Vlasenkobddb6542018-11-13 02:16:24 +010047}
48#endif
49
50static void lm_copy(byte* x, const byte* a)
51{
Denys Vlasenko3109d1f2019-01-10 20:18:02 +010052 memcpy(x, a, F25519_SIZE);
Denys Vlasenkobddb6542018-11-13 02:16:24 +010053}
54
55#if 0 //UNUSED
56static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
57{
58 const byte mask = -condition;
59 int i;
60
61 for (i = 0; i < F25519_SIZE; i++)
62 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
63}
64#endif
65
66static void fe_select(byte *dst,
67 const byte *zero, const byte *one,
68 byte condition)
69{
70 const byte mask = -condition;
71 int i;
72
73 for (i = 0; i < F25519_SIZE; i++)
74 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
75}
76
77#if 0 //UNUSED
78static void raw_add(byte *x, const byte *p)
79{
80 word16 c = 0;
81 int i;
82
83 for (i = 0; i < F25519_SIZE; i++) {
84 c += ((word16)x[i]) + ((word16)p[i]);
85 x[i] = (byte)c;
86 c >>= 8;
87 }
88}
89#endif
90
91#if 0 //UNUSED
92static void raw_try_sub(byte *x, const byte *p)
93{
94 byte minusp[F25519_SIZE];
95 word16 c = 0;
96 int i;
97
98 for (i = 0; i < F25519_SIZE; i++) {
99 c = ((word16)x[i]) - ((word16)p[i]) - c;
100 minusp[i] = (byte)c;
101 c = (c >> 8) & 1;
102 }
103
104 fprime_select(x, minusp, x, (byte)c);
105}
106#endif
107
108#if 0 //UNUSED
109static int prime_msb(const byte *p)
110{
111 int i;
112 byte x;
113 int shift = 1;
114 int z = F25519_SIZE - 1;
115
116 /*
117 Test for any hot bits.
118 As soon as one instance is encountered set shift to 0.
119 */
120 for (i = F25519_SIZE - 1; i >= 0; i--) {
121 shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
122 z -= shift;
123 }
124 x = p[z];
125 z <<= 3;
126 shift = 1;
127 for (i = 0; i < 8; i++) {
128 shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
129 z += shift;
130 }
131
132 return z - 1;
133}
134#endif
135
136#if 0 //UNUSED
137static void fprime_add(byte *r, const byte *a, const byte *modulus)
138{
139 raw_add(r, a);
140 raw_try_sub(r, modulus);
141}
142#endif
143
144#if 0 //UNUSED
145static void fprime_sub(byte *r, const byte *a, const byte *modulus)
146{
147 raw_add(r, modulus);
148 raw_try_sub(r, a);
149 raw_try_sub(r, modulus);
150}
151#endif
152
153#if 0 //UNUSED
154static void fprime_mul(byte *r, const byte *a, const byte *b,
155 const byte *modulus)
156{
157 word16 c = 0;
158 int i,j;
159
160 XMEMSET(r, 0, F25519_SIZE);
161
162 for (i = prime_msb(modulus); i >= 0; i--) {
163 const byte bit = (b[i >> 3] >> (i & 7)) & 1;
164 byte plusa[F25519_SIZE];
165
166 for (j = 0; j < F25519_SIZE; j++) {
167 c |= ((word16)r[j]) << 1;
168 r[j] = (byte)c;
169 c >>= 8;
170 }
171 raw_try_sub(r, modulus);
172
173 fprime_copy(plusa, r);
174 fprime_add(plusa, a, modulus);
175
176 fprime_select(r, r, plusa, bit);
177 }
178}
179#endif
180
181#if 0 //UNUSED
182static void fe_load(byte *x, word32 c)
183{
184 word32 i;
185
186 for (i = 0; i < sizeof(c); i++) {
187 x[i] = c;
188 c >>= 8;
189 }
190
191 for (; i < F25519_SIZE; i++)
192 x[i] = 0;
193}
194#endif
195
196static void fe_normalize(byte *x)
197{
198 byte minusp[F25519_SIZE];
Denys Vlasenko3109d1f2019-01-10 20:18:02 +0100199 unsigned c;
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100200 int i;
201
202 /* Reduce using 2^255 = 19 mod p */
203 c = (x[31] >> 7) * 19;
204 x[31] &= 127;
205
206 for (i = 0; i < F25519_SIZE; i++) {
207 c += x[i];
208 x[i] = (byte)c;
209 c >>= 8;
210 }
211
212 /* The number is now less than 2^255 + 18, and therefore less than
213 * 2p. Try subtracting p, and conditionally load the subtracted
214 * value if underflow did not occur.
215 */
216 c = 19;
217
Denys Vlasenko3109d1f2019-01-10 20:18:02 +0100218 for (i = 0; i < F25519_SIZE - 1; i++) {
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100219 c += x[i];
220 minusp[i] = (byte)c;
221 c >>= 8;
222 }
223
Denys Vlasenko3109d1f2019-01-10 20:18:02 +0100224 c += ((unsigned)x[i]) - 128;
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100225 minusp[31] = (byte)c;
226
227 /* Load x-p if no underflow */
228 fe_select(x, minusp, x, (c >> 15) & 1);
229}
230
231static void lm_add(byte* r, const byte* a, const byte* b)
232{
Denys Vlasenko3109d1f2019-01-10 20:18:02 +0100233 unsigned c = 0;
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100234 int i;
235
236 /* Add */
237 for (i = 0; i < F25519_SIZE; i++) {
238 c >>= 8;
Denys Vlasenko3109d1f2019-01-10 20:18:02 +0100239 c += ((unsigned)a[i]) + ((unsigned)b[i]);
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100240 r[i] = (byte)c;
241 }
242
243 /* Reduce with 2^255 = 19 mod p */
244 r[31] &= 127;
245 c = (c >> 7) * 19;
246
247 for (i = 0; i < F25519_SIZE; i++) {
248 c += r[i];
249 r[i] = (byte)c;
250 c >>= 8;
251 }
252}
253
254static void lm_sub(byte* r, const byte* a, const byte* b)
255{
256 word32 c = 0;
257 int i;
258
259 /* Calculate a + 2p - b, to avoid underflow */
260 c = 218;
261 for (i = 0; i + 1 < F25519_SIZE; i++) {
262 c += 65280 + ((word32)a[i]) - ((word32)b[i]);
263 r[i] = c;
264 c >>= 8;
265 }
266
267 c += ((word32)a[31]) - ((word32)b[31]);
268 r[31] = c & 127;
269 c = (c >> 7) * 19;
270
271 for (i = 0; i < F25519_SIZE; i++) {
272 c += r[i];
273 r[i] = c;
274 c >>= 8;
275 }
276}
277
278#if 0 //UNUSED
279static void lm_neg(byte* r, const byte* a)
280{
281 word32 c = 0;
282 int i;
283
284 /* Calculate 2p - a, to avoid underflow */
285 c = 218;
286 for (i = 0; i + 1 < F25519_SIZE; i++) {
287 c += 65280 - ((word32)a[i]);
288 r[i] = c;
289 c >>= 8;
290 }
291
292 c -= ((word32)a[31]);
293 r[31] = c & 127;
294 c = (c >> 7) * 19;
295
296 for (i = 0; i < F25519_SIZE; i++) {
297 c += r[i];
298 r[i] = c;
299 c >>= 8;
300 }
301}
302#endif
303
304static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
305{
306 word32 c = 0;
307 int i;
308
309 for (i = 0; i < F25519_SIZE; i++) {
310 int j;
311
312 c >>= 8;
313 for (j = 0; j <= i; j++)
314 c += ((word32)a[j]) * ((word32)b[i - j]);
315
316 for (; j < F25519_SIZE; j++)
317 c += ((word32)a[j]) *
318 ((word32)b[i + F25519_SIZE - j]) * 38;
319
320 r[i] = c;
321 }
322
323 r[31] &= 127;
324 c = (c >> 7) * 19;
325
326 for (i = 0; i < F25519_SIZE; i++) {
327 c += r[i];
328 r[i] = c;
329 c >>= 8;
330 }
331}
332
333#if 0 //UNUSED
334static void lm_mul(byte *r, const byte* a, const byte *b)
335{
336 byte tmp[F25519_SIZE];
337
338 fe_mul__distinct(tmp, a, b);
339 lm_copy(r, tmp);
340}
341#endif
342
343static void fe_mul_c(byte *r, const byte *a, word32 b)
344{
345 word32 c = 0;
346 int i;
347
348 for (i = 0; i < F25519_SIZE; i++) {
349 c >>= 8;
350 c += b * ((word32)a[i]);
351 r[i] = c;
352 }
353
354 r[31] &= 127;
355 c >>= 7;
356 c *= 19;
357
358 for (i = 0; i < F25519_SIZE; i++) {
359 c += r[i];
360 r[i] = c;
361 c >>= 8;
362 }
363}
364
365static void fe_inv__distinct(byte *r, const byte *x)
366{
367 byte s[F25519_SIZE];
368 int i;
369
370 /* This is a prime field, so by Fermat's little theorem:
371 *
372 * x^(p-1) = 1 mod p
373 *
374 * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
375 * inverse.
376 *
377 * This is a 255-bit binary number with the digits:
378 *
379 * 11111111... 01011
380 *
381 * We compute the result by the usual binary chain, but
382 * alternate between keeping the accumulator in r and s, so as
383 * to avoid copying temporaries.
384 */
385
Denys Vlasenko868f3832021-01-01 18:48:38 +0100386 lm_copy(r, x);
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100387
Denys Vlasenko868f3832021-01-01 18:48:38 +0100388 /* 1, 1 x 249 */
389 for (i = 0; i < 249; i++) {
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100390 fe_mul__distinct(s, r, r);
391 fe_mul__distinct(r, s, x);
392 }
393
394 /* 0 */
395 fe_mul__distinct(s, r, r);
396
397 /* 1 */
398 fe_mul__distinct(r, s, s);
399 fe_mul__distinct(s, r, x);
400
401 /* 0 */
402 fe_mul__distinct(r, s, s);
403
Denys Vlasenko868f3832021-01-01 18:48:38 +0100404 /* 1, 1 */
405 for (i = 0; i < 2; i++) {
406 fe_mul__distinct(s, r, r);
407 fe_mul__distinct(r, s, x);
408 }
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100409}
410
411#if 0 //UNUSED
412static void lm_invert(byte *r, const byte *x)
413{
414 byte tmp[F25519_SIZE];
415
416 fe_inv__distinct(tmp, x);
417 lm_copy(r, tmp);
418}
419#endif
420
421#if 0 //UNUSED
422/* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
423 * storage.
424 */
425static void exp2523(byte *r, const byte *x, byte *s)
426{
427 int i;
428
429 /* This number is a 252-bit number with the binary expansion:
430 *
431 * 111111... 01
432 */
433
Denys Vlasenko868f3832021-01-01 18:48:38 +0100434 lm_copy(s, x);
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100435
Denys Vlasenko868f3832021-01-01 18:48:38 +0100436 /* 1, 1 x 249 */
437 for (i = 0; i < 249; i++) {
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100438 fe_mul__distinct(r, s, s);
439 fe_mul__distinct(s, r, x);
440 }
441
442 /* 0 */
443 fe_mul__distinct(r, s, s);
444
445 /* 1 */
446 fe_mul__distinct(s, r, r);
447 fe_mul__distinct(r, s, x);
448}
449#endif
450
451#if 0 //UNUSED
452static void fe_sqrt(byte *r, const byte *a)
453{
454 byte v[F25519_SIZE];
455 byte i[F25519_SIZE];
456 byte x[F25519_SIZE];
457 byte y[F25519_SIZE];
458
459 /* v = (2a)^((p-5)/8) [x = 2a] */
460 fe_mul_c(x, a, 2);
461 exp2523(v, x, y);
462
463 /* i = 2av^2 - 1 */
464 fe_mul__distinct(y, v, v);
465 fe_mul__distinct(i, x, y);
466 fe_load(y, 1);
467 lm_sub(i, i, y);
468
469 /* r = avi */
470 fe_mul__distinct(x, v, a);
471 fe_mul__distinct(r, x, i);
472}
473#endif
474
475/* Differential addition */
476static void xc_diffadd(byte *x5, byte *z5,
477 const byte *x1, const byte *z1,
478 const byte *x2, const byte *z2,
479 const byte *x3, const byte *z3)
480{
481 /* Explicit formulas database: dbl-1987-m3
482 *
483 * source 1987 Montgomery "Speeding the Pollard and elliptic curve
484 * methods of factorization", page 261, fifth display, plus
485 * common-subexpression elimination
486 * compute A = X2+Z2
487 * compute B = X2-Z2
488 * compute C = X3+Z3
489 * compute D = X3-Z3
490 * compute DA = D A
491 * compute CB = C B
492 * compute X5 = Z1(DA+CB)^2
493 * compute Z5 = X1(DA-CB)^2
494 */
495 byte da[F25519_SIZE];
496 byte cb[F25519_SIZE];
497 byte a[F25519_SIZE];
498 byte b[F25519_SIZE];
499
500 lm_add(a, x2, z2);
501 lm_sub(b, x3, z3); /* D */
502 fe_mul__distinct(da, a, b);
503
504 lm_sub(b, x2, z2);
505 lm_add(a, x3, z3); /* C */
506 fe_mul__distinct(cb, a, b);
507
508 lm_add(a, da, cb);
509 fe_mul__distinct(b, a, a);
510 fe_mul__distinct(x5, z1, b);
511
512 lm_sub(a, da, cb);
513 fe_mul__distinct(b, a, a);
514 fe_mul__distinct(z5, x1, b);
515}
516
517/* Double an X-coordinate */
518static void xc_double(byte *x3, byte *z3,
519 const byte *x1, const byte *z1)
520{
521 /* Explicit formulas database: dbl-1987-m
522 *
523 * source 1987 Montgomery "Speeding the Pollard and elliptic
524 * curve methods of factorization", page 261, fourth display
525 * compute X3 = (X1^2-Z1^2)^2
526 * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
527 */
528 byte x1sq[F25519_SIZE];
529 byte z1sq[F25519_SIZE];
530 byte x1z1[F25519_SIZE];
531 byte a[F25519_SIZE];
532
533 fe_mul__distinct(x1sq, x1, x1);
534 fe_mul__distinct(z1sq, z1, z1);
535 fe_mul__distinct(x1z1, x1, z1);
536
537 lm_sub(a, x1sq, z1sq);
538 fe_mul__distinct(x3, a, a);
539
540 fe_mul_c(a, x1z1, 486662);
541 lm_add(a, x1sq, a);
542 lm_add(a, z1sq, a);
543 fe_mul__distinct(x1sq, x1z1, a);
544 fe_mul_c(z3, x1sq, 4);
545}
546
Denys Vlasenko83e5c622018-11-23 17:21:38 +0100547void FAST_FUNC curve25519(byte *result, const byte *e, const byte *q)
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100548{
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100549 int i;
550
Denys Vlasenko375fc782018-11-13 03:15:15 +0100551 struct {
552 /* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
553 /*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
554
555 /* Current point: P_m */
556 byte xm[F25519_SIZE];
557 byte zm[F25519_SIZE]; // = {1};
558 /* Predecessor: P_(m-1) */
559 byte xm1[F25519_SIZE]; // = {1};
560 byte zm1[F25519_SIZE]; // = {0};
561 } z;
562#define f25519_one z.f25519_one
563#define xm z.xm
564#define zm z.zm
565#define xm1 z.xm1
566#define zm1 z.zm1
567 memset(&z, 0, sizeof(z));
568 f25519_one[0] = 1;
569 zm[0] = 1;
570 xm1[0] = 1;
571
Denys Vlasenkobddb6542018-11-13 02:16:24 +0100572 /* Note: bit 254 is assumed to be 1 */
573 lm_copy(xm, q);
574
575 for (i = 253; i >= 0; i--) {
576 const int bit = (e[i >> 3] >> (i & 7)) & 1;
577 byte xms[F25519_SIZE];
578 byte zms[F25519_SIZE];
579
580 /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
581 xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
582 xc_double(xm, zm, xm, zm);
583
584 /* Compute P_(2m+1) */
585 xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
586
587 /* Select:
588 * bit = 1 --> (P_(2m+1), P_(2m))
589 * bit = 0 --> (P_(2m), P_(2m-1))
590 */
591 fe_select(xm1, xm1, xm, bit);
592 fe_select(zm1, zm1, zm, bit);
593 fe_select(xm, xm, xms, bit);
594 fe_select(zm, zm, zms, bit);
595 }
596
597 /* Freeze out of projective coordinates */
598 fe_inv__distinct(zm1, zm);
599 fe_mul__distinct(result, zm1, xm);
600 fe_normalize(result);
601}