Blame - networking/tls_fe.c - codeaurora/busybox

blob: f235082f536f861dd89d76238d57e10b63dc453a [file] [log] [blame]

Denys Vlasenko	bddb654	2018-11-13 02:16:24 +0100	[diff] [blame]	1	/*
				2	* Copyright (C) 2018 Denys Vlasenko
				3	*
				4	* Licensed under GPLv2, see file LICENSE in this source tree.
				5	*/
				6	#include "tls.h"
				7
				8	typedef uint8_t byte;
				9	typedef uint16_t word16;
				10	typedef uint32_t word32;
				11	#define XMEMSET memset
				12
				13	#define F25519_SIZE CURVE25519_KEYSIZE
				14
				15	/* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
				16	* Header comment is kept intact:
				17	*/
				18
				19	/* fe_low_mem.c
				20	*
				21	* Copyright (C) 2006-2017 wolfSSL Inc.
				22	*
				23	* This file is part of wolfSSL.
				24	*
				25	* wolfSSL is free software; you can redistribute it and/or modify
				26	* it under the terms of the GNU General Public License as published by
				27	* the Free Software Foundation; either version 2 of the License, or
				28	* (at your option) any later version.
				29	*
				30	* wolfSSL is distributed in the hope that it will be useful,
				31	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				32	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				33	* GNU General Public License for more details.
				34	*
				35	* You should have received a copy of the GNU General Public License
				36	* along with this program; if not, write to the Free Software
				37	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
				38	*/
				39
				40
				41	/* Based from Daniel Beer's public domain work. */
				42
				43	#if 0 //UNUSED
				44	static void fprime_copy(byte x, const byte a)
				45	{
				46	int i;
				47	for (i = 0; i < F25519_SIZE; i++)
				48	x[i] = a[i];
				49	}
				50	#endif
				51
				52	static void lm_copy(byte* x, const byte* a)
				53	{
				54	int i;
				55	for (i = 0; i < F25519_SIZE; i++)
				56	x[i] = a[i];
				57	}
				58
				59	#if 0 //UNUSED
				60	static void fprime_select(byte dst, const byte zero, const byte *one, byte condition)
				61	{
				62	const byte mask = -condition;
				63	int i;
				64
				65	for (i = 0; i < F25519_SIZE; i++)
				66	dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
				67	}
				68	#endif
				69
				70	static void fe_select(byte *dst,
				71	const byte zero, const byte one,
				72	byte condition)
				73	{
				74	const byte mask = -condition;
				75	int i;
				76
				77	for (i = 0; i < F25519_SIZE; i++)
				78	dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
				79	}
				80
				81	#if 0 //UNUSED
				82	static void raw_add(byte x, const byte p)
				83	{
				84	word16 c = 0;
				85	int i;
				86
				87	for (i = 0; i < F25519_SIZE; i++) {
				88	c += ((word16)x[i]) + ((word16)p[i]);
				89	x[i] = (byte)c;
				90	c >>= 8;
				91	}
				92	}
				93	#endif
				94
				95	#if 0 //UNUSED
				96	static void raw_try_sub(byte x, const byte p)
				97	{
				98	byte minusp[F25519_SIZE];
				99	word16 c = 0;
				100	int i;
				101
				102	for (i = 0; i < F25519_SIZE; i++) {
				103	c = ((word16)x[i]) - ((word16)p[i]) - c;
				104	minusp[i] = (byte)c;
				105	c = (c >> 8) & 1;
				106	}
				107
				108	fprime_select(x, minusp, x, (byte)c);
				109	}
				110	#endif
				111
				112	#if 0 //UNUSED
				113	static int prime_msb(const byte *p)
				114	{
				115	int i;
				116	byte x;
				117	int shift = 1;
				118	int z = F25519_SIZE - 1;
				119
				120	/*
				121	Test for any hot bits.
				122	As soon as one instance is encountered set shift to 0.
				123	*/
				124	for (i = F25519_SIZE - 1; i >= 0; i--) {
				125	shift &= ((shift ^ ((-p[i] \| p[i]) >> 7)) & 1);
				126	z -= shift;
				127	}
				128	x = p[z];
				129	z <<= 3;
				130	shift = 1;
				131	for (i = 0; i < 8; i++) {
				132	shift &= ((-(x >> i) \| (x >> i)) >> (7 - i) & 1);
				133	z += shift;
				134	}
				135
				136	return z - 1;
				137	}
				138	#endif
				139
				140	#if 0 //UNUSED
				141	static void fprime_add(byte r, const byte a, const byte *modulus)
				142	{
				143	raw_add(r, a);
				144	raw_try_sub(r, modulus);
				145	}
				146	#endif
				147
				148	#if 0 //UNUSED
				149	static void fprime_sub(byte r, const byte a, const byte *modulus)
				150	{
				151	raw_add(r, modulus);
				152	raw_try_sub(r, a);
				153	raw_try_sub(r, modulus);
				154	}
				155	#endif
				156
				157	#if 0 //UNUSED
				158	static void fprime_mul(byte r, const byte a, const byte *b,
				159	const byte *modulus)
				160	{
				161	word16 c = 0;
				162	int i,j;
				163
				164	XMEMSET(r, 0, F25519_SIZE);
				165
				166	for (i = prime_msb(modulus); i >= 0; i--) {
				167	const byte bit = (b[i >> 3] >> (i & 7)) & 1;
				168	byte plusa[F25519_SIZE];
				169
				170	for (j = 0; j < F25519_SIZE; j++) {
				171	c \|= ((word16)r[j]) << 1;
				172	r[j] = (byte)c;
				173	c >>= 8;
				174	}
				175	raw_try_sub(r, modulus);
				176
				177	fprime_copy(plusa, r);
				178	fprime_add(plusa, a, modulus);
				179
				180	fprime_select(r, r, plusa, bit);
				181	}
				182	}
				183	#endif
				184
				185	#if 0 //UNUSED
				186	static void fe_load(byte *x, word32 c)
				187	{
				188	word32 i;
				189
				190	for (i = 0; i < sizeof(c); i++) {
				191	x[i] = c;
				192	c >>= 8;
				193	}
				194
				195	for (; i < F25519_SIZE; i++)
				196	x[i] = 0;
				197	}
				198	#endif
				199
				200	static void fe_normalize(byte *x)
				201	{
				202	byte minusp[F25519_SIZE];
				203	word16 c;
				204	int i;
				205
				206	/* Reduce using 2^255 = 19 mod p */
				207	c = (x[31] >> 7) * 19;
				208	x[31] &= 127;
				209
				210	for (i = 0; i < F25519_SIZE; i++) {
				211	c += x[i];
				212	x[i] = (byte)c;
				213	c >>= 8;
				214	}
				215
				216	/* The number is now less than 2^255 + 18, and therefore less than
				217	* 2p. Try subtracting p, and conditionally load the subtracted
				218	* value if underflow did not occur.
				219	*/
				220	c = 19;
				221
				222	for (i = 0; i + 1 < F25519_SIZE; i++) {
				223	c += x[i];
				224	minusp[i] = (byte)c;
				225	c >>= 8;
				226	}
				227
				228	c += ((word16)x[i]) - 128;
				229	minusp[31] = (byte)c;
				230
				231	/* Load x-p if no underflow */
				232	fe_select(x, minusp, x, (c >> 15) & 1);
				233	}
				234
				235	static void lm_add(byte* r, const byte* a, const byte* b)
				236	{
				237	word16 c = 0;
				238	int i;
				239
				240	/* Add */
				241	for (i = 0; i < F25519_SIZE; i++) {
				242	c >>= 8;
				243	c += ((word16)a[i]) + ((word16)b[i]);
				244	r[i] = (byte)c;
				245	}
				246
				247	/* Reduce with 2^255 = 19 mod p */
				248	r[31] &= 127;
				249	c = (c >> 7) * 19;
				250
				251	for (i = 0; i < F25519_SIZE; i++) {
				252	c += r[i];
				253	r[i] = (byte)c;
				254	c >>= 8;
				255	}
				256	}
				257
				258	static void lm_sub(byte* r, const byte* a, const byte* b)
				259	{
				260	word32 c = 0;
				261	int i;
				262
				263	/* Calculate a + 2p - b, to avoid underflow */
				264	c = 218;
				265	for (i = 0; i + 1 < F25519_SIZE; i++) {
				266	c += 65280 + ((word32)a[i]) - ((word32)b[i]);
				267	r[i] = c;
				268	c >>= 8;
				269	}
				270
				271	c += ((word32)a[31]) - ((word32)b[31]);
				272	r[31] = c & 127;
				273	c = (c >> 7) * 19;
				274
				275	for (i = 0; i < F25519_SIZE; i++) {
				276	c += r[i];
				277	r[i] = c;
				278	c >>= 8;
				279	}
				280	}
				281
				282	#if 0 //UNUSED
				283	static void lm_neg(byte* r, const byte* a)
				284	{
				285	word32 c = 0;
				286	int i;
				287
				288	/* Calculate 2p - a, to avoid underflow */
				289	c = 218;
				290	for (i = 0; i + 1 < F25519_SIZE; i++) {
				291	c += 65280 - ((word32)a[i]);
				292	r[i] = c;
				293	c >>= 8;
				294	}
				295
				296	c -= ((word32)a[31]);
				297	r[31] = c & 127;
				298	c = (c >> 7) * 19;
				299
				300	for (i = 0; i < F25519_SIZE; i++) {
				301	c += r[i];
				302	r[i] = c;
				303	c >>= 8;
				304	}
				305	}
				306	#endif
				307
				308	static void fe_mul__distinct(byte r, const byte a, const byte *b)
				309	{
				310	word32 c = 0;
				311	int i;
				312
				313	for (i = 0; i < F25519_SIZE; i++) {
				314	int j;
				315
				316	c >>= 8;
				317	for (j = 0; j <= i; j++)
				318	c += ((word32)a[j]) * ((word32)b[i - j]);
				319
				320	for (; j < F25519_SIZE; j++)
				321	c += ((word32)a[j]) *
				322	((word32)b[i + F25519_SIZE - j]) * 38;
				323
				324	r[i] = c;
				325	}
				326
				327	r[31] &= 127;
				328	c = (c >> 7) * 19;
				329
				330	for (i = 0; i < F25519_SIZE; i++) {
				331	c += r[i];
				332	r[i] = c;
				333	c >>= 8;
				334	}
				335	}
				336
				337	#if 0 //UNUSED
				338	static void lm_mul(byte r, const byte a, const byte *b)
				339	{
				340	byte tmp[F25519_SIZE];
				341
				342	fe_mul__distinct(tmp, a, b);
				343	lm_copy(r, tmp);
				344	}
				345	#endif
				346
				347	static void fe_mul_c(byte r, const byte a, word32 b)
				348	{
				349	word32 c = 0;
				350	int i;
				351
				352	for (i = 0; i < F25519_SIZE; i++) {
				353	c >>= 8;
				354	c += b * ((word32)a[i]);
				355	r[i] = c;
				356	}
				357
				358	r[31] &= 127;
				359	c >>= 7;
				360	c *= 19;
				361
				362	for (i = 0; i < F25519_SIZE; i++) {
				363	c += r[i];
				364	r[i] = c;
				365	c >>= 8;
				366	}
				367	}
				368
				369	static void fe_inv__distinct(byte r, const byte x)
				370	{
				371	byte s[F25519_SIZE];
				372	int i;
				373
				374	/* This is a prime field, so by Fermat's little theorem:
				375	*
				376	* x^(p-1) = 1 mod p
				377	*
				378	* Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
				379	* inverse.
				380	*
				381	* This is a 255-bit binary number with the digits:
				382	*
				383	* 11111111... 01011
				384	*
				385	* We compute the result by the usual binary chain, but
				386	* alternate between keeping the accumulator in r and s, so as
				387	* to avoid copying temporaries.
				388	*/
				389
				390	/* 1 1 */
				391	fe_mul__distinct(s, x, x);
				392	fe_mul__distinct(r, s, x);
				393
				394	/* 1 x 248 */
				395	for (i = 0; i < 248; i++) {
				396	fe_mul__distinct(s, r, r);
				397	fe_mul__distinct(r, s, x);
				398	}
				399
				400	/* 0 */
				401	fe_mul__distinct(s, r, r);
				402
				403	/* 1 */
				404	fe_mul__distinct(r, s, s);
				405	fe_mul__distinct(s, r, x);
				406
				407	/* 0 */
				408	fe_mul__distinct(r, s, s);
				409
				410	/* 1 */
				411	fe_mul__distinct(s, r, r);
				412	fe_mul__distinct(r, s, x);
				413
				414	/* 1 */
				415	fe_mul__distinct(s, r, r);
				416	fe_mul__distinct(r, s, x);
				417	}
				418
				419	#if 0 //UNUSED
				420	static void lm_invert(byte r, const byte x)
				421	{
				422	byte tmp[F25519_SIZE];
				423
				424	fe_inv__distinct(tmp, x);
				425	lm_copy(r, tmp);
				426	}
				427	#endif
				428
				429	#if 0 //UNUSED
				430	/* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
				431	* storage.
				432	*/
				433	static void exp2523(byte r, const byte x, byte *s)
				434	{
				435	int i;
				436
				437	/* This number is a 252-bit number with the binary expansion:
				438	*
				439	* 111111... 01
				440	*/
				441
				442	/* 1 1 */
				443	fe_mul__distinct(r, x, x);
				444	fe_mul__distinct(s, r, x);
				445
				446	/* 1 x 248 */
				447	for (i = 0; i < 248; i++) {
				448	fe_mul__distinct(r, s, s);
				449	fe_mul__distinct(s, r, x);
				450	}
				451
				452	/* 0 */
				453	fe_mul__distinct(r, s, s);
				454
				455	/* 1 */
				456	fe_mul__distinct(s, r, r);
				457	fe_mul__distinct(r, s, x);
				458	}
				459	#endif
				460
				461	#if 0 //UNUSED
				462	static void fe_sqrt(byte r, const byte a)
				463	{
				464	byte v[F25519_SIZE];
				465	byte i[F25519_SIZE];
				466	byte x[F25519_SIZE];
				467	byte y[F25519_SIZE];
				468
				469	/* v = (2a)^((p-5)/8) [x = 2a] */
				470	fe_mul_c(x, a, 2);
				471	exp2523(v, x, y);
				472
				473	/* i = 2av^2 - 1 */
				474	fe_mul__distinct(y, v, v);
				475	fe_mul__distinct(i, x, y);
				476	fe_load(y, 1);
				477	lm_sub(i, i, y);
				478
				479	/* r = avi */
				480	fe_mul__distinct(x, v, a);
				481	fe_mul__distinct(r, x, i);
				482	}
				483	#endif
				484
				485	/* Differential addition */
				486	static void xc_diffadd(byte x5, byte z5,
				487	const byte x1, const byte z1,
				488	const byte x2, const byte z2,
				489	const byte x3, const byte z3)
				490	{
				491	/* Explicit formulas database: dbl-1987-m3
				492	*
				493	* source 1987 Montgomery "Speeding the Pollard and elliptic curve
				494	* methods of factorization", page 261, fifth display, plus
				495	* common-subexpression elimination
				496	* compute A = X2+Z2
				497	* compute B = X2-Z2
				498	* compute C = X3+Z3
				499	* compute D = X3-Z3
				500	* compute DA = D A
				501	* compute CB = C B
				502	* compute X5 = Z1(DA+CB)^2
				503	* compute Z5 = X1(DA-CB)^2
				504	*/
				505	byte da[F25519_SIZE];
				506	byte cb[F25519_SIZE];
				507	byte a[F25519_SIZE];
				508	byte b[F25519_SIZE];
				509
				510	lm_add(a, x2, z2);
				511	lm_sub(b, x3, z3); /* D */
				512	fe_mul__distinct(da, a, b);
				513
				514	lm_sub(b, x2, z2);
				515	lm_add(a, x3, z3); /* C */
				516	fe_mul__distinct(cb, a, b);
				517
				518	lm_add(a, da, cb);
				519	fe_mul__distinct(b, a, a);
				520	fe_mul__distinct(x5, z1, b);
				521
				522	lm_sub(a, da, cb);
				523	fe_mul__distinct(b, a, a);
				524	fe_mul__distinct(z5, x1, b);
				525	}
				526
				527	/* Double an X-coordinate */
				528	static void xc_double(byte x3, byte z3,
				529	const byte x1, const byte z1)
				530	{
				531	/* Explicit formulas database: dbl-1987-m
				532	*
				533	* source 1987 Montgomery "Speeding the Pollard and elliptic
				534	* curve methods of factorization", page 261, fourth display
				535	* compute X3 = (X1^2-Z1^2)^2
				536	* compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
				537	*/
				538	byte x1sq[F25519_SIZE];
				539	byte z1sq[F25519_SIZE];
				540	byte x1z1[F25519_SIZE];
				541	byte a[F25519_SIZE];
				542
				543	fe_mul__distinct(x1sq, x1, x1);
				544	fe_mul__distinct(z1sq, z1, z1);
				545	fe_mul__distinct(x1z1, x1, z1);
				546
				547	lm_sub(a, x1sq, z1sq);
				548	fe_mul__distinct(x3, a, a);
				549
				550	fe_mul_c(a, x1z1, 486662);
				551	lm_add(a, x1sq, a);
				552	lm_add(a, z1sq, a);
				553	fe_mul__distinct(x1sq, x1z1, a);
				554	fe_mul_c(z3, x1sq, 4);
				555	}
				556
Denys Vlasenko	83e5c62	2018-11-23 17:21:38 +0100	[diff] [blame^]	557	void FAST_FUNC curve25519(byte result, const byte e, const byte *q)
Denys Vlasenko	bddb654	2018-11-13 02:16:24 +0100	[diff] [blame]	558	{
Denys Vlasenko	bddb654	2018-11-13 02:16:24 +0100	[diff] [blame]	559	int i;
				560
Denys Vlasenko	375fc78	2018-11-13 03:15:15 +0100	[diff] [blame]	561	struct {
				562	/* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
				563	/static const/ byte f25519_one[F25519_SIZE]; // = {1};
				564
				565	/* Current point: P_m */
				566	byte xm[F25519_SIZE];
				567	byte zm[F25519_SIZE]; // = {1};
				568	/* Predecessor: P_(m-1) */
				569	byte xm1[F25519_SIZE]; // = {1};
				570	byte zm1[F25519_SIZE]; // = {0};
				571	} z;
				572	#define f25519_one z.f25519_one
				573	#define xm z.xm
				574	#define zm z.zm
				575	#define xm1 z.xm1
				576	#define zm1 z.zm1
				577	memset(&z, 0, sizeof(z));
				578	f25519_one[0] = 1;
				579	zm[0] = 1;
				580	xm1[0] = 1;
				581
Denys Vlasenko	bddb654	2018-11-13 02:16:24 +0100	[diff] [blame]	582	/* Note: bit 254 is assumed to be 1 */
				583	lm_copy(xm, q);
				584
				585	for (i = 253; i >= 0; i--) {
				586	const int bit = (e[i >> 3] >> (i & 7)) & 1;
				587	byte xms[F25519_SIZE];
				588	byte zms[F25519_SIZE];
				589
				590	/* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
				591	xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
				592	xc_double(xm, zm, xm, zm);
				593
				594	/* Compute P_(2m+1) */
				595	xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
				596
				597	/* Select:
				598	* bit = 1 --> (P_(2m+1), P_(2m))
				599	* bit = 0 --> (P_(2m), P_(2m-1))
				600	*/
				601	fe_select(xm1, xm1, xm, bit);
				602	fe_select(zm1, zm1, zm, bit);
				603	fe_select(xm, xm, xms, bit);
				604	fe_select(zm, zm, zms, bit);
				605	}
				606
				607	/* Freeze out of projective coordinates */
				608	fe_inv__distinct(zm1, zm);
				609	fe_mul__distinct(result, zm1, xm);
				610	fe_normalize(result);
				611	}