/*
 * Copyright (C) 2017 Denys Vlasenko
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
#include "tls.h"

/* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
 * Changes are flagged with //bbox
 */

/**
 * @file pstm_montgomery_reduce.c
 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
 *
 * Multiprecision Montgomery Reduction.
 */
/*
 * Copyright (c) 2013-2015 INSIDE Secure Corporation
 * Copyright (c) PeerSec Networks, 2002-2011
 * All Rights Reserved
 *
 * The latest version of this code is available at http://www.matrixssl.org
 *
 * This software is open source; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This General Public License does NOT permit incorporating this software
 * into proprietary programs. If you are unable to comply with the GPL, a
 * commercial license for this software may be purchased from INSIDE at
 * http://www.insidesecure.com/eng/Company/Locations
 *
 * This program is distributed in WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 * http://www.gnu.org/copyleft/gpl.html
 */
/******************************************************************************/

//bbox
//#include "../cryptoApi.h"
#ifndef DISABLE_PSTM

/******************************************************************************/

#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif
//#pragma message ("Using 32 bit x86 Assembly Optimizations")

/* This backend needs no per-reduction setup/teardown hooks */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier for the current digit.
 * NOTE: expands against the locals c, x, mp of pstm_montgomery_reduce() */
#define LOOP_START \
	mu = c[x] * mp

/* _c[LO] += mu * (*tmpm++) + cy, new carry left in cy
 * (reference semantics: see the ISO C INNERMUL further below) */
#define INNERMUL \
asm( \
	"movl %5,%%eax \n\t" \
	"mull %4 \n\t" \
	"addl %1,%%eax \n\t" \
	"adcl $0,%%edx \n\t" \
	"addl %%eax,%0 \n\t" \
	"adcl $0,%%edx \n\t" \
	"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "%cc")

/* _c[LO] += cy; cy = 1 if the add wrapped (carry flag set), else 0 */
#define PROPCARRY \
asm( \
	"addl %1,%0 \n\t" \
	"setb %%al \n\t" \
	"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")

/******************************************************************************/
#elif defined(PSTM_X86_64)
/* x86-64 optimized */
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
#endif
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")

/* This backend needs no per-reduction setup/teardown hooks */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier for the current digit.
 * NOTE: expands against the locals c, x, mp of pstm_montgomery_reduce() */
#define LOOP_START \
mu = c[x] * mp

/* _c[LO] += mu * (*tmpm++) + cy, new carry left in cy
 * (reference semantics: see the ISO C INNERMUL further below) */
#define INNERMUL \
asm( \
	"movq %5,%%rax \n\t" \
	"mulq %4 \n\t" \
	"addq %1,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"addq %%rax,%0 \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rdx,%1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
	: "%rax", "%rdx", "cc")

/* Eight INNERMUL steps unrolled: processes tmpm[0..7] against _c[0..7],
 * carrying cy through the chain; r10 holds the next _c word, r11 the next
 * tmpm word, so loads overlap the multiplies */
#define INNERMUL8 \
asm( \
	"movq 0(%5),%%rax \n\t" \
	"movq 0(%2),%%r10 \n\t" \
	"movq 0x8(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x8(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x10(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x10(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x8(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x18(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x18(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x10(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x20(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x20(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x18(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x28(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x28(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x20(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x30(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x30(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x28(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"movq 0x38(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x38(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x30(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	"movq %%r11,%%rax \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x38(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
	\
	:"=r"(_c), "=r"(cy) \
	: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
	: "%rax", "%rdx", "%r10", "%r11", "cc")

/* _c[LO] += cy; cy = 1 if the add wrapped (carry flag set), else 0 */
#define PROPCARRY \
asm( \
	"addq %1,%0 \n\t" \
	"setb %%al \n\t" \
	"movzbq %%al,%1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy) \
	: "%rax", "cc")

/******************************************************************************/
#elif defined(PSTM_ARM)

/* This backend needs no per-reduction setup/teardown hooks */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier for the current digit.
 * NOTE: expands against the locals c, x, mp of pstm_montgomery_reduce() */
#define LOOP_START \
mu = c[x] * mp

#ifdef __thumb2__
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
/* _c[0] += mu * (*tmpm++) + cy, carry left in cy: ADDS folds the old carry
 * into r0 and leaves 0/1 in %0 (via ITE on the carry flag), then UMLAL
 * accumulates the 64-bit product into r0:%0.
 * (reference semantics: see the ISO C INNERMUL further below) */
#define INNERMUL \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" ITE CS \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	" UMLAL r0,%0,%3,%4 \n\t" \
	" STR r0,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","%cc");
/* _c[0] += cy; cy = 1 if the add wrapped, else 0 */
#define PROPCARRY \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" STR r0,%1 \n\t" \
	" ITE CS \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","%cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
/* Same as the Thumb2 variant but using classic conditional execution,
 * so no ITE block is needed */
#define INNERMUL \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	" UMLAL r0,%0,%3,%4 \n\t" \
	" STR r0,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","%cc");
/* _c[0] += cy; cy = 1 if the add wrapped, else 0 */
#define PROPCARRY \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" STR r0,%1 \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","%cc");
#endif /* __thumb2__ */


/******************************************************************************/
#elif defined(PSTM_MIPS)
/* MIPS32 */
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
/* This backend needs no per-reduction setup/teardown hooks */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier for the current digit.
 * NOTE: expands against the locals c, x, mp of pstm_montgomery_reduce() */
#define LOOP_START \
mu = c[x] * mp

/* _c[0] += mu * (*tmpm) + cy, new carry left in cy; tmpm is advanced by
 * the trailing statement after the asm since multu reads tmpm[0] directly.
 * (reference semantics: see the ISO C INNERMUL further below) */
#define INNERMUL \
asm( \
	" multu %3,%4 \n\t" \
	" mflo $12 \n\t" \
	" mfhi $13 \n\t" \
	" addu $12,$12,%0 \n\t" \
	" sltu $10,$12,%0 \n\t" \
	" addu $13,$13,$10 \n\t" \
	" lw $10,%1 \n\t" \
	" addu $12,$12,$10 \n\t" \
	" sltu $10,$12,$10 \n\t" \
	" addu %0,$13,$10 \n\t" \
	" sw $12,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
	:"$10","$12","$13")\
; ++tmpm;

/* _c[0] += cy; cy = 1 if the add wrapped (sltu), else 0 */
#define PROPCARRY \
asm( \
	" lw $10,%1 \n\t" \
	" addu $10,$10,%0 \n\t" \
	" sw $10,%1 \n\t" \
	" sltu %0,$10,%0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(_c[0])\
	:"$10");


/******************************************************************************/
#else

/* ISO C code */
/* This backend needs no per-reduction setup/teardown hooks */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier for the current digit.
 * NOTE: expands against the locals c, x, mp of pstm_montgomery_reduce() */
#define LOOP_START \
	mu = c[x] * mp

/* Reference semantics for every INNERMUL variant above:
 * double-width t = _c[0] + cy + mu * (*tmpm++);
 * low digit goes back to _c[0], high digit becomes the new carry cy */
#define INNERMUL \
	do { pstm_word t; \
	t = ((pstm_word)_c[0] + (pstm_word)cy) + \
		(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
	_c[0] = (pstm_digit)t; \
	cy = (pstm_digit)(t >> DIGIT_BIT); \
	} while (0)

/* _c[0] += cy; cy = 1 if that wrapped around, else 0 */
#define PROPCARRY \
	do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif

/******************************************************************************/

/* Index of the digit the x86 INNERMUL/PROPCARRY variants accumulate into */
#define LO 0

/* computes x/R == x (mod N) via Montgomery Reduction */
/*
 * Reduces a in place modulo m.  mp is the per-modulus setup digit
 * (presumably -1/m mod digit-base, produced by the pstm setup code - not
 * visible in this file).  paD/paDlen optionally supply a caller-owned
 * scratch buffer so the heap allocation can be skipped.
 *
 * NOTE(review): the locals c, _c, tmpm, mu, cy, x and the parameter mp are
 * referenced BY NAME from the LOOP_START/INNERMUL/INNERMUL8/PROPCARRY
 * macros defined above - do not rename them.
 *
 * Returns PSTM_OKAY on success, PS_LIMIT_FAIL if a cannot hold m, or
 * PS_MEM_FAIL if the final conditional subtraction fails.
 */
int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
		pstm_digit mp, pstm_digit *paD, uint32 paDlen)
{
	pstm_digit *c, *_c, *tmpm, mu;
	int32 oldused, x, y;
	int16 pa;

	pa = m->used;
	if (pa > a->alloc) {
		/* Sanity test for bad numbers. This will confirm no buffer overruns */
		return PS_LIMIT_FAIL;
	}

	/* Scratch digit array c: use the caller's buffer when large enough,
	 * otherwise zero-allocate one */
	if (paD && paDlen >= (uint32)2*pa+1) {
		c = paD;
		memset(c, 0x0, paDlen);
	} else {
		/* NOTE(review): c is indexed as a pstm_digit array up to c[2*pa]
		 * below, yet only 2*pa+1 BYTES are requested here - looks
		 * undersized unless callers always supply paD; confirm against
		 * the call sites */
		c = xzalloc(2*pa+1);//bbox
	}
	/* copy the input */
	oldused = a->used;
	for (x = 0; x < oldused; x++) {
		c[x] = a->dp[x];
	}

	MONT_START;

	/* One pass per modulus digit: compute mu, accumulate mu * m into c
	 * starting at digit x (the Montgomery elimination step), then ripple
	 * the remaining carry into the upper digits */
	for (x = 0; x < pa; x++) {
		pstm_digit cy = 0;
		/* get Mu for this round */
		LOOP_START;
		_c = c + x;
		tmpm = m->dp;
		y = 0;
#ifdef PSTM_X86_64
		/* 8-way unrolled inner multiply for the bulk of the digits */
		for (; y < (pa & ~7); y += 8) {
			INNERMUL8;
			_c += 8;
			tmpm += 8;
		}
#endif /* PSTM_X86_64 */
		for (; y < pa; y++) {
			INNERMUL;
			++_c;
		}
		LOOP_END;
		while (cy) {
			PROPCARRY;
			++_c;
		}
	}

	/* now copy out: the result lives in the upper half c[pa..2*pa] */
	_c = c + pa;
	tmpm = a->dp;
	for (x = 0; x < pa+1; x++) {
		*tmpm++ = *_c++;
	}

	/* zero any leftover digits of a above the new length */
	for (; x < oldused; x++) {
		*tmpm++ = 0;
	}

	MONT_FINI;

	a->used = pa+1;
	pstm_clamp(a);

	/* reuse x as return code */
	x = PSTM_OKAY;

	/* if A >= m then A = A - m */
	if (pstm_cmp_mag (a, m) != PSTM_LT) {
		if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
			x = PS_MEM_FAIL;
		}
	}
	/* Free the scratch buffer only when the paD fast path was not taken.
	 * NOTE(review): this mirrors the allocation test on paDlen but not on
	 * paD itself - with paD==NULL and a large paDlen the xzalloc'd buffer
	 * would leak; presumably callers pass paDlen==0 with NULL paD - confirm */
	if (paDlen < (uint32)2*pa+1) {
		psFree(c, pool);
	}
	return x;
}

#endif /* !DISABLE_PSTM */
/******************************************************************************/