1 | #include "fixedint.h" |
2 | #include "fe.h" |
3 | |
4 | #ifndef ED25519_LOAD_BYTES |
5 | #define ED25519_LOAD_BYTES |
6 | |
7 | /* |
8 | helper functions |
9 | */ |
10 | static uint64_t load_3(const unsigned char *in) { |
11 | uint64_t result; |
12 | |
13 | result = (uint64_t) in[0]; |
14 | result |= ((uint64_t) in[1]) << 8; |
15 | result |= ((uint64_t) in[2]) << 16; |
16 | |
17 | return result; |
18 | } |
19 | |
20 | static uint64_t load_4(const unsigned char *in) { |
21 | uint64_t result; |
22 | |
23 | result = (uint64_t) in[0]; |
24 | result |= ((uint64_t) in[1]) << 8; |
25 | result |= ((uint64_t) in[2]) << 16; |
26 | result |= ((uint64_t) in[3]) << 24; |
27 | |
28 | return result; |
29 | } |
30 | |
31 | #endif |
32 | |
33 | /* |
34 | h = 0 |
35 | */ |
36 | |
37 | void fe_0(fe h) { |
38 | h[0] = 0; |
39 | h[1] = 0; |
40 | h[2] = 0; |
41 | h[3] = 0; |
42 | h[4] = 0; |
43 | h[5] = 0; |
44 | h[6] = 0; |
45 | h[7] = 0; |
46 | h[8] = 0; |
47 | h[9] = 0; |
48 | } |
49 | |
50 | |
51 | |
52 | /* |
53 | h = 1 |
54 | */ |
55 | |
56 | void fe_1(fe h) { |
57 | h[0] = 1; |
58 | h[1] = 0; |
59 | h[2] = 0; |
60 | h[3] = 0; |
61 | h[4] = 0; |
62 | h[5] = 0; |
63 | h[6] = 0; |
64 | h[7] = 0; |
65 | h[8] = 0; |
66 | h[9] = 0; |
67 | } |
68 | |
69 | |
70 | |
71 | /* |
72 | h = f + g |
73 | Can overlap h with f or g. |
74 | |
75 | Preconditions: |
76 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
77 | |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
78 | |
79 | Postconditions: |
80 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
81 | */ |
82 | |
83 | void fe_add(fe h, const fe f, const fe g) { |
84 | int32_t f0 = f[0]; |
85 | int32_t f1 = f[1]; |
86 | int32_t f2 = f[2]; |
87 | int32_t f3 = f[3]; |
88 | int32_t f4 = f[4]; |
89 | int32_t f5 = f[5]; |
90 | int32_t f6 = f[6]; |
91 | int32_t f7 = f[7]; |
92 | int32_t f8 = f[8]; |
93 | int32_t f9 = f[9]; |
94 | int32_t g0 = g[0]; |
95 | int32_t g1 = g[1]; |
96 | int32_t g2 = g[2]; |
97 | int32_t g3 = g[3]; |
98 | int32_t g4 = g[4]; |
99 | int32_t g5 = g[5]; |
100 | int32_t g6 = g[6]; |
101 | int32_t g7 = g[7]; |
102 | int32_t g8 = g[8]; |
103 | int32_t g9 = g[9]; |
104 | int32_t h0 = f0 + g0; |
105 | int32_t h1 = f1 + g1; |
106 | int32_t h2 = f2 + g2; |
107 | int32_t h3 = f3 + g3; |
108 | int32_t h4 = f4 + g4; |
109 | int32_t h5 = f5 + g5; |
110 | int32_t h6 = f6 + g6; |
111 | int32_t h7 = f7 + g7; |
112 | int32_t h8 = f8 + g8; |
113 | int32_t h9 = f9 + g9; |
114 | |
115 | h[0] = h0; |
116 | h[1] = h1; |
117 | h[2] = h2; |
118 | h[3] = h3; |
119 | h[4] = h4; |
120 | h[5] = h5; |
121 | h[6] = h6; |
122 | h[7] = h7; |
123 | h[8] = h8; |
124 | h[9] = h9; |
125 | } |
126 | |
127 | |
128 | |
129 | /* |
130 | Replace (f,g) with (g,g) if b == 1; |
131 | replace (f,g) with (f,g) if b == 0. |
132 | |
133 | Preconditions: b in {0,1}. |
134 | */ |
135 | |
136 | void fe_cmov(fe f, const fe g, unsigned int b) { |
137 | int32_t f0 = f[0]; |
138 | int32_t f1 = f[1]; |
139 | int32_t f2 = f[2]; |
140 | int32_t f3 = f[3]; |
141 | int32_t f4 = f[4]; |
142 | int32_t f5 = f[5]; |
143 | int32_t f6 = f[6]; |
144 | int32_t f7 = f[7]; |
145 | int32_t f8 = f[8]; |
146 | int32_t f9 = f[9]; |
147 | int32_t g0 = g[0]; |
148 | int32_t g1 = g[1]; |
149 | int32_t g2 = g[2]; |
150 | int32_t g3 = g[3]; |
151 | int32_t g4 = g[4]; |
152 | int32_t g5 = g[5]; |
153 | int32_t g6 = g[6]; |
154 | int32_t g7 = g[7]; |
155 | int32_t g8 = g[8]; |
156 | int32_t g9 = g[9]; |
157 | int32_t x0 = f0 ^ g0; |
158 | int32_t x1 = f1 ^ g1; |
159 | int32_t x2 = f2 ^ g2; |
160 | int32_t x3 = f3 ^ g3; |
161 | int32_t x4 = f4 ^ g4; |
162 | int32_t x5 = f5 ^ g5; |
163 | int32_t x6 = f6 ^ g6; |
164 | int32_t x7 = f7 ^ g7; |
165 | int32_t x8 = f8 ^ g8; |
166 | int32_t x9 = f9 ^ g9; |
167 | |
168 | b = (unsigned int) (- (int) b); /* silence warning */ |
169 | x0 &= b; |
170 | x1 &= b; |
171 | x2 &= b; |
172 | x3 &= b; |
173 | x4 &= b; |
174 | x5 &= b; |
175 | x6 &= b; |
176 | x7 &= b; |
177 | x8 &= b; |
178 | x9 &= b; |
179 | |
180 | f[0] = f0 ^ x0; |
181 | f[1] = f1 ^ x1; |
182 | f[2] = f2 ^ x2; |
183 | f[3] = f3 ^ x3; |
184 | f[4] = f4 ^ x4; |
185 | f[5] = f5 ^ x5; |
186 | f[6] = f6 ^ x6; |
187 | f[7] = f7 ^ x7; |
188 | f[8] = f8 ^ x8; |
189 | f[9] = f9 ^ x9; |
190 | } |
191 | |
192 | /* |
193 | Replace (f,g) with (g,f) if b == 1; |
194 | replace (f,g) with (f,g) if b == 0. |
195 | |
196 | Preconditions: b in {0,1}. |
197 | */ |
198 | |
199 | void fe_cswap(fe f,fe g,unsigned int b) { |
200 | int32_t f0 = f[0]; |
201 | int32_t f1 = f[1]; |
202 | int32_t f2 = f[2]; |
203 | int32_t f3 = f[3]; |
204 | int32_t f4 = f[4]; |
205 | int32_t f5 = f[5]; |
206 | int32_t f6 = f[6]; |
207 | int32_t f7 = f[7]; |
208 | int32_t f8 = f[8]; |
209 | int32_t f9 = f[9]; |
210 | int32_t g0 = g[0]; |
211 | int32_t g1 = g[1]; |
212 | int32_t g2 = g[2]; |
213 | int32_t g3 = g[3]; |
214 | int32_t g4 = g[4]; |
215 | int32_t g5 = g[5]; |
216 | int32_t g6 = g[6]; |
217 | int32_t g7 = g[7]; |
218 | int32_t g8 = g[8]; |
219 | int32_t g9 = g[9]; |
220 | int32_t x0 = f0 ^ g0; |
221 | int32_t x1 = f1 ^ g1; |
222 | int32_t x2 = f2 ^ g2; |
223 | int32_t x3 = f3 ^ g3; |
224 | int32_t x4 = f4 ^ g4; |
225 | int32_t x5 = f5 ^ g5; |
226 | int32_t x6 = f6 ^ g6; |
227 | int32_t x7 = f7 ^ g7; |
228 | int32_t x8 = f8 ^ g8; |
229 | int32_t x9 = f9 ^ g9; |
230 | b = -b; |
231 | x0 &= b; |
232 | x1 &= b; |
233 | x2 &= b; |
234 | x3 &= b; |
235 | x4 &= b; |
236 | x5 &= b; |
237 | x6 &= b; |
238 | x7 &= b; |
239 | x8 &= b; |
240 | x9 &= b; |
241 | f[0] = f0 ^ x0; |
242 | f[1] = f1 ^ x1; |
243 | f[2] = f2 ^ x2; |
244 | f[3] = f3 ^ x3; |
245 | f[4] = f4 ^ x4; |
246 | f[5] = f5 ^ x5; |
247 | f[6] = f6 ^ x6; |
248 | f[7] = f7 ^ x7; |
249 | f[8] = f8 ^ x8; |
250 | f[9] = f9 ^ x9; |
251 | g[0] = g0 ^ x0; |
252 | g[1] = g1 ^ x1; |
253 | g[2] = g2 ^ x2; |
254 | g[3] = g3 ^ x3; |
255 | g[4] = g4 ^ x4; |
256 | g[5] = g5 ^ x5; |
257 | g[6] = g6 ^ x6; |
258 | g[7] = g7 ^ x7; |
259 | g[8] = g8 ^ x8; |
260 | g[9] = g9 ^ x9; |
261 | } |
262 | |
263 | |
264 | |
265 | /* |
266 | h = f |
267 | */ |
268 | |
269 | void fe_copy(fe h, const fe f) { |
270 | int32_t f0 = f[0]; |
271 | int32_t f1 = f[1]; |
272 | int32_t f2 = f[2]; |
273 | int32_t f3 = f[3]; |
274 | int32_t f4 = f[4]; |
275 | int32_t f5 = f[5]; |
276 | int32_t f6 = f[6]; |
277 | int32_t f7 = f[7]; |
278 | int32_t f8 = f[8]; |
279 | int32_t f9 = f[9]; |
280 | |
281 | h[0] = f0; |
282 | h[1] = f1; |
283 | h[2] = f2; |
284 | h[3] = f3; |
285 | h[4] = f4; |
286 | h[5] = f5; |
287 | h[6] = f6; |
288 | h[7] = f7; |
289 | h[8] = f8; |
290 | h[9] = f9; |
291 | } |
292 | |
293 | |
294 | |
295 | /* |
296 | Ignores top bit of h. |
297 | */ |
298 | |
299 | void fe_frombytes(fe h, const unsigned char *s) { |
300 | int64_t h0 = load_4(in: s); |
301 | int64_t h1 = load_3(in: s + 4) << 6; |
302 | int64_t h2 = load_3(in: s + 7) << 5; |
303 | int64_t h3 = load_3(in: s + 10) << 3; |
304 | int64_t h4 = load_3(in: s + 13) << 2; |
305 | int64_t h5 = load_4(in: s + 16); |
306 | int64_t h6 = load_3(in: s + 20) << 7; |
307 | int64_t h7 = load_3(in: s + 23) << 5; |
308 | int64_t h8 = load_3(in: s + 26) << 4; |
309 | int64_t h9 = (load_3(in: s + 29) & 8388607) << 2; |
310 | int64_t carry0; |
311 | int64_t carry1; |
312 | int64_t carry2; |
313 | int64_t carry3; |
314 | int64_t carry4; |
315 | int64_t carry5; |
316 | int64_t carry6; |
317 | int64_t carry7; |
318 | int64_t carry8; |
319 | int64_t carry9; |
320 | |
321 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; |
322 | h0 += carry9 * 19; |
323 | h9 -= carry9 << 25; |
324 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; |
325 | h2 += carry1; |
326 | h1 -= carry1 << 25; |
327 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; |
328 | h4 += carry3; |
329 | h3 -= carry3 << 25; |
330 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; |
331 | h6 += carry5; |
332 | h5 -= carry5 << 25; |
333 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; |
334 | h8 += carry7; |
335 | h7 -= carry7 << 25; |
336 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
337 | h1 += carry0; |
338 | h0 -= carry0 << 26; |
339 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; |
340 | h3 += carry2; |
341 | h2 -= carry2 << 26; |
342 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
343 | h5 += carry4; |
344 | h4 -= carry4 << 26; |
345 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; |
346 | h7 += carry6; |
347 | h6 -= carry6 << 26; |
348 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; |
349 | h9 += carry8; |
350 | h8 -= carry8 << 26; |
351 | |
352 | h[0] = (int32_t) h0; |
353 | h[1] = (int32_t) h1; |
354 | h[2] = (int32_t) h2; |
355 | h[3] = (int32_t) h3; |
356 | h[4] = (int32_t) h4; |
357 | h[5] = (int32_t) h5; |
358 | h[6] = (int32_t) h6; |
359 | h[7] = (int32_t) h7; |
360 | h[8] = (int32_t) h8; |
361 | h[9] = (int32_t) h9; |
362 | } |
363 | |
364 | |
365 | |
366 | void fe_invert(fe out, const fe z) { |
367 | fe t0; |
368 | fe t1; |
369 | fe t2; |
370 | fe t3; |
371 | int i; |
372 | |
373 | fe_sq(h: t0, f: z); |
374 | |
375 | for (i = 1; i < 1; ++i) { |
376 | fe_sq(h: t0, f: t0); |
377 | } |
378 | |
379 | fe_sq(h: t1, f: t0); |
380 | |
381 | for (i = 1; i < 2; ++i) { |
382 | fe_sq(h: t1, f: t1); |
383 | } |
384 | |
385 | fe_mul(h: t1, f: z, g: t1); |
386 | fe_mul(h: t0, f: t0, g: t1); |
387 | fe_sq(h: t2, f: t0); |
388 | |
389 | for (i = 1; i < 1; ++i) { |
390 | fe_sq(h: t2, f: t2); |
391 | } |
392 | |
393 | fe_mul(h: t1, f: t1, g: t2); |
394 | fe_sq(h: t2, f: t1); |
395 | |
396 | for (i = 1; i < 5; ++i) { |
397 | fe_sq(h: t2, f: t2); |
398 | } |
399 | |
400 | fe_mul(h: t1, f: t2, g: t1); |
401 | fe_sq(h: t2, f: t1); |
402 | |
403 | for (i = 1; i < 10; ++i) { |
404 | fe_sq(h: t2, f: t2); |
405 | } |
406 | |
407 | fe_mul(h: t2, f: t2, g: t1); |
408 | fe_sq(h: t3, f: t2); |
409 | |
410 | for (i = 1; i < 20; ++i) { |
411 | fe_sq(h: t3, f: t3); |
412 | } |
413 | |
414 | fe_mul(h: t2, f: t3, g: t2); |
415 | fe_sq(h: t2, f: t2); |
416 | |
417 | for (i = 1; i < 10; ++i) { |
418 | fe_sq(h: t2, f: t2); |
419 | } |
420 | |
421 | fe_mul(h: t1, f: t2, g: t1); |
422 | fe_sq(h: t2, f: t1); |
423 | |
424 | for (i = 1; i < 50; ++i) { |
425 | fe_sq(h: t2, f: t2); |
426 | } |
427 | |
428 | fe_mul(h: t2, f: t2, g: t1); |
429 | fe_sq(h: t3, f: t2); |
430 | |
431 | for (i = 1; i < 100; ++i) { |
432 | fe_sq(h: t3, f: t3); |
433 | } |
434 | |
435 | fe_mul(h: t2, f: t3, g: t2); |
436 | fe_sq(h: t2, f: t2); |
437 | |
438 | for (i = 1; i < 50; ++i) { |
439 | fe_sq(h: t2, f: t2); |
440 | } |
441 | |
442 | fe_mul(h: t1, f: t2, g: t1); |
443 | fe_sq(h: t1, f: t1); |
444 | |
445 | for (i = 1; i < 5; ++i) { |
446 | fe_sq(h: t1, f: t1); |
447 | } |
448 | |
449 | fe_mul(h: out, f: t1, g: t0); |
450 | } |
451 | |
452 | |
453 | |
454 | /* |
455 | return 1 if f is in {1,3,5,...,q-2} |
456 | return 0 if f is in {0,2,4,...,q-1} |
457 | |
458 | Preconditions: |
459 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
460 | */ |
461 | |
462 | int fe_isnegative(const fe f) { |
463 | unsigned char s[32]; |
464 | |
465 | fe_tobytes(s, h: f); |
466 | |
467 | return s[0] & 1; |
468 | } |
469 | |
470 | |
471 | |
472 | /* |
473 | return 1 if f == 0 |
474 | return 0 if f != 0 |
475 | |
476 | Preconditions: |
477 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
478 | */ |
479 | |
480 | int fe_isnonzero(const fe f) { |
481 | unsigned char s[32]; |
482 | unsigned char r; |
483 | |
484 | fe_tobytes(s, h: f); |
485 | |
486 | r = s[0]; |
487 | #define F(i) r |= s[i] |
488 | F(1); |
489 | F(2); |
490 | F(3); |
491 | F(4); |
492 | F(5); |
493 | F(6); |
494 | F(7); |
495 | F(8); |
496 | F(9); |
497 | F(10); |
498 | F(11); |
499 | F(12); |
500 | F(13); |
501 | F(14); |
502 | F(15); |
503 | F(16); |
504 | F(17); |
505 | F(18); |
506 | F(19); |
507 | F(20); |
508 | F(21); |
509 | F(22); |
510 | F(23); |
511 | F(24); |
512 | F(25); |
513 | F(26); |
514 | F(27); |
515 | F(28); |
516 | F(29); |
517 | F(30); |
518 | F(31); |
519 | #undef F |
520 | |
521 | return r != 0; |
522 | } |
523 | |
524 | |
525 | |
526 | /* |
527 | h = f * g |
528 | Can overlap h with f or g. |
529 | |
530 | Preconditions: |
531 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
532 | |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
533 | |
534 | Postconditions: |
535 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
536 | */ |
537 | |
538 | /* |
539 | Notes on implementation strategy: |
540 | |
541 | Using schoolbook multiplication. |
542 | Karatsuba would save a little in some cost models. |
543 | |
544 | Most multiplications by 2 and 19 are 32-bit precomputations; |
545 | cheaper than 64-bit postcomputations. |
546 | |
547 | There is one remaining multiplication by 19 in the carry chain; |
548 | one *19 precomputation can be merged into this, |
549 | but the resulting data flow is considerably less clean. |
550 | |
551 | There are 12 carries below. |
552 | 10 of them are 2-way parallelizable and vectorizable. |
553 | Can get away with 11 carries, but then data flow is much deeper. |
554 | |
555 | With tighter constraints on inputs can squeeze carries into int32. |
556 | */ |
557 | |
558 | void fe_mul(fe h, const fe f, const fe g) { |
559 | int32_t f0 = f[0]; |
560 | int32_t f1 = f[1]; |
561 | int32_t f2 = f[2]; |
562 | int32_t f3 = f[3]; |
563 | int32_t f4 = f[4]; |
564 | int32_t f5 = f[5]; |
565 | int32_t f6 = f[6]; |
566 | int32_t f7 = f[7]; |
567 | int32_t f8 = f[8]; |
568 | int32_t f9 = f[9]; |
569 | int32_t g0 = g[0]; |
570 | int32_t g1 = g[1]; |
571 | int32_t g2 = g[2]; |
572 | int32_t g3 = g[3]; |
573 | int32_t g4 = g[4]; |
574 | int32_t g5 = g[5]; |
575 | int32_t g6 = g[6]; |
576 | int32_t g7 = g[7]; |
577 | int32_t g8 = g[8]; |
578 | int32_t g9 = g[9]; |
579 | int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ |
580 | int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ |
581 | int32_t g3_19 = 19 * g3; |
582 | int32_t g4_19 = 19 * g4; |
583 | int32_t g5_19 = 19 * g5; |
584 | int32_t g6_19 = 19 * g6; |
585 | int32_t g7_19 = 19 * g7; |
586 | int32_t g8_19 = 19 * g8; |
587 | int32_t g9_19 = 19 * g9; |
588 | int32_t f1_2 = 2 * f1; |
589 | int32_t f3_2 = 2 * f3; |
590 | int32_t f5_2 = 2 * f5; |
591 | int32_t f7_2 = 2 * f7; |
592 | int32_t f9_2 = 2 * f9; |
593 | int64_t f0g0 = f0 * (int64_t) g0; |
594 | int64_t f0g1 = f0 * (int64_t) g1; |
595 | int64_t f0g2 = f0 * (int64_t) g2; |
596 | int64_t f0g3 = f0 * (int64_t) g3; |
597 | int64_t f0g4 = f0 * (int64_t) g4; |
598 | int64_t f0g5 = f0 * (int64_t) g5; |
599 | int64_t f0g6 = f0 * (int64_t) g6; |
600 | int64_t f0g7 = f0 * (int64_t) g7; |
601 | int64_t f0g8 = f0 * (int64_t) g8; |
602 | int64_t f0g9 = f0 * (int64_t) g9; |
603 | int64_t f1g0 = f1 * (int64_t) g0; |
604 | int64_t f1g1_2 = f1_2 * (int64_t) g1; |
605 | int64_t f1g2 = f1 * (int64_t) g2; |
606 | int64_t f1g3_2 = f1_2 * (int64_t) g3; |
607 | int64_t f1g4 = f1 * (int64_t) g4; |
608 | int64_t f1g5_2 = f1_2 * (int64_t) g5; |
609 | int64_t f1g6 = f1 * (int64_t) g6; |
610 | int64_t f1g7_2 = f1_2 * (int64_t) g7; |
611 | int64_t f1g8 = f1 * (int64_t) g8; |
612 | int64_t f1g9_38 = f1_2 * (int64_t) g9_19; |
613 | int64_t f2g0 = f2 * (int64_t) g0; |
614 | int64_t f2g1 = f2 * (int64_t) g1; |
615 | int64_t f2g2 = f2 * (int64_t) g2; |
616 | int64_t f2g3 = f2 * (int64_t) g3; |
617 | int64_t f2g4 = f2 * (int64_t) g4; |
618 | int64_t f2g5 = f2 * (int64_t) g5; |
619 | int64_t f2g6 = f2 * (int64_t) g6; |
620 | int64_t f2g7 = f2 * (int64_t) g7; |
621 | int64_t f2g8_19 = f2 * (int64_t) g8_19; |
622 | int64_t f2g9_19 = f2 * (int64_t) g9_19; |
623 | int64_t f3g0 = f3 * (int64_t) g0; |
624 | int64_t f3g1_2 = f3_2 * (int64_t) g1; |
625 | int64_t f3g2 = f3 * (int64_t) g2; |
626 | int64_t f3g3_2 = f3_2 * (int64_t) g3; |
627 | int64_t f3g4 = f3 * (int64_t) g4; |
628 | int64_t f3g5_2 = f3_2 * (int64_t) g5; |
629 | int64_t f3g6 = f3 * (int64_t) g6; |
630 | int64_t f3g7_38 = f3_2 * (int64_t) g7_19; |
631 | int64_t f3g8_19 = f3 * (int64_t) g8_19; |
632 | int64_t f3g9_38 = f3_2 * (int64_t) g9_19; |
633 | int64_t f4g0 = f4 * (int64_t) g0; |
634 | int64_t f4g1 = f4 * (int64_t) g1; |
635 | int64_t f4g2 = f4 * (int64_t) g2; |
636 | int64_t f4g3 = f4 * (int64_t) g3; |
637 | int64_t f4g4 = f4 * (int64_t) g4; |
638 | int64_t f4g5 = f4 * (int64_t) g5; |
639 | int64_t f4g6_19 = f4 * (int64_t) g6_19; |
640 | int64_t f4g7_19 = f4 * (int64_t) g7_19; |
641 | int64_t f4g8_19 = f4 * (int64_t) g8_19; |
642 | int64_t f4g9_19 = f4 * (int64_t) g9_19; |
643 | int64_t f5g0 = f5 * (int64_t) g0; |
644 | int64_t f5g1_2 = f5_2 * (int64_t) g1; |
645 | int64_t f5g2 = f5 * (int64_t) g2; |
646 | int64_t f5g3_2 = f5_2 * (int64_t) g3; |
647 | int64_t f5g4 = f5 * (int64_t) g4; |
648 | int64_t f5g5_38 = f5_2 * (int64_t) g5_19; |
649 | int64_t f5g6_19 = f5 * (int64_t) g6_19; |
650 | int64_t f5g7_38 = f5_2 * (int64_t) g7_19; |
651 | int64_t f5g8_19 = f5 * (int64_t) g8_19; |
652 | int64_t f5g9_38 = f5_2 * (int64_t) g9_19; |
653 | int64_t f6g0 = f6 * (int64_t) g0; |
654 | int64_t f6g1 = f6 * (int64_t) g1; |
655 | int64_t f6g2 = f6 * (int64_t) g2; |
656 | int64_t f6g3 = f6 * (int64_t) g3; |
657 | int64_t f6g4_19 = f6 * (int64_t) g4_19; |
658 | int64_t f6g5_19 = f6 * (int64_t) g5_19; |
659 | int64_t f6g6_19 = f6 * (int64_t) g6_19; |
660 | int64_t f6g7_19 = f6 * (int64_t) g7_19; |
661 | int64_t f6g8_19 = f6 * (int64_t) g8_19; |
662 | int64_t f6g9_19 = f6 * (int64_t) g9_19; |
663 | int64_t f7g0 = f7 * (int64_t) g0; |
664 | int64_t f7g1_2 = f7_2 * (int64_t) g1; |
665 | int64_t f7g2 = f7 * (int64_t) g2; |
666 | int64_t f7g3_38 = f7_2 * (int64_t) g3_19; |
667 | int64_t f7g4_19 = f7 * (int64_t) g4_19; |
668 | int64_t f7g5_38 = f7_2 * (int64_t) g5_19; |
669 | int64_t f7g6_19 = f7 * (int64_t) g6_19; |
670 | int64_t f7g7_38 = f7_2 * (int64_t) g7_19; |
671 | int64_t f7g8_19 = f7 * (int64_t) g8_19; |
672 | int64_t f7g9_38 = f7_2 * (int64_t) g9_19; |
673 | int64_t f8g0 = f8 * (int64_t) g0; |
674 | int64_t f8g1 = f8 * (int64_t) g1; |
675 | int64_t f8g2_19 = f8 * (int64_t) g2_19; |
676 | int64_t f8g3_19 = f8 * (int64_t) g3_19; |
677 | int64_t f8g4_19 = f8 * (int64_t) g4_19; |
678 | int64_t f8g5_19 = f8 * (int64_t) g5_19; |
679 | int64_t f8g6_19 = f8 * (int64_t) g6_19; |
680 | int64_t f8g7_19 = f8 * (int64_t) g7_19; |
681 | int64_t f8g8_19 = f8 * (int64_t) g8_19; |
682 | int64_t f8g9_19 = f8 * (int64_t) g9_19; |
683 | int64_t f9g0 = f9 * (int64_t) g0; |
684 | int64_t f9g1_38 = f9_2 * (int64_t) g1_19; |
685 | int64_t f9g2_19 = f9 * (int64_t) g2_19; |
686 | int64_t f9g3_38 = f9_2 * (int64_t) g3_19; |
687 | int64_t f9g4_19 = f9 * (int64_t) g4_19; |
688 | int64_t f9g5_38 = f9_2 * (int64_t) g5_19; |
689 | int64_t f9g6_19 = f9 * (int64_t) g6_19; |
690 | int64_t f9g7_38 = f9_2 * (int64_t) g7_19; |
691 | int64_t f9g8_19 = f9 * (int64_t) g8_19; |
692 | int64_t f9g9_38 = f9_2 * (int64_t) g9_19; |
693 | int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; |
694 | int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19; |
695 | int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38; |
696 | int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19; |
697 | int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38; |
698 | int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19; |
699 | int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38; |
700 | int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19; |
701 | int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38; |
702 | int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ; |
703 | int64_t carry0; |
704 | int64_t carry1; |
705 | int64_t carry2; |
706 | int64_t carry3; |
707 | int64_t carry4; |
708 | int64_t carry5; |
709 | int64_t carry6; |
710 | int64_t carry7; |
711 | int64_t carry8; |
712 | int64_t carry9; |
713 | |
714 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
715 | h1 += carry0; |
716 | h0 -= carry0 << 26; |
717 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
718 | h5 += carry4; |
719 | h4 -= carry4 << 26; |
720 | |
721 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; |
722 | h2 += carry1; |
723 | h1 -= carry1 << 25; |
724 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; |
725 | h6 += carry5; |
726 | h5 -= carry5 << 25; |
727 | |
728 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; |
729 | h3 += carry2; |
730 | h2 -= carry2 << 26; |
731 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; |
732 | h7 += carry6; |
733 | h6 -= carry6 << 26; |
734 | |
735 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; |
736 | h4 += carry3; |
737 | h3 -= carry3 << 25; |
738 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; |
739 | h8 += carry7; |
740 | h7 -= carry7 << 25; |
741 | |
742 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
743 | h5 += carry4; |
744 | h4 -= carry4 << 26; |
745 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; |
746 | h9 += carry8; |
747 | h8 -= carry8 << 26; |
748 | |
749 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; |
750 | h0 += carry9 * 19; |
751 | h9 -= carry9 << 25; |
752 | |
753 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
754 | h1 += carry0; |
755 | h0 -= carry0 << 26; |
756 | |
757 | h[0] = (int32_t) h0; |
758 | h[1] = (int32_t) h1; |
759 | h[2] = (int32_t) h2; |
760 | h[3] = (int32_t) h3; |
761 | h[4] = (int32_t) h4; |
762 | h[5] = (int32_t) h5; |
763 | h[6] = (int32_t) h6; |
764 | h[7] = (int32_t) h7; |
765 | h[8] = (int32_t) h8; |
766 | h[9] = (int32_t) h9; |
767 | } |
768 | |
769 | |
770 | /* |
771 | h = f * 121666 |
772 | Can overlap h with f. |
773 | |
774 | Preconditions: |
775 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
776 | |
777 | Postconditions: |
778 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
779 | */ |
780 | |
781 | void fe_mul121666(fe h, fe f) { |
782 | int32_t f0 = f[0]; |
783 | int32_t f1 = f[1]; |
784 | int32_t f2 = f[2]; |
785 | int32_t f3 = f[3]; |
786 | int32_t f4 = f[4]; |
787 | int32_t f5 = f[5]; |
788 | int32_t f6 = f[6]; |
789 | int32_t f7 = f[7]; |
790 | int32_t f8 = f[8]; |
791 | int32_t f9 = f[9]; |
792 | int64_t h0 = f0 * (int64_t) 121666; |
793 | int64_t h1 = f1 * (int64_t) 121666; |
794 | int64_t h2 = f2 * (int64_t) 121666; |
795 | int64_t h3 = f3 * (int64_t) 121666; |
796 | int64_t h4 = f4 * (int64_t) 121666; |
797 | int64_t h5 = f5 * (int64_t) 121666; |
798 | int64_t h6 = f6 * (int64_t) 121666; |
799 | int64_t h7 = f7 * (int64_t) 121666; |
800 | int64_t h8 = f8 * (int64_t) 121666; |
801 | int64_t h9 = f9 * (int64_t) 121666; |
802 | int64_t carry0; |
803 | int64_t carry1; |
804 | int64_t carry2; |
805 | int64_t carry3; |
806 | int64_t carry4; |
807 | int64_t carry5; |
808 | int64_t carry6; |
809 | int64_t carry7; |
810 | int64_t carry8; |
811 | int64_t carry9; |
812 | |
813 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; |
814 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; |
815 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; |
816 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; |
817 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; |
818 | |
819 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; |
820 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; |
821 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; |
822 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; |
823 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; |
824 | |
825 | h[0] = h0; |
826 | h[1] = h1; |
827 | h[2] = h2; |
828 | h[3] = h3; |
829 | h[4] = h4; |
830 | h[5] = h5; |
831 | h[6] = h6; |
832 | h[7] = h7; |
833 | h[8] = h8; |
834 | h[9] = h9; |
835 | } |
836 | |
837 | |
838 | /* |
839 | h = -f |
840 | |
841 | Preconditions: |
842 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
843 | |
844 | Postconditions: |
845 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
846 | */ |
847 | |
848 | void fe_neg(fe h, const fe f) { |
849 | int32_t f0 = f[0]; |
850 | int32_t f1 = f[1]; |
851 | int32_t f2 = f[2]; |
852 | int32_t f3 = f[3]; |
853 | int32_t f4 = f[4]; |
854 | int32_t f5 = f[5]; |
855 | int32_t f6 = f[6]; |
856 | int32_t f7 = f[7]; |
857 | int32_t f8 = f[8]; |
858 | int32_t f9 = f[9]; |
859 | int32_t h0 = -f0; |
860 | int32_t h1 = -f1; |
861 | int32_t h2 = -f2; |
862 | int32_t h3 = -f3; |
863 | int32_t h4 = -f4; |
864 | int32_t h5 = -f5; |
865 | int32_t h6 = -f6; |
866 | int32_t h7 = -f7; |
867 | int32_t h8 = -f8; |
868 | int32_t h9 = -f9; |
869 | |
870 | h[0] = h0; |
871 | h[1] = h1; |
872 | h[2] = h2; |
873 | h[3] = h3; |
874 | h[4] = h4; |
875 | h[5] = h5; |
876 | h[6] = h6; |
877 | h[7] = h7; |
878 | h[8] = h8; |
879 | h[9] = h9; |
880 | } |
881 | |
882 | |
883 | void fe_pow22523(fe out, const fe z) { |
884 | fe t0; |
885 | fe t1; |
886 | fe t2; |
887 | int i; |
888 | fe_sq(h: t0, f: z); |
889 | |
890 | for (i = 1; i < 1; ++i) { |
891 | fe_sq(h: t0, f: t0); |
892 | } |
893 | |
894 | fe_sq(h: t1, f: t0); |
895 | |
896 | for (i = 1; i < 2; ++i) { |
897 | fe_sq(h: t1, f: t1); |
898 | } |
899 | |
900 | fe_mul(h: t1, f: z, g: t1); |
901 | fe_mul(h: t0, f: t0, g: t1); |
902 | fe_sq(h: t0, f: t0); |
903 | |
904 | for (i = 1; i < 1; ++i) { |
905 | fe_sq(h: t0, f: t0); |
906 | } |
907 | |
908 | fe_mul(h: t0, f: t1, g: t0); |
909 | fe_sq(h: t1, f: t0); |
910 | |
911 | for (i = 1; i < 5; ++i) { |
912 | fe_sq(h: t1, f: t1); |
913 | } |
914 | |
915 | fe_mul(h: t0, f: t1, g: t0); |
916 | fe_sq(h: t1, f: t0); |
917 | |
918 | for (i = 1; i < 10; ++i) { |
919 | fe_sq(h: t1, f: t1); |
920 | } |
921 | |
922 | fe_mul(h: t1, f: t1, g: t0); |
923 | fe_sq(h: t2, f: t1); |
924 | |
925 | for (i = 1; i < 20; ++i) { |
926 | fe_sq(h: t2, f: t2); |
927 | } |
928 | |
929 | fe_mul(h: t1, f: t2, g: t1); |
930 | fe_sq(h: t1, f: t1); |
931 | |
932 | for (i = 1; i < 10; ++i) { |
933 | fe_sq(h: t1, f: t1); |
934 | } |
935 | |
936 | fe_mul(h: t0, f: t1, g: t0); |
937 | fe_sq(h: t1, f: t0); |
938 | |
939 | for (i = 1; i < 50; ++i) { |
940 | fe_sq(h: t1, f: t1); |
941 | } |
942 | |
943 | fe_mul(h: t1, f: t1, g: t0); |
944 | fe_sq(h: t2, f: t1); |
945 | |
946 | for (i = 1; i < 100; ++i) { |
947 | fe_sq(h: t2, f: t2); |
948 | } |
949 | |
950 | fe_mul(h: t1, f: t2, g: t1); |
951 | fe_sq(h: t1, f: t1); |
952 | |
953 | for (i = 1; i < 50; ++i) { |
954 | fe_sq(h: t1, f: t1); |
955 | } |
956 | |
957 | fe_mul(h: t0, f: t1, g: t0); |
958 | fe_sq(h: t0, f: t0); |
959 | |
960 | for (i = 1; i < 2; ++i) { |
961 | fe_sq(h: t0, f: t0); |
962 | } |
963 | |
964 | fe_mul(h: out, f: t0, g: z); |
965 | return; |
966 | } |
967 | |
968 | |
969 | /* |
970 | h = f * f |
971 | Can overlap h with f. |
972 | |
973 | Preconditions: |
974 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
975 | |
976 | Postconditions: |
977 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
978 | */ |
979 | |
980 | /* |
981 | See fe_mul.c for discussion of implementation strategy. |
982 | */ |
983 | |
984 | void fe_sq(fe h, const fe f) { |
985 | int32_t f0 = f[0]; |
986 | int32_t f1 = f[1]; |
987 | int32_t f2 = f[2]; |
988 | int32_t f3 = f[3]; |
989 | int32_t f4 = f[4]; |
990 | int32_t f5 = f[5]; |
991 | int32_t f6 = f[6]; |
992 | int32_t f7 = f[7]; |
993 | int32_t f8 = f[8]; |
994 | int32_t f9 = f[9]; |
995 | int32_t f0_2 = 2 * f0; |
996 | int32_t f1_2 = 2 * f1; |
997 | int32_t f2_2 = 2 * f2; |
998 | int32_t f3_2 = 2 * f3; |
999 | int32_t f4_2 = 2 * f4; |
1000 | int32_t f5_2 = 2 * f5; |
1001 | int32_t f6_2 = 2 * f6; |
1002 | int32_t f7_2 = 2 * f7; |
1003 | int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
1004 | int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
1005 | int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
1006 | int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
1007 | int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
1008 | int64_t f0f0 = f0 * (int64_t) f0; |
1009 | int64_t f0f1_2 = f0_2 * (int64_t) f1; |
1010 | int64_t f0f2_2 = f0_2 * (int64_t) f2; |
1011 | int64_t f0f3_2 = f0_2 * (int64_t) f3; |
1012 | int64_t f0f4_2 = f0_2 * (int64_t) f4; |
1013 | int64_t f0f5_2 = f0_2 * (int64_t) f5; |
1014 | int64_t f0f6_2 = f0_2 * (int64_t) f6; |
1015 | int64_t f0f7_2 = f0_2 * (int64_t) f7; |
1016 | int64_t f0f8_2 = f0_2 * (int64_t) f8; |
1017 | int64_t f0f9_2 = f0_2 * (int64_t) f9; |
1018 | int64_t f1f1_2 = f1_2 * (int64_t) f1; |
1019 | int64_t f1f2_2 = f1_2 * (int64_t) f2; |
1020 | int64_t f1f3_4 = f1_2 * (int64_t) f3_2; |
1021 | int64_t f1f4_2 = f1_2 * (int64_t) f4; |
1022 | int64_t f1f5_4 = f1_2 * (int64_t) f5_2; |
1023 | int64_t f1f6_2 = f1_2 * (int64_t) f6; |
1024 | int64_t f1f7_4 = f1_2 * (int64_t) f7_2; |
1025 | int64_t f1f8_2 = f1_2 * (int64_t) f8; |
1026 | int64_t f1f9_76 = f1_2 * (int64_t) f9_38; |
1027 | int64_t f2f2 = f2 * (int64_t) f2; |
1028 | int64_t f2f3_2 = f2_2 * (int64_t) f3; |
1029 | int64_t f2f4_2 = f2_2 * (int64_t) f4; |
1030 | int64_t f2f5_2 = f2_2 * (int64_t) f5; |
1031 | int64_t f2f6_2 = f2_2 * (int64_t) f6; |
1032 | int64_t f2f7_2 = f2_2 * (int64_t) f7; |
1033 | int64_t f2f8_38 = f2_2 * (int64_t) f8_19; |
1034 | int64_t f2f9_38 = f2 * (int64_t) f9_38; |
1035 | int64_t f3f3_2 = f3_2 * (int64_t) f3; |
1036 | int64_t f3f4_2 = f3_2 * (int64_t) f4; |
1037 | int64_t f3f5_4 = f3_2 * (int64_t) f5_2; |
1038 | int64_t f3f6_2 = f3_2 * (int64_t) f6; |
1039 | int64_t f3f7_76 = f3_2 * (int64_t) f7_38; |
1040 | int64_t f3f8_38 = f3_2 * (int64_t) f8_19; |
1041 | int64_t f3f9_76 = f3_2 * (int64_t) f9_38; |
1042 | int64_t f4f4 = f4 * (int64_t) f4; |
1043 | int64_t f4f5_2 = f4_2 * (int64_t) f5; |
1044 | int64_t f4f6_38 = f4_2 * (int64_t) f6_19; |
1045 | int64_t f4f7_38 = f4 * (int64_t) f7_38; |
1046 | int64_t f4f8_38 = f4_2 * (int64_t) f8_19; |
1047 | int64_t f4f9_38 = f4 * (int64_t) f9_38; |
1048 | int64_t f5f5_38 = f5 * (int64_t) f5_38; |
1049 | int64_t f5f6_38 = f5_2 * (int64_t) f6_19; |
1050 | int64_t f5f7_76 = f5_2 * (int64_t) f7_38; |
1051 | int64_t f5f8_38 = f5_2 * (int64_t) f8_19; |
1052 | int64_t f5f9_76 = f5_2 * (int64_t) f9_38; |
1053 | int64_t f6f6_19 = f6 * (int64_t) f6_19; |
1054 | int64_t f6f7_38 = f6 * (int64_t) f7_38; |
1055 | int64_t f6f8_38 = f6_2 * (int64_t) f8_19; |
1056 | int64_t f6f9_38 = f6 * (int64_t) f9_38; |
1057 | int64_t f7f7_38 = f7 * (int64_t) f7_38; |
1058 | int64_t f7f8_38 = f7_2 * (int64_t) f8_19; |
1059 | int64_t f7f9_76 = f7_2 * (int64_t) f9_38; |
1060 | int64_t f8f8_19 = f8 * (int64_t) f8_19; |
1061 | int64_t f8f9_38 = f8 * (int64_t) f9_38; |
1062 | int64_t f9f9_38 = f9 * (int64_t) f9_38; |
1063 | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; |
1064 | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; |
1065 | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; |
1066 | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; |
1067 | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; |
1068 | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; |
1069 | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; |
1070 | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; |
1071 | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; |
1072 | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; |
1073 | int64_t carry0; |
1074 | int64_t carry1; |
1075 | int64_t carry2; |
1076 | int64_t carry3; |
1077 | int64_t carry4; |
1078 | int64_t carry5; |
1079 | int64_t carry6; |
1080 | int64_t carry7; |
1081 | int64_t carry8; |
1082 | int64_t carry9; |
1083 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
1084 | h1 += carry0; |
1085 | h0 -= carry0 << 26; |
1086 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
1087 | h5 += carry4; |
1088 | h4 -= carry4 << 26; |
1089 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; |
1090 | h2 += carry1; |
1091 | h1 -= carry1 << 25; |
1092 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; |
1093 | h6 += carry5; |
1094 | h5 -= carry5 << 25; |
1095 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; |
1096 | h3 += carry2; |
1097 | h2 -= carry2 << 26; |
1098 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; |
1099 | h7 += carry6; |
1100 | h6 -= carry6 << 26; |
1101 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; |
1102 | h4 += carry3; |
1103 | h3 -= carry3 << 25; |
1104 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; |
1105 | h8 += carry7; |
1106 | h7 -= carry7 << 25; |
1107 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
1108 | h5 += carry4; |
1109 | h4 -= carry4 << 26; |
1110 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; |
1111 | h9 += carry8; |
1112 | h8 -= carry8 << 26; |
1113 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; |
1114 | h0 += carry9 * 19; |
1115 | h9 -= carry9 << 25; |
1116 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
1117 | h1 += carry0; |
1118 | h0 -= carry0 << 26; |
1119 | h[0] = (int32_t) h0; |
1120 | h[1] = (int32_t) h1; |
1121 | h[2] = (int32_t) h2; |
1122 | h[3] = (int32_t) h3; |
1123 | h[4] = (int32_t) h4; |
1124 | h[5] = (int32_t) h5; |
1125 | h[6] = (int32_t) h6; |
1126 | h[7] = (int32_t) h7; |
1127 | h[8] = (int32_t) h8; |
1128 | h[9] = (int32_t) h9; |
1129 | } |
1130 | |
1131 | |
1132 | /* |
1133 | h = 2 * f * f |
1134 | Can overlap h with f. |
1135 | |
1136 | Preconditions: |
1137 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
1138 | |
1139 | Postconditions: |
1140 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
1141 | */ |
1142 | |
1143 | /* |
1144 | See fe_mul.c for discussion of implementation strategy. |
1145 | */ |
1146 | |
1147 | void fe_sq2(fe h, const fe f) { |
1148 | int32_t f0 = f[0]; |
1149 | int32_t f1 = f[1]; |
1150 | int32_t f2 = f[2]; |
1151 | int32_t f3 = f[3]; |
1152 | int32_t f4 = f[4]; |
1153 | int32_t f5 = f[5]; |
1154 | int32_t f6 = f[6]; |
1155 | int32_t f7 = f[7]; |
1156 | int32_t f8 = f[8]; |
1157 | int32_t f9 = f[9]; |
1158 | int32_t f0_2 = 2 * f0; |
1159 | int32_t f1_2 = 2 * f1; |
1160 | int32_t f2_2 = 2 * f2; |
1161 | int32_t f3_2 = 2 * f3; |
1162 | int32_t f4_2 = 2 * f4; |
1163 | int32_t f5_2 = 2 * f5; |
1164 | int32_t f6_2 = 2 * f6; |
1165 | int32_t f7_2 = 2 * f7; |
1166 | int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
1167 | int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
1168 | int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
1169 | int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
1170 | int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
1171 | int64_t f0f0 = f0 * (int64_t) f0; |
1172 | int64_t f0f1_2 = f0_2 * (int64_t) f1; |
1173 | int64_t f0f2_2 = f0_2 * (int64_t) f2; |
1174 | int64_t f0f3_2 = f0_2 * (int64_t) f3; |
1175 | int64_t f0f4_2 = f0_2 * (int64_t) f4; |
1176 | int64_t f0f5_2 = f0_2 * (int64_t) f5; |
1177 | int64_t f0f6_2 = f0_2 * (int64_t) f6; |
1178 | int64_t f0f7_2 = f0_2 * (int64_t) f7; |
1179 | int64_t f0f8_2 = f0_2 * (int64_t) f8; |
1180 | int64_t f0f9_2 = f0_2 * (int64_t) f9; |
1181 | int64_t f1f1_2 = f1_2 * (int64_t) f1; |
1182 | int64_t f1f2_2 = f1_2 * (int64_t) f2; |
1183 | int64_t f1f3_4 = f1_2 * (int64_t) f3_2; |
1184 | int64_t f1f4_2 = f1_2 * (int64_t) f4; |
1185 | int64_t f1f5_4 = f1_2 * (int64_t) f5_2; |
1186 | int64_t f1f6_2 = f1_2 * (int64_t) f6; |
1187 | int64_t f1f7_4 = f1_2 * (int64_t) f7_2; |
1188 | int64_t f1f8_2 = f1_2 * (int64_t) f8; |
1189 | int64_t f1f9_76 = f1_2 * (int64_t) f9_38; |
1190 | int64_t f2f2 = f2 * (int64_t) f2; |
1191 | int64_t f2f3_2 = f2_2 * (int64_t) f3; |
1192 | int64_t f2f4_2 = f2_2 * (int64_t) f4; |
1193 | int64_t f2f5_2 = f2_2 * (int64_t) f5; |
1194 | int64_t f2f6_2 = f2_2 * (int64_t) f6; |
1195 | int64_t f2f7_2 = f2_2 * (int64_t) f7; |
1196 | int64_t f2f8_38 = f2_2 * (int64_t) f8_19; |
1197 | int64_t f2f9_38 = f2 * (int64_t) f9_38; |
1198 | int64_t f3f3_2 = f3_2 * (int64_t) f3; |
1199 | int64_t f3f4_2 = f3_2 * (int64_t) f4; |
1200 | int64_t f3f5_4 = f3_2 * (int64_t) f5_2; |
1201 | int64_t f3f6_2 = f3_2 * (int64_t) f6; |
1202 | int64_t f3f7_76 = f3_2 * (int64_t) f7_38; |
1203 | int64_t f3f8_38 = f3_2 * (int64_t) f8_19; |
1204 | int64_t f3f9_76 = f3_2 * (int64_t) f9_38; |
1205 | int64_t f4f4 = f4 * (int64_t) f4; |
1206 | int64_t f4f5_2 = f4_2 * (int64_t) f5; |
1207 | int64_t f4f6_38 = f4_2 * (int64_t) f6_19; |
1208 | int64_t f4f7_38 = f4 * (int64_t) f7_38; |
1209 | int64_t f4f8_38 = f4_2 * (int64_t) f8_19; |
1210 | int64_t f4f9_38 = f4 * (int64_t) f9_38; |
1211 | int64_t f5f5_38 = f5 * (int64_t) f5_38; |
1212 | int64_t f5f6_38 = f5_2 * (int64_t) f6_19; |
1213 | int64_t f5f7_76 = f5_2 * (int64_t) f7_38; |
1214 | int64_t f5f8_38 = f5_2 * (int64_t) f8_19; |
1215 | int64_t f5f9_76 = f5_2 * (int64_t) f9_38; |
1216 | int64_t f6f6_19 = f6 * (int64_t) f6_19; |
1217 | int64_t f6f7_38 = f6 * (int64_t) f7_38; |
1218 | int64_t f6f8_38 = f6_2 * (int64_t) f8_19; |
1219 | int64_t f6f9_38 = f6 * (int64_t) f9_38; |
1220 | int64_t f7f7_38 = f7 * (int64_t) f7_38; |
1221 | int64_t f7f8_38 = f7_2 * (int64_t) f8_19; |
1222 | int64_t f7f9_76 = f7_2 * (int64_t) f9_38; |
1223 | int64_t f8f8_19 = f8 * (int64_t) f8_19; |
1224 | int64_t f8f9_38 = f8 * (int64_t) f9_38; |
1225 | int64_t f9f9_38 = f9 * (int64_t) f9_38; |
1226 | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; |
1227 | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; |
1228 | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; |
1229 | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; |
1230 | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; |
1231 | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; |
1232 | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; |
1233 | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; |
1234 | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; |
1235 | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; |
1236 | int64_t carry0; |
1237 | int64_t carry1; |
1238 | int64_t carry2; |
1239 | int64_t carry3; |
1240 | int64_t carry4; |
1241 | int64_t carry5; |
1242 | int64_t carry6; |
1243 | int64_t carry7; |
1244 | int64_t carry8; |
1245 | int64_t carry9; |
1246 | h0 += h0; |
1247 | h1 += h1; |
1248 | h2 += h2; |
1249 | h3 += h3; |
1250 | h4 += h4; |
1251 | h5 += h5; |
1252 | h6 += h6; |
1253 | h7 += h7; |
1254 | h8 += h8; |
1255 | h9 += h9; |
1256 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
1257 | h1 += carry0; |
1258 | h0 -= carry0 << 26; |
1259 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
1260 | h5 += carry4; |
1261 | h4 -= carry4 << 26; |
1262 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; |
1263 | h2 += carry1; |
1264 | h1 -= carry1 << 25; |
1265 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; |
1266 | h6 += carry5; |
1267 | h5 -= carry5 << 25; |
1268 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; |
1269 | h3 += carry2; |
1270 | h2 -= carry2 << 26; |
1271 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; |
1272 | h7 += carry6; |
1273 | h6 -= carry6 << 26; |
1274 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; |
1275 | h4 += carry3; |
1276 | h3 -= carry3 << 25; |
1277 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; |
1278 | h8 += carry7; |
1279 | h7 -= carry7 << 25; |
1280 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; |
1281 | h5 += carry4; |
1282 | h4 -= carry4 << 26; |
1283 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; |
1284 | h9 += carry8; |
1285 | h8 -= carry8 << 26; |
1286 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; |
1287 | h0 += carry9 * 19; |
1288 | h9 -= carry9 << 25; |
1289 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; |
1290 | h1 += carry0; |
1291 | h0 -= carry0 << 26; |
1292 | h[0] = (int32_t) h0; |
1293 | h[1] = (int32_t) h1; |
1294 | h[2] = (int32_t) h2; |
1295 | h[3] = (int32_t) h3; |
1296 | h[4] = (int32_t) h4; |
1297 | h[5] = (int32_t) h5; |
1298 | h[6] = (int32_t) h6; |
1299 | h[7] = (int32_t) h7; |
1300 | h[8] = (int32_t) h8; |
1301 | h[9] = (int32_t) h9; |
1302 | } |
1303 | |
1304 | |
1305 | /* |
1306 | h = f - g |
1307 | Can overlap h with f or g. |
1308 | |
1309 | Preconditions: |
1310 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
1311 | |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
1312 | |
1313 | Postconditions: |
1314 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
1315 | */ |
1316 | |
1317 | void fe_sub(fe h, const fe f, const fe g) { |
1318 | int32_t f0 = f[0]; |
1319 | int32_t f1 = f[1]; |
1320 | int32_t f2 = f[2]; |
1321 | int32_t f3 = f[3]; |
1322 | int32_t f4 = f[4]; |
1323 | int32_t f5 = f[5]; |
1324 | int32_t f6 = f[6]; |
1325 | int32_t f7 = f[7]; |
1326 | int32_t f8 = f[8]; |
1327 | int32_t f9 = f[9]; |
1328 | int32_t g0 = g[0]; |
1329 | int32_t g1 = g[1]; |
1330 | int32_t g2 = g[2]; |
1331 | int32_t g3 = g[3]; |
1332 | int32_t g4 = g[4]; |
1333 | int32_t g5 = g[5]; |
1334 | int32_t g6 = g[6]; |
1335 | int32_t g7 = g[7]; |
1336 | int32_t g8 = g[8]; |
1337 | int32_t g9 = g[9]; |
1338 | int32_t h0 = f0 - g0; |
1339 | int32_t h1 = f1 - g1; |
1340 | int32_t h2 = f2 - g2; |
1341 | int32_t h3 = f3 - g3; |
1342 | int32_t h4 = f4 - g4; |
1343 | int32_t h5 = f5 - g5; |
1344 | int32_t h6 = f6 - g6; |
1345 | int32_t h7 = f7 - g7; |
1346 | int32_t h8 = f8 - g8; |
1347 | int32_t h9 = f9 - g9; |
1348 | |
1349 | h[0] = h0; |
1350 | h[1] = h1; |
1351 | h[2] = h2; |
1352 | h[3] = h3; |
1353 | h[4] = h4; |
1354 | h[5] = h5; |
1355 | h[6] = h6; |
1356 | h[7] = h7; |
1357 | h[8] = h8; |
1358 | h[9] = h9; |
1359 | } |
1360 | |
1361 | |
1362 | |
1363 | /* |
1364 | Preconditions: |
1365 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
1366 | |
1367 | Write p=2^255-19; q=floor(h/p). |
1368 | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). |
1369 | |
1370 | Proof: |
1371 | Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. |
1372 | Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. |
1373 | |
1374 | Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). |
1375 | Then 0<y<1. |
1376 | |
1377 | Write r=h-pq. |
1378 | Have 0<=r<=p-1=2^255-20. |
1379 | Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. |
1380 | |
1381 | Write x=r+19(2^-255)r+y. |
1382 | Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. |
1383 | |
1384 | Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) |
1385 | so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. |
1386 | */ |
1387 | |
1388 | void fe_tobytes(unsigned char *s, const fe h) { |
1389 | int32_t h0 = h[0]; |
1390 | int32_t h1 = h[1]; |
1391 | int32_t h2 = h[2]; |
1392 | int32_t h3 = h[3]; |
1393 | int32_t h4 = h[4]; |
1394 | int32_t h5 = h[5]; |
1395 | int32_t h6 = h[6]; |
1396 | int32_t h7 = h[7]; |
1397 | int32_t h8 = h[8]; |
1398 | int32_t h9 = h[9]; |
1399 | int32_t q; |
1400 | int32_t carry0; |
1401 | int32_t carry1; |
1402 | int32_t carry2; |
1403 | int32_t carry3; |
1404 | int32_t carry4; |
1405 | int32_t carry5; |
1406 | int32_t carry6; |
1407 | int32_t carry7; |
1408 | int32_t carry8; |
1409 | int32_t carry9; |
1410 | q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; |
1411 | q = (h0 + q) >> 26; |
1412 | q = (h1 + q) >> 25; |
1413 | q = (h2 + q) >> 26; |
1414 | q = (h3 + q) >> 25; |
1415 | q = (h4 + q) >> 26; |
1416 | q = (h5 + q) >> 25; |
1417 | q = (h6 + q) >> 26; |
1418 | q = (h7 + q) >> 25; |
1419 | q = (h8 + q) >> 26; |
1420 | q = (h9 + q) >> 25; |
1421 | /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ |
1422 | h0 += 19 * q; |
1423 | /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ |
1424 | carry0 = h0 >> 26; |
1425 | h1 += carry0; |
1426 | h0 -= carry0 << 26; |
1427 | carry1 = h1 >> 25; |
1428 | h2 += carry1; |
1429 | h1 -= carry1 << 25; |
1430 | carry2 = h2 >> 26; |
1431 | h3 += carry2; |
1432 | h2 -= carry2 << 26; |
1433 | carry3 = h3 >> 25; |
1434 | h4 += carry3; |
1435 | h3 -= carry3 << 25; |
1436 | carry4 = h4 >> 26; |
1437 | h5 += carry4; |
1438 | h4 -= carry4 << 26; |
1439 | carry5 = h5 >> 25; |
1440 | h6 += carry5; |
1441 | h5 -= carry5 << 25; |
1442 | carry6 = h6 >> 26; |
1443 | h7 += carry6; |
1444 | h6 -= carry6 << 26; |
1445 | carry7 = h7 >> 25; |
1446 | h8 += carry7; |
1447 | h7 -= carry7 << 25; |
1448 | carry8 = h8 >> 26; |
1449 | h9 += carry8; |
1450 | h8 -= carry8 << 26; |
1451 | carry9 = h9 >> 25; |
1452 | h9 -= carry9 << 25; |
1453 | |
1454 | /* h10 = carry9 */ |
1455 | /* |
1456 | Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. |
1457 | Have h0+...+2^230 h9 between 0 and 2^255-1; |
1458 | evidently 2^255 h10-2^255 q = 0. |
1459 | Goal: Output h0+...+2^230 h9. |
1460 | */ |
1461 | s[0] = (unsigned char) (h0 >> 0); |
1462 | s[1] = (unsigned char) (h0 >> 8); |
1463 | s[2] = (unsigned char) (h0 >> 16); |
1464 | s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2)); |
1465 | s[4] = (unsigned char) (h1 >> 6); |
1466 | s[5] = (unsigned char) (h1 >> 14); |
1467 | s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3)); |
1468 | s[7] = (unsigned char) (h2 >> 5); |
1469 | s[8] = (unsigned char) (h2 >> 13); |
1470 | s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5)); |
1471 | s[10] = (unsigned char) (h3 >> 3); |
1472 | s[11] = (unsigned char) (h3 >> 11); |
1473 | s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6)); |
1474 | s[13] = (unsigned char) (h4 >> 2); |
1475 | s[14] = (unsigned char) (h4 >> 10); |
1476 | s[15] = (unsigned char) (h4 >> 18); |
1477 | s[16] = (unsigned char) (h5 >> 0); |
1478 | s[17] = (unsigned char) (h5 >> 8); |
1479 | s[18] = (unsigned char) (h5 >> 16); |
1480 | s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1)); |
1481 | s[20] = (unsigned char) (h6 >> 7); |
1482 | s[21] = (unsigned char) (h6 >> 15); |
1483 | s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3)); |
1484 | s[23] = (unsigned char) (h7 >> 5); |
1485 | s[24] = (unsigned char) (h7 >> 13); |
1486 | s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4)); |
1487 | s[26] = (unsigned char) (h8 >> 4); |
1488 | s[27] = (unsigned char) (h8 >> 12); |
1489 | s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6)); |
1490 | s[29] = (unsigned char) (h9 >> 2); |
1491 | s[30] = (unsigned char) (h9 >> 10); |
1492 | s[31] = (unsigned char) (h9 >> 18); |
1493 | } |
1494 | |