1#include "fixedint.h"
2#include "fe.h"
3
4#ifndef ED25519_LOAD_BYTES
5#define ED25519_LOAD_BYTES
6
7/*
8 helper functions
9*/
10static uint64_t load_3(const unsigned char *in) {
11 uint64_t result;
12
13 result = (uint64_t) in[0];
14 result |= ((uint64_t) in[1]) << 8;
15 result |= ((uint64_t) in[2]) << 16;
16
17 return result;
18}
19
20static uint64_t load_4(const unsigned char *in) {
21 uint64_t result;
22
23 result = (uint64_t) in[0];
24 result |= ((uint64_t) in[1]) << 8;
25 result |= ((uint64_t) in[2]) << 16;
26 result |= ((uint64_t) in[3]) << 24;
27
28 return result;
29}
30
31#endif
32
33/*
34 h = 0
35*/
36
37void fe_0(fe h) {
38 h[0] = 0;
39 h[1] = 0;
40 h[2] = 0;
41 h[3] = 0;
42 h[4] = 0;
43 h[5] = 0;
44 h[6] = 0;
45 h[7] = 0;
46 h[8] = 0;
47 h[9] = 0;
48}
49
50
51
52/*
53 h = 1
54*/
55
56void fe_1(fe h) {
57 h[0] = 1;
58 h[1] = 0;
59 h[2] = 0;
60 h[3] = 0;
61 h[4] = 0;
62 h[5] = 0;
63 h[6] = 0;
64 h[7] = 0;
65 h[8] = 0;
66 h[9] = 0;
67}
68
69
70
71/*
72 h = f + g
73 Can overlap h with f or g.
74
75 Preconditions:
76 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
77 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
78
79 Postconditions:
80 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
81*/
82
83void fe_add(fe h, const fe f, const fe g) {
84 int32_t f0 = f[0];
85 int32_t f1 = f[1];
86 int32_t f2 = f[2];
87 int32_t f3 = f[3];
88 int32_t f4 = f[4];
89 int32_t f5 = f[5];
90 int32_t f6 = f[6];
91 int32_t f7 = f[7];
92 int32_t f8 = f[8];
93 int32_t f9 = f[9];
94 int32_t g0 = g[0];
95 int32_t g1 = g[1];
96 int32_t g2 = g[2];
97 int32_t g3 = g[3];
98 int32_t g4 = g[4];
99 int32_t g5 = g[5];
100 int32_t g6 = g[6];
101 int32_t g7 = g[7];
102 int32_t g8 = g[8];
103 int32_t g9 = g[9];
104 int32_t h0 = f0 + g0;
105 int32_t h1 = f1 + g1;
106 int32_t h2 = f2 + g2;
107 int32_t h3 = f3 + g3;
108 int32_t h4 = f4 + g4;
109 int32_t h5 = f5 + g5;
110 int32_t h6 = f6 + g6;
111 int32_t h7 = f7 + g7;
112 int32_t h8 = f8 + g8;
113 int32_t h9 = f9 + g9;
114
115 h[0] = h0;
116 h[1] = h1;
117 h[2] = h2;
118 h[3] = h3;
119 h[4] = h4;
120 h[5] = h5;
121 h[6] = h6;
122 h[7] = h7;
123 h[8] = h8;
124 h[9] = h9;
125}
126
127
128
129/*
130 Replace (f,g) with (g,g) if b == 1;
131 replace (f,g) with (f,g) if b == 0.
132
133 Preconditions: b in {0,1}.
134*/
135
136void fe_cmov(fe f, const fe g, unsigned int b) {
137 int32_t f0 = f[0];
138 int32_t f1 = f[1];
139 int32_t f2 = f[2];
140 int32_t f3 = f[3];
141 int32_t f4 = f[4];
142 int32_t f5 = f[5];
143 int32_t f6 = f[6];
144 int32_t f7 = f[7];
145 int32_t f8 = f[8];
146 int32_t f9 = f[9];
147 int32_t g0 = g[0];
148 int32_t g1 = g[1];
149 int32_t g2 = g[2];
150 int32_t g3 = g[3];
151 int32_t g4 = g[4];
152 int32_t g5 = g[5];
153 int32_t g6 = g[6];
154 int32_t g7 = g[7];
155 int32_t g8 = g[8];
156 int32_t g9 = g[9];
157 int32_t x0 = f0 ^ g0;
158 int32_t x1 = f1 ^ g1;
159 int32_t x2 = f2 ^ g2;
160 int32_t x3 = f3 ^ g3;
161 int32_t x4 = f4 ^ g4;
162 int32_t x5 = f5 ^ g5;
163 int32_t x6 = f6 ^ g6;
164 int32_t x7 = f7 ^ g7;
165 int32_t x8 = f8 ^ g8;
166 int32_t x9 = f9 ^ g9;
167
168 b = (unsigned int) (- (int) b); /* silence warning */
169 x0 &= b;
170 x1 &= b;
171 x2 &= b;
172 x3 &= b;
173 x4 &= b;
174 x5 &= b;
175 x6 &= b;
176 x7 &= b;
177 x8 &= b;
178 x9 &= b;
179
180 f[0] = f0 ^ x0;
181 f[1] = f1 ^ x1;
182 f[2] = f2 ^ x2;
183 f[3] = f3 ^ x3;
184 f[4] = f4 ^ x4;
185 f[5] = f5 ^ x5;
186 f[6] = f6 ^ x6;
187 f[7] = f7 ^ x7;
188 f[8] = f8 ^ x8;
189 f[9] = f9 ^ x9;
190}
191
192/*
193 Replace (f,g) with (g,f) if b == 1;
194 replace (f,g) with (f,g) if b == 0.
195
196 Preconditions: b in {0,1}.
197*/
198
199void fe_cswap(fe f,fe g,unsigned int b) {
200 int32_t f0 = f[0];
201 int32_t f1 = f[1];
202 int32_t f2 = f[2];
203 int32_t f3 = f[3];
204 int32_t f4 = f[4];
205 int32_t f5 = f[5];
206 int32_t f6 = f[6];
207 int32_t f7 = f[7];
208 int32_t f8 = f[8];
209 int32_t f9 = f[9];
210 int32_t g0 = g[0];
211 int32_t g1 = g[1];
212 int32_t g2 = g[2];
213 int32_t g3 = g[3];
214 int32_t g4 = g[4];
215 int32_t g5 = g[5];
216 int32_t g6 = g[6];
217 int32_t g7 = g[7];
218 int32_t g8 = g[8];
219 int32_t g9 = g[9];
220 int32_t x0 = f0 ^ g0;
221 int32_t x1 = f1 ^ g1;
222 int32_t x2 = f2 ^ g2;
223 int32_t x3 = f3 ^ g3;
224 int32_t x4 = f4 ^ g4;
225 int32_t x5 = f5 ^ g5;
226 int32_t x6 = f6 ^ g6;
227 int32_t x7 = f7 ^ g7;
228 int32_t x8 = f8 ^ g8;
229 int32_t x9 = f9 ^ g9;
230 b = -b;
231 x0 &= b;
232 x1 &= b;
233 x2 &= b;
234 x3 &= b;
235 x4 &= b;
236 x5 &= b;
237 x6 &= b;
238 x7 &= b;
239 x8 &= b;
240 x9 &= b;
241 f[0] = f0 ^ x0;
242 f[1] = f1 ^ x1;
243 f[2] = f2 ^ x2;
244 f[3] = f3 ^ x3;
245 f[4] = f4 ^ x4;
246 f[5] = f5 ^ x5;
247 f[6] = f6 ^ x6;
248 f[7] = f7 ^ x7;
249 f[8] = f8 ^ x8;
250 f[9] = f9 ^ x9;
251 g[0] = g0 ^ x0;
252 g[1] = g1 ^ x1;
253 g[2] = g2 ^ x2;
254 g[3] = g3 ^ x3;
255 g[4] = g4 ^ x4;
256 g[5] = g5 ^ x5;
257 g[6] = g6 ^ x6;
258 g[7] = g7 ^ x7;
259 g[8] = g8 ^ x8;
260 g[9] = g9 ^ x9;
261}
262
263
264
265/*
266 h = f
267*/
268
269void fe_copy(fe h, const fe f) {
270 int32_t f0 = f[0];
271 int32_t f1 = f[1];
272 int32_t f2 = f[2];
273 int32_t f3 = f[3];
274 int32_t f4 = f[4];
275 int32_t f5 = f[5];
276 int32_t f6 = f[6];
277 int32_t f7 = f[7];
278 int32_t f8 = f[8];
279 int32_t f9 = f[9];
280
281 h[0] = f0;
282 h[1] = f1;
283 h[2] = f2;
284 h[3] = f3;
285 h[4] = f4;
286 h[5] = f5;
287 h[6] = f6;
288 h[7] = f7;
289 h[8] = f8;
290 h[9] = f9;
291}
292
293
294
295/*
296 Ignores top bit of h.
297*/
298
299void fe_frombytes(fe h, const unsigned char *s) {
300 int64_t h0 = load_4(in: s);
301 int64_t h1 = load_3(in: s + 4) << 6;
302 int64_t h2 = load_3(in: s + 7) << 5;
303 int64_t h3 = load_3(in: s + 10) << 3;
304 int64_t h4 = load_3(in: s + 13) << 2;
305 int64_t h5 = load_4(in: s + 16);
306 int64_t h6 = load_3(in: s + 20) << 7;
307 int64_t h7 = load_3(in: s + 23) << 5;
308 int64_t h8 = load_3(in: s + 26) << 4;
309 int64_t h9 = (load_3(in: s + 29) & 8388607) << 2;
310 int64_t carry0;
311 int64_t carry1;
312 int64_t carry2;
313 int64_t carry3;
314 int64_t carry4;
315 int64_t carry5;
316 int64_t carry6;
317 int64_t carry7;
318 int64_t carry8;
319 int64_t carry9;
320
321 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
322 h0 += carry9 * 19;
323 h9 -= carry9 << 25;
324 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
325 h2 += carry1;
326 h1 -= carry1 << 25;
327 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
328 h4 += carry3;
329 h3 -= carry3 << 25;
330 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
331 h6 += carry5;
332 h5 -= carry5 << 25;
333 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
334 h8 += carry7;
335 h7 -= carry7 << 25;
336 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
337 h1 += carry0;
338 h0 -= carry0 << 26;
339 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
340 h3 += carry2;
341 h2 -= carry2 << 26;
342 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
343 h5 += carry4;
344 h4 -= carry4 << 26;
345 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
346 h7 += carry6;
347 h6 -= carry6 << 26;
348 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
349 h9 += carry8;
350 h8 -= carry8 << 26;
351
352 h[0] = (int32_t) h0;
353 h[1] = (int32_t) h1;
354 h[2] = (int32_t) h2;
355 h[3] = (int32_t) h3;
356 h[4] = (int32_t) h4;
357 h[5] = (int32_t) h5;
358 h[6] = (int32_t) h6;
359 h[7] = (int32_t) h7;
360 h[8] = (int32_t) h8;
361 h[9] = (int32_t) h9;
362}
363
364
365
366void fe_invert(fe out, const fe z) {
367 fe t0;
368 fe t1;
369 fe t2;
370 fe t3;
371 int i;
372
373 fe_sq(h: t0, f: z);
374
375 for (i = 1; i < 1; ++i) {
376 fe_sq(h: t0, f: t0);
377 }
378
379 fe_sq(h: t1, f: t0);
380
381 for (i = 1; i < 2; ++i) {
382 fe_sq(h: t1, f: t1);
383 }
384
385 fe_mul(h: t1, f: z, g: t1);
386 fe_mul(h: t0, f: t0, g: t1);
387 fe_sq(h: t2, f: t0);
388
389 for (i = 1; i < 1; ++i) {
390 fe_sq(h: t2, f: t2);
391 }
392
393 fe_mul(h: t1, f: t1, g: t2);
394 fe_sq(h: t2, f: t1);
395
396 for (i = 1; i < 5; ++i) {
397 fe_sq(h: t2, f: t2);
398 }
399
400 fe_mul(h: t1, f: t2, g: t1);
401 fe_sq(h: t2, f: t1);
402
403 for (i = 1; i < 10; ++i) {
404 fe_sq(h: t2, f: t2);
405 }
406
407 fe_mul(h: t2, f: t2, g: t1);
408 fe_sq(h: t3, f: t2);
409
410 for (i = 1; i < 20; ++i) {
411 fe_sq(h: t3, f: t3);
412 }
413
414 fe_mul(h: t2, f: t3, g: t2);
415 fe_sq(h: t2, f: t2);
416
417 for (i = 1; i < 10; ++i) {
418 fe_sq(h: t2, f: t2);
419 }
420
421 fe_mul(h: t1, f: t2, g: t1);
422 fe_sq(h: t2, f: t1);
423
424 for (i = 1; i < 50; ++i) {
425 fe_sq(h: t2, f: t2);
426 }
427
428 fe_mul(h: t2, f: t2, g: t1);
429 fe_sq(h: t3, f: t2);
430
431 for (i = 1; i < 100; ++i) {
432 fe_sq(h: t3, f: t3);
433 }
434
435 fe_mul(h: t2, f: t3, g: t2);
436 fe_sq(h: t2, f: t2);
437
438 for (i = 1; i < 50; ++i) {
439 fe_sq(h: t2, f: t2);
440 }
441
442 fe_mul(h: t1, f: t2, g: t1);
443 fe_sq(h: t1, f: t1);
444
445 for (i = 1; i < 5; ++i) {
446 fe_sq(h: t1, f: t1);
447 }
448
449 fe_mul(h: out, f: t1, g: t0);
450}
451
452
453
454/*
455 return 1 if f is in {1,3,5,...,q-2}
456 return 0 if f is in {0,2,4,...,q-1}
457
458 Preconditions:
459 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
460*/
461
462int fe_isnegative(const fe f) {
463 unsigned char s[32];
464
465 fe_tobytes(s, h: f);
466
467 return s[0] & 1;
468}
469
470
471
472/*
473 return 1 if f == 0
474 return 0 if f != 0
475
476 Preconditions:
477 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
478*/
479
480int fe_isnonzero(const fe f) {
481 unsigned char s[32];
482 unsigned char r;
483
484 fe_tobytes(s, h: f);
485
486 r = s[0];
487 #define F(i) r |= s[i]
488 F(1);
489 F(2);
490 F(3);
491 F(4);
492 F(5);
493 F(6);
494 F(7);
495 F(8);
496 F(9);
497 F(10);
498 F(11);
499 F(12);
500 F(13);
501 F(14);
502 F(15);
503 F(16);
504 F(17);
505 F(18);
506 F(19);
507 F(20);
508 F(21);
509 F(22);
510 F(23);
511 F(24);
512 F(25);
513 F(26);
514 F(27);
515 F(28);
516 F(29);
517 F(30);
518 F(31);
519 #undef F
520
521 return r != 0;
522}
523
524
525
526/*
527 h = f * g
528 Can overlap h with f or g.
529
530 Preconditions:
531 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
532 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
533
534 Postconditions:
535 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
536 */
537
538 /*
539 Notes on implementation strategy:
540
541 Using schoolbook multiplication.
542 Karatsuba would save a little in some cost models.
543
544 Most multiplications by 2 and 19 are 32-bit precomputations;
545 cheaper than 64-bit postcomputations.
546
547 There is one remaining multiplication by 19 in the carry chain;
548 one *19 precomputation can be merged into this,
549 but the resulting data flow is considerably less clean.
550
551 There are 12 carries below.
552 10 of them are 2-way parallelizable and vectorizable.
553 Can get away with 11 carries, but then data flow is much deeper.
554
555 With tighter constraints on inputs can squeeze carries into int32.
556*/
557
558void fe_mul(fe h, const fe f, const fe g) {
559 int32_t f0 = f[0];
560 int32_t f1 = f[1];
561 int32_t f2 = f[2];
562 int32_t f3 = f[3];
563 int32_t f4 = f[4];
564 int32_t f5 = f[5];
565 int32_t f6 = f[6];
566 int32_t f7 = f[7];
567 int32_t f8 = f[8];
568 int32_t f9 = f[9];
569 int32_t g0 = g[0];
570 int32_t g1 = g[1];
571 int32_t g2 = g[2];
572 int32_t g3 = g[3];
573 int32_t g4 = g[4];
574 int32_t g5 = g[5];
575 int32_t g6 = g[6];
576 int32_t g7 = g[7];
577 int32_t g8 = g[8];
578 int32_t g9 = g[9];
579 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
580 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
581 int32_t g3_19 = 19 * g3;
582 int32_t g4_19 = 19 * g4;
583 int32_t g5_19 = 19 * g5;
584 int32_t g6_19 = 19 * g6;
585 int32_t g7_19 = 19 * g7;
586 int32_t g8_19 = 19 * g8;
587 int32_t g9_19 = 19 * g9;
588 int32_t f1_2 = 2 * f1;
589 int32_t f3_2 = 2 * f3;
590 int32_t f5_2 = 2 * f5;
591 int32_t f7_2 = 2 * f7;
592 int32_t f9_2 = 2 * f9;
593 int64_t f0g0 = f0 * (int64_t) g0;
594 int64_t f0g1 = f0 * (int64_t) g1;
595 int64_t f0g2 = f0 * (int64_t) g2;
596 int64_t f0g3 = f0 * (int64_t) g3;
597 int64_t f0g4 = f0 * (int64_t) g4;
598 int64_t f0g5 = f0 * (int64_t) g5;
599 int64_t f0g6 = f0 * (int64_t) g6;
600 int64_t f0g7 = f0 * (int64_t) g7;
601 int64_t f0g8 = f0 * (int64_t) g8;
602 int64_t f0g9 = f0 * (int64_t) g9;
603 int64_t f1g0 = f1 * (int64_t) g0;
604 int64_t f1g1_2 = f1_2 * (int64_t) g1;
605 int64_t f1g2 = f1 * (int64_t) g2;
606 int64_t f1g3_2 = f1_2 * (int64_t) g3;
607 int64_t f1g4 = f1 * (int64_t) g4;
608 int64_t f1g5_2 = f1_2 * (int64_t) g5;
609 int64_t f1g6 = f1 * (int64_t) g6;
610 int64_t f1g7_2 = f1_2 * (int64_t) g7;
611 int64_t f1g8 = f1 * (int64_t) g8;
612 int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
613 int64_t f2g0 = f2 * (int64_t) g0;
614 int64_t f2g1 = f2 * (int64_t) g1;
615 int64_t f2g2 = f2 * (int64_t) g2;
616 int64_t f2g3 = f2 * (int64_t) g3;
617 int64_t f2g4 = f2 * (int64_t) g4;
618 int64_t f2g5 = f2 * (int64_t) g5;
619 int64_t f2g6 = f2 * (int64_t) g6;
620 int64_t f2g7 = f2 * (int64_t) g7;
621 int64_t f2g8_19 = f2 * (int64_t) g8_19;
622 int64_t f2g9_19 = f2 * (int64_t) g9_19;
623 int64_t f3g0 = f3 * (int64_t) g0;
624 int64_t f3g1_2 = f3_2 * (int64_t) g1;
625 int64_t f3g2 = f3 * (int64_t) g2;
626 int64_t f3g3_2 = f3_2 * (int64_t) g3;
627 int64_t f3g4 = f3 * (int64_t) g4;
628 int64_t f3g5_2 = f3_2 * (int64_t) g5;
629 int64_t f3g6 = f3 * (int64_t) g6;
630 int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
631 int64_t f3g8_19 = f3 * (int64_t) g8_19;
632 int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
633 int64_t f4g0 = f4 * (int64_t) g0;
634 int64_t f4g1 = f4 * (int64_t) g1;
635 int64_t f4g2 = f4 * (int64_t) g2;
636 int64_t f4g3 = f4 * (int64_t) g3;
637 int64_t f4g4 = f4 * (int64_t) g4;
638 int64_t f4g5 = f4 * (int64_t) g5;
639 int64_t f4g6_19 = f4 * (int64_t) g6_19;
640 int64_t f4g7_19 = f4 * (int64_t) g7_19;
641 int64_t f4g8_19 = f4 * (int64_t) g8_19;
642 int64_t f4g9_19 = f4 * (int64_t) g9_19;
643 int64_t f5g0 = f5 * (int64_t) g0;
644 int64_t f5g1_2 = f5_2 * (int64_t) g1;
645 int64_t f5g2 = f5 * (int64_t) g2;
646 int64_t f5g3_2 = f5_2 * (int64_t) g3;
647 int64_t f5g4 = f5 * (int64_t) g4;
648 int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
649 int64_t f5g6_19 = f5 * (int64_t) g6_19;
650 int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
651 int64_t f5g8_19 = f5 * (int64_t) g8_19;
652 int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
653 int64_t f6g0 = f6 * (int64_t) g0;
654 int64_t f6g1 = f6 * (int64_t) g1;
655 int64_t f6g2 = f6 * (int64_t) g2;
656 int64_t f6g3 = f6 * (int64_t) g3;
657 int64_t f6g4_19 = f6 * (int64_t) g4_19;
658 int64_t f6g5_19 = f6 * (int64_t) g5_19;
659 int64_t f6g6_19 = f6 * (int64_t) g6_19;
660 int64_t f6g7_19 = f6 * (int64_t) g7_19;
661 int64_t f6g8_19 = f6 * (int64_t) g8_19;
662 int64_t f6g9_19 = f6 * (int64_t) g9_19;
663 int64_t f7g0 = f7 * (int64_t) g0;
664 int64_t f7g1_2 = f7_2 * (int64_t) g1;
665 int64_t f7g2 = f7 * (int64_t) g2;
666 int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
667 int64_t f7g4_19 = f7 * (int64_t) g4_19;
668 int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
669 int64_t f7g6_19 = f7 * (int64_t) g6_19;
670 int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
671 int64_t f7g8_19 = f7 * (int64_t) g8_19;
672 int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
673 int64_t f8g0 = f8 * (int64_t) g0;
674 int64_t f8g1 = f8 * (int64_t) g1;
675 int64_t f8g2_19 = f8 * (int64_t) g2_19;
676 int64_t f8g3_19 = f8 * (int64_t) g3_19;
677 int64_t f8g4_19 = f8 * (int64_t) g4_19;
678 int64_t f8g5_19 = f8 * (int64_t) g5_19;
679 int64_t f8g6_19 = f8 * (int64_t) g6_19;
680 int64_t f8g7_19 = f8 * (int64_t) g7_19;
681 int64_t f8g8_19 = f8 * (int64_t) g8_19;
682 int64_t f8g9_19 = f8 * (int64_t) g9_19;
683 int64_t f9g0 = f9 * (int64_t) g0;
684 int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
685 int64_t f9g2_19 = f9 * (int64_t) g2_19;
686 int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
687 int64_t f9g4_19 = f9 * (int64_t) g4_19;
688 int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
689 int64_t f9g6_19 = f9 * (int64_t) g6_19;
690 int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
691 int64_t f9g8_19 = f9 * (int64_t) g8_19;
692 int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
693 int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
694 int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
695 int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
696 int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
697 int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
698 int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
699 int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
700 int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
701 int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
702 int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ;
703 int64_t carry0;
704 int64_t carry1;
705 int64_t carry2;
706 int64_t carry3;
707 int64_t carry4;
708 int64_t carry5;
709 int64_t carry6;
710 int64_t carry7;
711 int64_t carry8;
712 int64_t carry9;
713
714 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
715 h1 += carry0;
716 h0 -= carry0 << 26;
717 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
718 h5 += carry4;
719 h4 -= carry4 << 26;
720
721 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
722 h2 += carry1;
723 h1 -= carry1 << 25;
724 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
725 h6 += carry5;
726 h5 -= carry5 << 25;
727
728 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
729 h3 += carry2;
730 h2 -= carry2 << 26;
731 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
732 h7 += carry6;
733 h6 -= carry6 << 26;
734
735 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
736 h4 += carry3;
737 h3 -= carry3 << 25;
738 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
739 h8 += carry7;
740 h7 -= carry7 << 25;
741
742 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
743 h5 += carry4;
744 h4 -= carry4 << 26;
745 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
746 h9 += carry8;
747 h8 -= carry8 << 26;
748
749 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
750 h0 += carry9 * 19;
751 h9 -= carry9 << 25;
752
753 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
754 h1 += carry0;
755 h0 -= carry0 << 26;
756
757 h[0] = (int32_t) h0;
758 h[1] = (int32_t) h1;
759 h[2] = (int32_t) h2;
760 h[3] = (int32_t) h3;
761 h[4] = (int32_t) h4;
762 h[5] = (int32_t) h5;
763 h[6] = (int32_t) h6;
764 h[7] = (int32_t) h7;
765 h[8] = (int32_t) h8;
766 h[9] = (int32_t) h9;
767}
768
769
770/*
771h = f * 121666
772Can overlap h with f.
773
774Preconditions:
775 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
776
777Postconditions:
778 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
779*/
780
781void fe_mul121666(fe h, fe f) {
782 int32_t f0 = f[0];
783 int32_t f1 = f[1];
784 int32_t f2 = f[2];
785 int32_t f3 = f[3];
786 int32_t f4 = f[4];
787 int32_t f5 = f[5];
788 int32_t f6 = f[6];
789 int32_t f7 = f[7];
790 int32_t f8 = f[8];
791 int32_t f9 = f[9];
792 int64_t h0 = f0 * (int64_t) 121666;
793 int64_t h1 = f1 * (int64_t) 121666;
794 int64_t h2 = f2 * (int64_t) 121666;
795 int64_t h3 = f3 * (int64_t) 121666;
796 int64_t h4 = f4 * (int64_t) 121666;
797 int64_t h5 = f5 * (int64_t) 121666;
798 int64_t h6 = f6 * (int64_t) 121666;
799 int64_t h7 = f7 * (int64_t) 121666;
800 int64_t h8 = f8 * (int64_t) 121666;
801 int64_t h9 = f9 * (int64_t) 121666;
802 int64_t carry0;
803 int64_t carry1;
804 int64_t carry2;
805 int64_t carry3;
806 int64_t carry4;
807 int64_t carry5;
808 int64_t carry6;
809 int64_t carry7;
810 int64_t carry8;
811 int64_t carry9;
812
813 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
814 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
815 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
816 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
817 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
818
819 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
820 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
821 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
822 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
823 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
824
825 h[0] = h0;
826 h[1] = h1;
827 h[2] = h2;
828 h[3] = h3;
829 h[4] = h4;
830 h[5] = h5;
831 h[6] = h6;
832 h[7] = h7;
833 h[8] = h8;
834 h[9] = h9;
835}
836
837
838/*
839h = -f
840
841Preconditions:
842 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
843
844Postconditions:
845 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
846*/
847
848void fe_neg(fe h, const fe f) {
849 int32_t f0 = f[0];
850 int32_t f1 = f[1];
851 int32_t f2 = f[2];
852 int32_t f3 = f[3];
853 int32_t f4 = f[4];
854 int32_t f5 = f[5];
855 int32_t f6 = f[6];
856 int32_t f7 = f[7];
857 int32_t f8 = f[8];
858 int32_t f9 = f[9];
859 int32_t h0 = -f0;
860 int32_t h1 = -f1;
861 int32_t h2 = -f2;
862 int32_t h3 = -f3;
863 int32_t h4 = -f4;
864 int32_t h5 = -f5;
865 int32_t h6 = -f6;
866 int32_t h7 = -f7;
867 int32_t h8 = -f8;
868 int32_t h9 = -f9;
869
870 h[0] = h0;
871 h[1] = h1;
872 h[2] = h2;
873 h[3] = h3;
874 h[4] = h4;
875 h[5] = h5;
876 h[6] = h6;
877 h[7] = h7;
878 h[8] = h8;
879 h[9] = h9;
880}
881
882
883void fe_pow22523(fe out, const fe z) {
884 fe t0;
885 fe t1;
886 fe t2;
887 int i;
888 fe_sq(h: t0, f: z);
889
890 for (i = 1; i < 1; ++i) {
891 fe_sq(h: t0, f: t0);
892 }
893
894 fe_sq(h: t1, f: t0);
895
896 for (i = 1; i < 2; ++i) {
897 fe_sq(h: t1, f: t1);
898 }
899
900 fe_mul(h: t1, f: z, g: t1);
901 fe_mul(h: t0, f: t0, g: t1);
902 fe_sq(h: t0, f: t0);
903
904 for (i = 1; i < 1; ++i) {
905 fe_sq(h: t0, f: t0);
906 }
907
908 fe_mul(h: t0, f: t1, g: t0);
909 fe_sq(h: t1, f: t0);
910
911 for (i = 1; i < 5; ++i) {
912 fe_sq(h: t1, f: t1);
913 }
914
915 fe_mul(h: t0, f: t1, g: t0);
916 fe_sq(h: t1, f: t0);
917
918 for (i = 1; i < 10; ++i) {
919 fe_sq(h: t1, f: t1);
920 }
921
922 fe_mul(h: t1, f: t1, g: t0);
923 fe_sq(h: t2, f: t1);
924
925 for (i = 1; i < 20; ++i) {
926 fe_sq(h: t2, f: t2);
927 }
928
929 fe_mul(h: t1, f: t2, g: t1);
930 fe_sq(h: t1, f: t1);
931
932 for (i = 1; i < 10; ++i) {
933 fe_sq(h: t1, f: t1);
934 }
935
936 fe_mul(h: t0, f: t1, g: t0);
937 fe_sq(h: t1, f: t0);
938
939 for (i = 1; i < 50; ++i) {
940 fe_sq(h: t1, f: t1);
941 }
942
943 fe_mul(h: t1, f: t1, g: t0);
944 fe_sq(h: t2, f: t1);
945
946 for (i = 1; i < 100; ++i) {
947 fe_sq(h: t2, f: t2);
948 }
949
950 fe_mul(h: t1, f: t2, g: t1);
951 fe_sq(h: t1, f: t1);
952
953 for (i = 1; i < 50; ++i) {
954 fe_sq(h: t1, f: t1);
955 }
956
957 fe_mul(h: t0, f: t1, g: t0);
958 fe_sq(h: t0, f: t0);
959
960 for (i = 1; i < 2; ++i) {
961 fe_sq(h: t0, f: t0);
962 }
963
964 fe_mul(h: out, f: t0, g: z);
965 return;
966}
967
968
969/*
970h = f * f
971Can overlap h with f.
972
973Preconditions:
974 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
975
976Postconditions:
977 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
978*/
979
980/*
981See fe_mul.c for discussion of implementation strategy.
982*/
983
984void fe_sq(fe h, const fe f) {
985 int32_t f0 = f[0];
986 int32_t f1 = f[1];
987 int32_t f2 = f[2];
988 int32_t f3 = f[3];
989 int32_t f4 = f[4];
990 int32_t f5 = f[5];
991 int32_t f6 = f[6];
992 int32_t f7 = f[7];
993 int32_t f8 = f[8];
994 int32_t f9 = f[9];
995 int32_t f0_2 = 2 * f0;
996 int32_t f1_2 = 2 * f1;
997 int32_t f2_2 = 2 * f2;
998 int32_t f3_2 = 2 * f3;
999 int32_t f4_2 = 2 * f4;
1000 int32_t f5_2 = 2 * f5;
1001 int32_t f6_2 = 2 * f6;
1002 int32_t f7_2 = 2 * f7;
1003 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
1004 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
1005 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
1006 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
1007 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
1008 int64_t f0f0 = f0 * (int64_t) f0;
1009 int64_t f0f1_2 = f0_2 * (int64_t) f1;
1010 int64_t f0f2_2 = f0_2 * (int64_t) f2;
1011 int64_t f0f3_2 = f0_2 * (int64_t) f3;
1012 int64_t f0f4_2 = f0_2 * (int64_t) f4;
1013 int64_t f0f5_2 = f0_2 * (int64_t) f5;
1014 int64_t f0f6_2 = f0_2 * (int64_t) f6;
1015 int64_t f0f7_2 = f0_2 * (int64_t) f7;
1016 int64_t f0f8_2 = f0_2 * (int64_t) f8;
1017 int64_t f0f9_2 = f0_2 * (int64_t) f9;
1018 int64_t f1f1_2 = f1_2 * (int64_t) f1;
1019 int64_t f1f2_2 = f1_2 * (int64_t) f2;
1020 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1021 int64_t f1f4_2 = f1_2 * (int64_t) f4;
1022 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1023 int64_t f1f6_2 = f1_2 * (int64_t) f6;
1024 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1025 int64_t f1f8_2 = f1_2 * (int64_t) f8;
1026 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1027 int64_t f2f2 = f2 * (int64_t) f2;
1028 int64_t f2f3_2 = f2_2 * (int64_t) f3;
1029 int64_t f2f4_2 = f2_2 * (int64_t) f4;
1030 int64_t f2f5_2 = f2_2 * (int64_t) f5;
1031 int64_t f2f6_2 = f2_2 * (int64_t) f6;
1032 int64_t f2f7_2 = f2_2 * (int64_t) f7;
1033 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1034 int64_t f2f9_38 = f2 * (int64_t) f9_38;
1035 int64_t f3f3_2 = f3_2 * (int64_t) f3;
1036 int64_t f3f4_2 = f3_2 * (int64_t) f4;
1037 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1038 int64_t f3f6_2 = f3_2 * (int64_t) f6;
1039 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1040 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1041 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1042 int64_t f4f4 = f4 * (int64_t) f4;
1043 int64_t f4f5_2 = f4_2 * (int64_t) f5;
1044 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1045 int64_t f4f7_38 = f4 * (int64_t) f7_38;
1046 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1047 int64_t f4f9_38 = f4 * (int64_t) f9_38;
1048 int64_t f5f5_38 = f5 * (int64_t) f5_38;
1049 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1050 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1051 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1052 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1053 int64_t f6f6_19 = f6 * (int64_t) f6_19;
1054 int64_t f6f7_38 = f6 * (int64_t) f7_38;
1055 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1056 int64_t f6f9_38 = f6 * (int64_t) f9_38;
1057 int64_t f7f7_38 = f7 * (int64_t) f7_38;
1058 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1059 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1060 int64_t f8f8_19 = f8 * (int64_t) f8_19;
1061 int64_t f8f9_38 = f8 * (int64_t) f9_38;
1062 int64_t f9f9_38 = f9 * (int64_t) f9_38;
1063 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1064 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1065 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1066 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1067 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1068 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1069 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1070 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1071 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1072 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1073 int64_t carry0;
1074 int64_t carry1;
1075 int64_t carry2;
1076 int64_t carry3;
1077 int64_t carry4;
1078 int64_t carry5;
1079 int64_t carry6;
1080 int64_t carry7;
1081 int64_t carry8;
1082 int64_t carry9;
1083 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1084 h1 += carry0;
1085 h0 -= carry0 << 26;
1086 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1087 h5 += carry4;
1088 h4 -= carry4 << 26;
1089 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
1090 h2 += carry1;
1091 h1 -= carry1 << 25;
1092 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
1093 h6 += carry5;
1094 h5 -= carry5 << 25;
1095 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
1096 h3 += carry2;
1097 h2 -= carry2 << 26;
1098 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
1099 h7 += carry6;
1100 h6 -= carry6 << 26;
1101 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
1102 h4 += carry3;
1103 h3 -= carry3 << 25;
1104 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
1105 h8 += carry7;
1106 h7 -= carry7 << 25;
1107 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1108 h5 += carry4;
1109 h4 -= carry4 << 26;
1110 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
1111 h9 += carry8;
1112 h8 -= carry8 << 26;
1113 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
1114 h0 += carry9 * 19;
1115 h9 -= carry9 << 25;
1116 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1117 h1 += carry0;
1118 h0 -= carry0 << 26;
1119 h[0] = (int32_t) h0;
1120 h[1] = (int32_t) h1;
1121 h[2] = (int32_t) h2;
1122 h[3] = (int32_t) h3;
1123 h[4] = (int32_t) h4;
1124 h[5] = (int32_t) h5;
1125 h[6] = (int32_t) h6;
1126 h[7] = (int32_t) h7;
1127 h[8] = (int32_t) h8;
1128 h[9] = (int32_t) h9;
1129}
1130
1131
1132/*
1133h = 2 * f * f
1134Can overlap h with f.
1135
1136Preconditions:
1137 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
1138
1139Postconditions:
1140 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
1141*/
1142
1143/*
1144See fe_mul.c for discussion of implementation strategy.
1145*/
1146
1147void fe_sq2(fe h, const fe f) {
1148 int32_t f0 = f[0];
1149 int32_t f1 = f[1];
1150 int32_t f2 = f[2];
1151 int32_t f3 = f[3];
1152 int32_t f4 = f[4];
1153 int32_t f5 = f[5];
1154 int32_t f6 = f[6];
1155 int32_t f7 = f[7];
1156 int32_t f8 = f[8];
1157 int32_t f9 = f[9];
1158 int32_t f0_2 = 2 * f0;
1159 int32_t f1_2 = 2 * f1;
1160 int32_t f2_2 = 2 * f2;
1161 int32_t f3_2 = 2 * f3;
1162 int32_t f4_2 = 2 * f4;
1163 int32_t f5_2 = 2 * f5;
1164 int32_t f6_2 = 2 * f6;
1165 int32_t f7_2 = 2 * f7;
1166 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
1167 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
1168 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
1169 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
1170 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
1171 int64_t f0f0 = f0 * (int64_t) f0;
1172 int64_t f0f1_2 = f0_2 * (int64_t) f1;
1173 int64_t f0f2_2 = f0_2 * (int64_t) f2;
1174 int64_t f0f3_2 = f0_2 * (int64_t) f3;
1175 int64_t f0f4_2 = f0_2 * (int64_t) f4;
1176 int64_t f0f5_2 = f0_2 * (int64_t) f5;
1177 int64_t f0f6_2 = f0_2 * (int64_t) f6;
1178 int64_t f0f7_2 = f0_2 * (int64_t) f7;
1179 int64_t f0f8_2 = f0_2 * (int64_t) f8;
1180 int64_t f0f9_2 = f0_2 * (int64_t) f9;
1181 int64_t f1f1_2 = f1_2 * (int64_t) f1;
1182 int64_t f1f2_2 = f1_2 * (int64_t) f2;
1183 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1184 int64_t f1f4_2 = f1_2 * (int64_t) f4;
1185 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1186 int64_t f1f6_2 = f1_2 * (int64_t) f6;
1187 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1188 int64_t f1f8_2 = f1_2 * (int64_t) f8;
1189 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1190 int64_t f2f2 = f2 * (int64_t) f2;
1191 int64_t f2f3_2 = f2_2 * (int64_t) f3;
1192 int64_t f2f4_2 = f2_2 * (int64_t) f4;
1193 int64_t f2f5_2 = f2_2 * (int64_t) f5;
1194 int64_t f2f6_2 = f2_2 * (int64_t) f6;
1195 int64_t f2f7_2 = f2_2 * (int64_t) f7;
1196 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1197 int64_t f2f9_38 = f2 * (int64_t) f9_38;
1198 int64_t f3f3_2 = f3_2 * (int64_t) f3;
1199 int64_t f3f4_2 = f3_2 * (int64_t) f4;
1200 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1201 int64_t f3f6_2 = f3_2 * (int64_t) f6;
1202 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1203 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1204 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1205 int64_t f4f4 = f4 * (int64_t) f4;
1206 int64_t f4f5_2 = f4_2 * (int64_t) f5;
1207 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1208 int64_t f4f7_38 = f4 * (int64_t) f7_38;
1209 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1210 int64_t f4f9_38 = f4 * (int64_t) f9_38;
1211 int64_t f5f5_38 = f5 * (int64_t) f5_38;
1212 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1213 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1214 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1215 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1216 int64_t f6f6_19 = f6 * (int64_t) f6_19;
1217 int64_t f6f7_38 = f6 * (int64_t) f7_38;
1218 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1219 int64_t f6f9_38 = f6 * (int64_t) f9_38;
1220 int64_t f7f7_38 = f7 * (int64_t) f7_38;
1221 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1222 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1223 int64_t f8f8_19 = f8 * (int64_t) f8_19;
1224 int64_t f8f9_38 = f8 * (int64_t) f9_38;
1225 int64_t f9f9_38 = f9 * (int64_t) f9_38;
1226 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1227 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1228 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1229 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1230 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1231 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1232 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1233 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1234 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1235 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1236 int64_t carry0;
1237 int64_t carry1;
1238 int64_t carry2;
1239 int64_t carry3;
1240 int64_t carry4;
1241 int64_t carry5;
1242 int64_t carry6;
1243 int64_t carry7;
1244 int64_t carry8;
1245 int64_t carry9;
1246 h0 += h0;
1247 h1 += h1;
1248 h2 += h2;
1249 h3 += h3;
1250 h4 += h4;
1251 h5 += h5;
1252 h6 += h6;
1253 h7 += h7;
1254 h8 += h8;
1255 h9 += h9;
1256 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1257 h1 += carry0;
1258 h0 -= carry0 << 26;
1259 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1260 h5 += carry4;
1261 h4 -= carry4 << 26;
1262 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
1263 h2 += carry1;
1264 h1 -= carry1 << 25;
1265 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
1266 h6 += carry5;
1267 h5 -= carry5 << 25;
1268 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
1269 h3 += carry2;
1270 h2 -= carry2 << 26;
1271 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
1272 h7 += carry6;
1273 h6 -= carry6 << 26;
1274 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
1275 h4 += carry3;
1276 h3 -= carry3 << 25;
1277 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
1278 h8 += carry7;
1279 h7 -= carry7 << 25;
1280 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1281 h5 += carry4;
1282 h4 -= carry4 << 26;
1283 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
1284 h9 += carry8;
1285 h8 -= carry8 << 26;
1286 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
1287 h0 += carry9 * 19;
1288 h9 -= carry9 << 25;
1289 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1290 h1 += carry0;
1291 h0 -= carry0 << 26;
1292 h[0] = (int32_t) h0;
1293 h[1] = (int32_t) h1;
1294 h[2] = (int32_t) h2;
1295 h[3] = (int32_t) h3;
1296 h[4] = (int32_t) h4;
1297 h[5] = (int32_t) h5;
1298 h[6] = (int32_t) h6;
1299 h[7] = (int32_t) h7;
1300 h[8] = (int32_t) h8;
1301 h[9] = (int32_t) h9;
1302}
1303
1304
1305/*
1306h = f - g
1307Can overlap h with f or g.
1308
1309Preconditions:
1310 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1311 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1312
1313Postconditions:
1314 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1315*/
1316
1317void fe_sub(fe h, const fe f, const fe g) {
1318 int32_t f0 = f[0];
1319 int32_t f1 = f[1];
1320 int32_t f2 = f[2];
1321 int32_t f3 = f[3];
1322 int32_t f4 = f[4];
1323 int32_t f5 = f[5];
1324 int32_t f6 = f[6];
1325 int32_t f7 = f[7];
1326 int32_t f8 = f[8];
1327 int32_t f9 = f[9];
1328 int32_t g0 = g[0];
1329 int32_t g1 = g[1];
1330 int32_t g2 = g[2];
1331 int32_t g3 = g[3];
1332 int32_t g4 = g[4];
1333 int32_t g5 = g[5];
1334 int32_t g6 = g[6];
1335 int32_t g7 = g[7];
1336 int32_t g8 = g[8];
1337 int32_t g9 = g[9];
1338 int32_t h0 = f0 - g0;
1339 int32_t h1 = f1 - g1;
1340 int32_t h2 = f2 - g2;
1341 int32_t h3 = f3 - g3;
1342 int32_t h4 = f4 - g4;
1343 int32_t h5 = f5 - g5;
1344 int32_t h6 = f6 - g6;
1345 int32_t h7 = f7 - g7;
1346 int32_t h8 = f8 - g8;
1347 int32_t h9 = f9 - g9;
1348
1349 h[0] = h0;
1350 h[1] = h1;
1351 h[2] = h2;
1352 h[3] = h3;
1353 h[4] = h4;
1354 h[5] = h5;
1355 h[6] = h6;
1356 h[7] = h7;
1357 h[8] = h8;
1358 h[9] = h9;
1359}
1360
1361
1362
1363/*
1364Preconditions:
1365 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1366
1367Write p=2^255-19; q=floor(h/p).
1368Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
1369
1370Proof:
1371 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
1372 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
1373
1374 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
1375 Then 0<y<1.
1376
1377 Write r=h-pq.
1378 Have 0<=r<=p-1=2^255-20.
1379 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
1380
1381 Write x=r+19(2^-255)r+y.
1382 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
1383
1384 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
1385 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
1386*/
1387
1388void fe_tobytes(unsigned char *s, const fe h) {
1389 int32_t h0 = h[0];
1390 int32_t h1 = h[1];
1391 int32_t h2 = h[2];
1392 int32_t h3 = h[3];
1393 int32_t h4 = h[4];
1394 int32_t h5 = h[5];
1395 int32_t h6 = h[6];
1396 int32_t h7 = h[7];
1397 int32_t h8 = h[8];
1398 int32_t h9 = h[9];
1399 int32_t q;
1400 int32_t carry0;
1401 int32_t carry1;
1402 int32_t carry2;
1403 int32_t carry3;
1404 int32_t carry4;
1405 int32_t carry5;
1406 int32_t carry6;
1407 int32_t carry7;
1408 int32_t carry8;
1409 int32_t carry9;
1410 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
1411 q = (h0 + q) >> 26;
1412 q = (h1 + q) >> 25;
1413 q = (h2 + q) >> 26;
1414 q = (h3 + q) >> 25;
1415 q = (h4 + q) >> 26;
1416 q = (h5 + q) >> 25;
1417 q = (h6 + q) >> 26;
1418 q = (h7 + q) >> 25;
1419 q = (h8 + q) >> 26;
1420 q = (h9 + q) >> 25;
1421 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
1422 h0 += 19 * q;
1423 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
1424 carry0 = h0 >> 26;
1425 h1 += carry0;
1426 h0 -= carry0 << 26;
1427 carry1 = h1 >> 25;
1428 h2 += carry1;
1429 h1 -= carry1 << 25;
1430 carry2 = h2 >> 26;
1431 h3 += carry2;
1432 h2 -= carry2 << 26;
1433 carry3 = h3 >> 25;
1434 h4 += carry3;
1435 h3 -= carry3 << 25;
1436 carry4 = h4 >> 26;
1437 h5 += carry4;
1438 h4 -= carry4 << 26;
1439 carry5 = h5 >> 25;
1440 h6 += carry5;
1441 h5 -= carry5 << 25;
1442 carry6 = h6 >> 26;
1443 h7 += carry6;
1444 h6 -= carry6 << 26;
1445 carry7 = h7 >> 25;
1446 h8 += carry7;
1447 h7 -= carry7 << 25;
1448 carry8 = h8 >> 26;
1449 h9 += carry8;
1450 h8 -= carry8 << 26;
1451 carry9 = h9 >> 25;
1452 h9 -= carry9 << 25;
1453
1454 /* h10 = carry9 */
1455 /*
1456 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
1457 Have h0+...+2^230 h9 between 0 and 2^255-1;
1458 evidently 2^255 h10-2^255 q = 0.
1459 Goal: Output h0+...+2^230 h9.
1460 */
1461 s[0] = (unsigned char) (h0 >> 0);
1462 s[1] = (unsigned char) (h0 >> 8);
1463 s[2] = (unsigned char) (h0 >> 16);
1464 s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2));
1465 s[4] = (unsigned char) (h1 >> 6);
1466 s[5] = (unsigned char) (h1 >> 14);
1467 s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3));
1468 s[7] = (unsigned char) (h2 >> 5);
1469 s[8] = (unsigned char) (h2 >> 13);
1470 s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5));
1471 s[10] = (unsigned char) (h3 >> 3);
1472 s[11] = (unsigned char) (h3 >> 11);
1473 s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6));
1474 s[13] = (unsigned char) (h4 >> 2);
1475 s[14] = (unsigned char) (h4 >> 10);
1476 s[15] = (unsigned char) (h4 >> 18);
1477 s[16] = (unsigned char) (h5 >> 0);
1478 s[17] = (unsigned char) (h5 >> 8);
1479 s[18] = (unsigned char) (h5 >> 16);
1480 s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1));
1481 s[20] = (unsigned char) (h6 >> 7);
1482 s[21] = (unsigned char) (h6 >> 15);
1483 s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3));
1484 s[23] = (unsigned char) (h7 >> 5);
1485 s[24] = (unsigned char) (h7 >> 13);
1486 s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4));
1487 s[26] = (unsigned char) (h8 >> 4);
1488 s[27] = (unsigned char) (h8 >> 12);
1489 s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6));
1490 s[29] = (unsigned char) (h9 >> 2);
1491 s[30] = (unsigned char) (h9 >> 10);
1492 s[31] = (unsigned char) (h9 >> 18);
1493}
1494