fe.c source code [olm/lib/ed25519/src/fe.c]

1	#include "fixedint.h"
2	#include "fe.h"
3
4	#ifndef ED25519_LOAD_BYTES
5	#define ED25519_LOAD_BYTES
6
7	/*
8	helper functions
9	*/
10	static uint64_t load_3(const unsigned char *in) {
11	uint64_t result;
12
13	result = (uint64_t) in[`0`];
14	result \|= ((uint64_t) in[`1`]) << `8`;
15	result \|= ((uint64_t) in[`2`]) << `16`;
16
17	return result;
18	}
19
20	static uint64_t load_4(const unsigned char *in) {
21	uint64_t result;
22
23	result = (uint64_t) in[`0`];
24	result \|= ((uint64_t) in[`1`]) << `8`;
25	result \|= ((uint64_t) in[`2`]) << `16`;
26	result \|= ((uint64_t) in[`3`]) << `24`;
27
28	return result;
29	}
30
31	#endif
32
33	/*
34	h = 0
35	*/
36
37	void fe_0(fe h) {
38	h[`0`] = `0`;
39	h[`1`] = `0`;
40	h[`2`] = `0`;
41	h[`3`] = `0`;
42	h[`4`] = `0`;
43	h[`5`] = `0`;
44	h[`6`] = `0`;
45	h[`7`] = `0`;
46	h[`8`] = `0`;
47	h[`9`] = `0`;
48	}
49
50
51
52	/*
53	h = 1
54	*/
55
56	void fe_1(fe h) {
57	h[`0`] = `1`;
58	h[`1`] = `0`;
59	h[`2`] = `0`;
60	h[`3`] = `0`;
61	h[`4`] = `0`;
62	h[`5`] = `0`;
63	h[`6`] = `0`;
64	h[`7`] = `0`;
65	h[`8`] = `0`;
66	h[`9`] = `0`;
67	}
68
69
70
71	/*
72	h = f + g
73	Can overlap h with f or g.
74
75	Preconditions:
76	\|f\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
77	\|g\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
78
79	Postconditions:
80	\|h\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
81	*/
82
83	void fe_add(fe h, const fe f, const fe g) {
84	int32_t f0 = f[`0`];
85	int32_t f1 = f[`1`];
86	int32_t f2 = f[`2`];
87	int32_t f3 = f[`3`];
88	int32_t f4 = f[`4`];
89	int32_t f5 = f[`5`];
90	int32_t f6 = f[`6`];
91	int32_t f7 = f[`7`];
92	int32_t f8 = f[`8`];
93	int32_t f9 = f[`9`];
94	int32_t g0 = g[`0`];
95	int32_t g1 = g[`1`];
96	int32_t g2 = g[`2`];
97	int32_t g3 = g[`3`];
98	int32_t g4 = g[`4`];
99	int32_t g5 = g[`5`];
100	int32_t g6 = g[`6`];
101	int32_t g7 = g[`7`];
102	int32_t g8 = g[`8`];
103	int32_t g9 = g[`9`];
104	int32_t h0 = f0 + g0;
105	int32_t h1 = f1 + g1;
106	int32_t h2 = f2 + g2;
107	int32_t h3 = f3 + g3;
108	int32_t h4 = f4 + g4;
109	int32_t h5 = f5 + g5;
110	int32_t h6 = f6 + g6;
111	int32_t h7 = f7 + g7;
112	int32_t h8 = f8 + g8;
113	int32_t h9 = f9 + g9;
114
115	h[`0`] = h0;
116	h[`1`] = h1;
117	h[`2`] = h2;
118	h[`3`] = h3;
119	h[`4`] = h4;
120	h[`5`] = h5;
121	h[`6`] = h6;
122	h[`7`] = h7;
123	h[`8`] = h8;
124	h[`9`] = h9;
125	}
126
127
128
129	/*
130	Replace (f,g) with (g,g) if b == 1;
131	replace (f,g) with (f,g) if b == 0.
132
133	Preconditions: b in {0,1}.
134	*/
135
136	void fe_cmov(fe f, const fe g, unsigned int b) {
137	int32_t f0 = f[`0`];
138	int32_t f1 = f[`1`];
139	int32_t f2 = f[`2`];
140	int32_t f3 = f[`3`];
141	int32_t f4 = f[`4`];
142	int32_t f5 = f[`5`];
143	int32_t f6 = f[`6`];
144	int32_t f7 = f[`7`];
145	int32_t f8 = f[`8`];
146	int32_t f9 = f[`9`];
147	int32_t g0 = g[`0`];
148	int32_t g1 = g[`1`];
149	int32_t g2 = g[`2`];
150	int32_t g3 = g[`3`];
151	int32_t g4 = g[`4`];
152	int32_t g5 = g[`5`];
153	int32_t g6 = g[`6`];
154	int32_t g7 = g[`7`];
155	int32_t g8 = g[`8`];
156	int32_t g9 = g[`9`];
157	int32_t x0 = f0 ^ g0;
158	int32_t x1 = f1 ^ g1;
159	int32_t x2 = f2 ^ g2;
160	int32_t x3 = f3 ^ g3;
161	int32_t x4 = f4 ^ g4;
162	int32_t x5 = f5 ^ g5;
163	int32_t x6 = f6 ^ g6;
164	int32_t x7 = f7 ^ g7;
165	int32_t x8 = f8 ^ g8;
166	int32_t x9 = f9 ^ g9;
167
168	b = (unsigned int) (- (int) b); / silence warning /
169	x0 &= b;
170	x1 &= b;
171	x2 &= b;
172	x3 &= b;
173	x4 &= b;
174	x5 &= b;
175	x6 &= b;
176	x7 &= b;
177	x8 &= b;
178	x9 &= b;
179
180	f[`0`] = f0 ^ x0;
181	f[`1`] = f1 ^ x1;
182	f[`2`] = f2 ^ x2;
183	f[`3`] = f3 ^ x3;
184	f[`4`] = f4 ^ x4;
185	f[`5`] = f5 ^ x5;
186	f[`6`] = f6 ^ x6;
187	f[`7`] = f7 ^ x7;
188	f[`8`] = f8 ^ x8;
189	f[`9`] = f9 ^ x9;
190	}
191
192	/*
193	Replace (f,g) with (g,f) if b == 1;
194	replace (f,g) with (f,g) if b == 0.
195
196	Preconditions: b in {0,1}.
197	*/
198
199	void fe_cswap(fe f,fe g,unsigned int b) {
200	int32_t f0 = f[`0`];
201	int32_t f1 = f[`1`];
202	int32_t f2 = f[`2`];
203	int32_t f3 = f[`3`];
204	int32_t f4 = f[`4`];
205	int32_t f5 = f[`5`];
206	int32_t f6 = f[`6`];
207	int32_t f7 = f[`7`];
208	int32_t f8 = f[`8`];
209	int32_t f9 = f[`9`];
210	int32_t g0 = g[`0`];
211	int32_t g1 = g[`1`];
212	int32_t g2 = g[`2`];
213	int32_t g3 = g[`3`];
214	int32_t g4 = g[`4`];
215	int32_t g5 = g[`5`];
216	int32_t g6 = g[`6`];
217	int32_t g7 = g[`7`];
218	int32_t g8 = g[`8`];
219	int32_t g9 = g[`9`];
220	int32_t x0 = f0 ^ g0;
221	int32_t x1 = f1 ^ g1;
222	int32_t x2 = f2 ^ g2;
223	int32_t x3 = f3 ^ g3;
224	int32_t x4 = f4 ^ g4;
225	int32_t x5 = f5 ^ g5;
226	int32_t x6 = f6 ^ g6;
227	int32_t x7 = f7 ^ g7;
228	int32_t x8 = f8 ^ g8;
229	int32_t x9 = f9 ^ g9;
230	b = -b;
231	x0 &= b;
232	x1 &= b;
233	x2 &= b;
234	x3 &= b;
235	x4 &= b;
236	x5 &= b;
237	x6 &= b;
238	x7 &= b;
239	x8 &= b;
240	x9 &= b;
241	f[`0`] = f0 ^ x0;
242	f[`1`] = f1 ^ x1;
243	f[`2`] = f2 ^ x2;
244	f[`3`] = f3 ^ x3;
245	f[`4`] = f4 ^ x4;
246	f[`5`] = f5 ^ x5;
247	f[`6`] = f6 ^ x6;
248	f[`7`] = f7 ^ x7;
249	f[`8`] = f8 ^ x8;
250	f[`9`] = f9 ^ x9;
251	g[`0`] = g0 ^ x0;
252	g[`1`] = g1 ^ x1;
253	g[`2`] = g2 ^ x2;
254	g[`3`] = g3 ^ x3;
255	g[`4`] = g4 ^ x4;
256	g[`5`] = g5 ^ x5;
257	g[`6`] = g6 ^ x6;
258	g[`7`] = g7 ^ x7;
259	g[`8`] = g8 ^ x8;
260	g[`9`] = g9 ^ x9;
261	}
262
263
264
265	/*
266	h = f
267	*/
268
269	void fe_copy(fe h, const fe f) {
270	int32_t f0 = f[`0`];
271	int32_t f1 = f[`1`];
272	int32_t f2 = f[`2`];
273	int32_t f3 = f[`3`];
274	int32_t f4 = f[`4`];
275	int32_t f5 = f[`5`];
276	int32_t f6 = f[`6`];
277	int32_t f7 = f[`7`];
278	int32_t f8 = f[`8`];
279	int32_t f9 = f[`9`];
280
281	h[`0`] = f0;
282	h[`1`] = f1;
283	h[`2`] = f2;
284	h[`3`] = f3;
285	h[`4`] = f4;
286	h[`5`] = f5;
287	h[`6`] = f6;
288	h[`7`] = f7;
289	h[`8`] = f8;
290	h[`9`] = f9;
291	}
292
293
294
295	/*
296	Ignores top bit of h.
297	*/
298
299	void fe_frombytes(fe h, const unsigned char *s) {
300	int64_t h0 = load_4(in: s);
301	int64_t h1 = load_3(in: s + `4`) << `6`;
302	int64_t h2 = load_3(in: s + `7`) << `5`;
303	int64_t h3 = load_3(in: s + `10`) << `3`;
304	int64_t h4 = load_3(in: s + `13`) << `2`;
305	int64_t h5 = load_4(in: s + `16`);
306	int64_t h6 = load_3(in: s + `20`) << `7`;
307	int64_t h7 = load_3(in: s + `23`) << `5`;
308	int64_t h8 = load_3(in: s + `26`) << `4`;
309	int64_t h9 = (load_3(in: s + `29`) & `8388607`) << `2`;
310	int64_t carry0;
311	int64_t carry1;
312	int64_t carry2;
313	int64_t carry3;
314	int64_t carry4;
315	int64_t carry5;
316	int64_t carry6;
317	int64_t carry7;
318	int64_t carry8;
319	int64_t carry9;
320
321	carry9 = (h9 + (int64_t) (`1` << `24`)) >> `25`;
322	h0 += carry9 * `19`;
323	h9 -= carry9 << `25`;
324	carry1 = (h1 + (int64_t) (`1` << `24`)) >> `25`;
325	h2 += carry1;
326	h1 -= carry1 << `25`;
327	carry3 = (h3 + (int64_t) (`1` << `24`)) >> `25`;
328	h4 += carry3;
329	h3 -= carry3 << `25`;
330	carry5 = (h5 + (int64_t) (`1` << `24`)) >> `25`;
331	h6 += carry5;
332	h5 -= carry5 << `25`;
333	carry7 = (h7 + (int64_t) (`1` << `24`)) >> `25`;
334	h8 += carry7;
335	h7 -= carry7 << `25`;
336	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
337	h1 += carry0;
338	h0 -= carry0 << `26`;
339	carry2 = (h2 + (int64_t) (`1` << `25`)) >> `26`;
340	h3 += carry2;
341	h2 -= carry2 << `26`;
342	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
343	h5 += carry4;
344	h4 -= carry4 << `26`;
345	carry6 = (h6 + (int64_t) (`1` << `25`)) >> `26`;
346	h7 += carry6;
347	h6 -= carry6 << `26`;
348	carry8 = (h8 + (int64_t) (`1` << `25`)) >> `26`;
349	h9 += carry8;
350	h8 -= carry8 << `26`;
351
352	h[`0`] = (int32_t) h0;
353	h[`1`] = (int32_t) h1;
354	h[`2`] = (int32_t) h2;
355	h[`3`] = (int32_t) h3;
356	h[`4`] = (int32_t) h4;
357	h[`5`] = (int32_t) h5;
358	h[`6`] = (int32_t) h6;
359	h[`7`] = (int32_t) h7;
360	h[`8`] = (int32_t) h8;
361	h[`9`] = (int32_t) h9;
362	}
363
364
365
366	void fe_invert(fe out, const fe z) {
367	fe t0;
368	fe t1;
369	fe t2;
370	fe t3;
371	int i;
372
373	fe_sq(h: t0, f: z);
374
375	for (i = `1`; i < `1`; ++i) {
376	fe_sq(h: t0, f: t0);
377	}
378
379	fe_sq(h: t1, f: t0);
380
381	for (i = `1`; i < `2`; ++i) {
382	fe_sq(h: t1, f: t1);
383	}
384
385	fe_mul(h: t1, f: z, g: t1);
386	fe_mul(h: t0, f: t0, g: t1);
387	fe_sq(h: t2, f: t0);
388
389	for (i = `1`; i < `1`; ++i) {
390	fe_sq(h: t2, f: t2);
391	}
392
393	fe_mul(h: t1, f: t1, g: t2);
394	fe_sq(h: t2, f: t1);
395
396	for (i = `1`; i < `5`; ++i) {
397	fe_sq(h: t2, f: t2);
398	}
399
400	fe_mul(h: t1, f: t2, g: t1);
401	fe_sq(h: t2, f: t1);
402
403	for (i = `1`; i < `10`; ++i) {
404	fe_sq(h: t2, f: t2);
405	}
406
407	fe_mul(h: t2, f: t2, g: t1);
408	fe_sq(h: t3, f: t2);
409
410	for (i = `1`; i < `20`; ++i) {
411	fe_sq(h: t3, f: t3);
412	}
413
414	fe_mul(h: t2, f: t3, g: t2);
415	fe_sq(h: t2, f: t2);
416
417	for (i = `1`; i < `10`; ++i) {
418	fe_sq(h: t2, f: t2);
419	}
420
421	fe_mul(h: t1, f: t2, g: t1);
422	fe_sq(h: t2, f: t1);
423
424	for (i = `1`; i < `50`; ++i) {
425	fe_sq(h: t2, f: t2);
426	}
427
428	fe_mul(h: t2, f: t2, g: t1);
429	fe_sq(h: t3, f: t2);
430
431	for (i = `1`; i < `100`; ++i) {
432	fe_sq(h: t3, f: t3);
433	}
434
435	fe_mul(h: t2, f: t3, g: t2);
436	fe_sq(h: t2, f: t2);
437
438	for (i = `1`; i < `50`; ++i) {
439	fe_sq(h: t2, f: t2);
440	}
441
442	fe_mul(h: t1, f: t2, g: t1);
443	fe_sq(h: t1, f: t1);
444
445	for (i = `1`; i < `5`; ++i) {
446	fe_sq(h: t1, f: t1);
447	}
448
449	fe_mul(h: out, f: t1, g: t0);
450	}
451
452
453
454	/*
455	return 1 if f is in {1,3,5,...,q-2}
456	return 0 if f is in {0,2,4,...,q-1}
457
458	Preconditions:
459	\|f\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
460	*/
461
462	int fe_isnegative(const fe f) {
463	unsigned char s[`32`];
464
465	fe_tobytes(s, h: f);
466
467	return s[`0`] & `1`;
468	}
469
470
471
472	/*
473	return 1 if f == 0
474	return 0 if f != 0
475
476	Preconditions:
477	\|f\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
478	*/
479
480	int fe_isnonzero(const fe f) {
481	unsigned char s[`32`];
482	unsigned char r;
483
484	fe_tobytes(s, h: f);
485
486	r = s[`0`];
487	#define F(i) r \|= s[i]
488	F(`1`);
489	F(`2`);
490	F(`3`);
491	F(`4`);
492	F(`5`);
493	F(`6`);
494	F(`7`);
495	F(`8`);
496	F(`9`);
497	F(`10`);
498	F(`11`);
499	F(`12`);
500	F(`13`);
501	F(`14`);
502	F(`15`);
503	F(`16`);
504	F(`17`);
505	F(`18`);
506	F(`19`);
507	F(`20`);
508	F(`21`);
509	F(`22`);
510	F(`23`);
511	F(`24`);
512	F(`25`);
513	F(`26`);
514	F(`27`);
515	F(`28`);
516	F(`29`);
517	F(`30`);
518	F(`31`);
519	#undef F
520
521	return r != `0`;
522	}
523
524
525
526	/*
527	h = f g*
528	Can overlap h with f or g.
529
530	Preconditions:
531	\|f\| bounded by 1.652^26,1.652^25,1.652^26,1.652^25,etc.
532	\|g\| bounded by 1.652^26,1.652^25,1.652^26,1.652^25,etc.
533
534	Postconditions:
535	\|h\| bounded by 1.012^25,1.012^24,1.012^25,1.012^24,etc.
536	*/
537
538	/*
539	Notes on implementation strategy:
540
541	Using schoolbook multiplication.
542	Karatsuba would save a little in some cost models.
543
544	Most multiplications by 2 and 19 are 32-bit precomputations;
545	cheaper than 64-bit postcomputations.
546
547	There is one remaining multiplication by 19 in the carry chain;
548	one 19 precomputation can be merged into this,*
549	but the resulting data flow is considerably less clean.
550
551	There are 12 carries below.
552	10 of them are 2-way parallelizable and vectorizable.
553	Can get away with 11 carries, but then data flow is much deeper.
554
555	With tighter constraints on inputs can squeeze carries into int32.
556	*/
557
558	void fe_mul(fe h, const fe f, const fe g) {
559	int32_t f0 = f[`0`];
560	int32_t f1 = f[`1`];
561	int32_t f2 = f[`2`];
562	int32_t f3 = f[`3`];
563	int32_t f4 = f[`4`];
564	int32_t f5 = f[`5`];
565	int32_t f6 = f[`6`];
566	int32_t f7 = f[`7`];
567	int32_t f8 = f[`8`];
568	int32_t f9 = f[`9`];
569	int32_t g0 = g[`0`];
570	int32_t g1 = g[`1`];
571	int32_t g2 = g[`2`];
572	int32_t g3 = g[`3`];
573	int32_t g4 = g[`4`];
574	int32_t g5 = g[`5`];
575	int32_t g6 = g[`6`];
576	int32_t g7 = g[`7`];
577	int32_t g8 = g[`8`];
578	int32_t g9 = g[`9`];
579	int32_t g1_19 = `19` * g1; / 1.9593752^29 /*
580	int32_t g2_19 = `19` * g2; / 1.9593752^30; still ok /*
581	int32_t g3_19 = `19` * g3;
582	int32_t g4_19 = `19` * g4;
583	int32_t g5_19 = `19` * g5;
584	int32_t g6_19 = `19` * g6;
585	int32_t g7_19 = `19` * g7;
586	int32_t g8_19 = `19` * g8;
587	int32_t g9_19 = `19` * g9;
588	int32_t f1_2 = `2` * f1;
589	int32_t f3_2 = `2` * f3;
590	int32_t f5_2 = `2` * f5;
591	int32_t f7_2 = `2` * f7;
592	int32_t f9_2 = `2` * f9;
593	int64_t f0g0 = f0 * (int64_t) g0;
594	int64_t f0g1 = f0 * (int64_t) g1;
595	int64_t f0g2 = f0 * (int64_t) g2;
596	int64_t f0g3 = f0 * (int64_t) g3;
597	int64_t f0g4 = f0 * (int64_t) g4;
598	int64_t f0g5 = f0 * (int64_t) g5;
599	int64_t f0g6 = f0 * (int64_t) g6;
600	int64_t f0g7 = f0 * (int64_t) g7;
601	int64_t f0g8 = f0 * (int64_t) g8;
602	int64_t f0g9 = f0 * (int64_t) g9;
603	int64_t f1g0 = f1 * (int64_t) g0;
604	int64_t f1g1_2 = f1_2 * (int64_t) g1;
605	int64_t f1g2 = f1 * (int64_t) g2;
606	int64_t f1g3_2 = f1_2 * (int64_t) g3;
607	int64_t f1g4 = f1 * (int64_t) g4;
608	int64_t f1g5_2 = f1_2 * (int64_t) g5;
609	int64_t f1g6 = f1 * (int64_t) g6;
610	int64_t f1g7_2 = f1_2 * (int64_t) g7;
611	int64_t f1g8 = f1 * (int64_t) g8;
612	int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
613	int64_t f2g0 = f2 * (int64_t) g0;
614	int64_t f2g1 = f2 * (int64_t) g1;
615	int64_t f2g2 = f2 * (int64_t) g2;
616	int64_t f2g3 = f2 * (int64_t) g3;
617	int64_t f2g4 = f2 * (int64_t) g4;
618	int64_t f2g5 = f2 * (int64_t) g5;
619	int64_t f2g6 = f2 * (int64_t) g6;
620	int64_t f2g7 = f2 * (int64_t) g7;
621	int64_t f2g8_19 = f2 * (int64_t) g8_19;
622	int64_t f2g9_19 = f2 * (int64_t) g9_19;
623	int64_t f3g0 = f3 * (int64_t) g0;
624	int64_t f3g1_2 = f3_2 * (int64_t) g1;
625	int64_t f3g2 = f3 * (int64_t) g2;
626	int64_t f3g3_2 = f3_2 * (int64_t) g3;
627	int64_t f3g4 = f3 * (int64_t) g4;
628	int64_t f3g5_2 = f3_2 * (int64_t) g5;
629	int64_t f3g6 = f3 * (int64_t) g6;
630	int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
631	int64_t f3g8_19 = f3 * (int64_t) g8_19;
632	int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
633	int64_t f4g0 = f4 * (int64_t) g0;
634	int64_t f4g1 = f4 * (int64_t) g1;
635	int64_t f4g2 = f4 * (int64_t) g2;
636	int64_t f4g3 = f4 * (int64_t) g3;
637	int64_t f4g4 = f4 * (int64_t) g4;
638	int64_t f4g5 = f4 * (int64_t) g5;
639	int64_t f4g6_19 = f4 * (int64_t) g6_19;
640	int64_t f4g7_19 = f4 * (int64_t) g7_19;
641	int64_t f4g8_19 = f4 * (int64_t) g8_19;
642	int64_t f4g9_19 = f4 * (int64_t) g9_19;
643	int64_t f5g0 = f5 * (int64_t) g0;
644	int64_t f5g1_2 = f5_2 * (int64_t) g1;
645	int64_t f5g2 = f5 * (int64_t) g2;
646	int64_t f5g3_2 = f5_2 * (int64_t) g3;
647	int64_t f5g4 = f5 * (int64_t) g4;
648	int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
649	int64_t f5g6_19 = f5 * (int64_t) g6_19;
650	int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
651	int64_t f5g8_19 = f5 * (int64_t) g8_19;
652	int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
653	int64_t f6g0 = f6 * (int64_t) g0;
654	int64_t f6g1 = f6 * (int64_t) g1;
655	int64_t f6g2 = f6 * (int64_t) g2;
656	int64_t f6g3 = f6 * (int64_t) g3;
657	int64_t f6g4_19 = f6 * (int64_t) g4_19;
658	int64_t f6g5_19 = f6 * (int64_t) g5_19;
659	int64_t f6g6_19 = f6 * (int64_t) g6_19;
660	int64_t f6g7_19 = f6 * (int64_t) g7_19;
661	int64_t f6g8_19 = f6 * (int64_t) g8_19;
662	int64_t f6g9_19 = f6 * (int64_t) g9_19;
663	int64_t f7g0 = f7 * (int64_t) g0;
664	int64_t f7g1_2 = f7_2 * (int64_t) g1;
665	int64_t f7g2 = f7 * (int64_t) g2;
666	int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
667	int64_t f7g4_19 = f7 * (int64_t) g4_19;
668	int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
669	int64_t f7g6_19 = f7 * (int64_t) g6_19;
670	int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
671	int64_t f7g8_19 = f7 * (int64_t) g8_19;
672	int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
673	int64_t f8g0 = f8 * (int64_t) g0;
674	int64_t f8g1 = f8 * (int64_t) g1;
675	int64_t f8g2_19 = f8 * (int64_t) g2_19;
676	int64_t f8g3_19 = f8 * (int64_t) g3_19;
677	int64_t f8g4_19 = f8 * (int64_t) g4_19;
678	int64_t f8g5_19 = f8 * (int64_t) g5_19;
679	int64_t f8g6_19 = f8 * (int64_t) g6_19;
680	int64_t f8g7_19 = f8 * (int64_t) g7_19;
681	int64_t f8g8_19 = f8 * (int64_t) g8_19;
682	int64_t f8g9_19 = f8 * (int64_t) g9_19;
683	int64_t f9g0 = f9 * (int64_t) g0;
684	int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
685	int64_t f9g2_19 = f9 * (int64_t) g2_19;
686	int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
687	int64_t f9g4_19 = f9 * (int64_t) g4_19;
688	int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
689	int64_t f9g6_19 = f9 * (int64_t) g6_19;
690	int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
691	int64_t f9g8_19 = f9 * (int64_t) g8_19;
692	int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
693	int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
694	int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
695	int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
696	int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
697	int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
698	int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
699	int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
700	int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
701	int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
702	int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ;
703	int64_t carry0;
704	int64_t carry1;
705	int64_t carry2;
706	int64_t carry3;
707	int64_t carry4;
708	int64_t carry5;
709	int64_t carry6;
710	int64_t carry7;
711	int64_t carry8;
712	int64_t carry9;
713
714	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
715	h1 += carry0;
716	h0 -= carry0 << `26`;
717	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
718	h5 += carry4;
719	h4 -= carry4 << `26`;
720
721	carry1 = (h1 + (int64_t) (`1` << `24`)) >> `25`;
722	h2 += carry1;
723	h1 -= carry1 << `25`;
724	carry5 = (h5 + (int64_t) (`1` << `24`)) >> `25`;
725	h6 += carry5;
726	h5 -= carry5 << `25`;
727
728	carry2 = (h2 + (int64_t) (`1` << `25`)) >> `26`;
729	h3 += carry2;
730	h2 -= carry2 << `26`;
731	carry6 = (h6 + (int64_t) (`1` << `25`)) >> `26`;
732	h7 += carry6;
733	h6 -= carry6 << `26`;
734
735	carry3 = (h3 + (int64_t) (`1` << `24`)) >> `25`;
736	h4 += carry3;
737	h3 -= carry3 << `25`;
738	carry7 = (h7 + (int64_t) (`1` << `24`)) >> `25`;
739	h8 += carry7;
740	h7 -= carry7 << `25`;
741
742	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
743	h5 += carry4;
744	h4 -= carry4 << `26`;
745	carry8 = (h8 + (int64_t) (`1` << `25`)) >> `26`;
746	h9 += carry8;
747	h8 -= carry8 << `26`;
748
749	carry9 = (h9 + (int64_t) (`1` << `24`)) >> `25`;
750	h0 += carry9 * `19`;
751	h9 -= carry9 << `25`;
752
753	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
754	h1 += carry0;
755	h0 -= carry0 << `26`;
756
757	h[`0`] = (int32_t) h0;
758	h[`1`] = (int32_t) h1;
759	h[`2`] = (int32_t) h2;
760	h[`3`] = (int32_t) h3;
761	h[`4`] = (int32_t) h4;
762	h[`5`] = (int32_t) h5;
763	h[`6`] = (int32_t) h6;
764	h[`7`] = (int32_t) h7;
765	h[`8`] = (int32_t) h8;
766	h[`9`] = (int32_t) h9;
767	}
768
769
770	/*
771	h = f 121666*
772	Can overlap h with f.
773
774	Preconditions:
775	\|f\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
776
777	Postconditions:
778	\|h\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
779	*/
780
781	void fe_mul121666(fe h, fe f) {
782	int32_t f0 = f[`0`];
783	int32_t f1 = f[`1`];
784	int32_t f2 = f[`2`];
785	int32_t f3 = f[`3`];
786	int32_t f4 = f[`4`];
787	int32_t f5 = f[`5`];
788	int32_t f6 = f[`6`];
789	int32_t f7 = f[`7`];
790	int32_t f8 = f[`8`];
791	int32_t f9 = f[`9`];
792	int64_t h0 = f0 * (int64_t) `121666`;
793	int64_t h1 = f1 * (int64_t) `121666`;
794	int64_t h2 = f2 * (int64_t) `121666`;
795	int64_t h3 = f3 * (int64_t) `121666`;
796	int64_t h4 = f4 * (int64_t) `121666`;
797	int64_t h5 = f5 * (int64_t) `121666`;
798	int64_t h6 = f6 * (int64_t) `121666`;
799	int64_t h7 = f7 * (int64_t) `121666`;
800	int64_t h8 = f8 * (int64_t) `121666`;
801	int64_t h9 = f9 * (int64_t) `121666`;
802	int64_t carry0;
803	int64_t carry1;
804	int64_t carry2;
805	int64_t carry3;
806	int64_t carry4;
807	int64_t carry5;
808	int64_t carry6;
809	int64_t carry7;
810	int64_t carry8;
811	int64_t carry9;
812
813	carry9 = (h9 + (int64_t) (`1`<<`24`)) >> `25`; h0 += carry9 * `19`; h9 -= carry9 << `25`;
814	carry1 = (h1 + (int64_t) (`1`<<`24`)) >> `25`; h2 += carry1; h1 -= carry1 << `25`;
815	carry3 = (h3 + (int64_t) (`1`<<`24`)) >> `25`; h4 += carry3; h3 -= carry3 << `25`;
816	carry5 = (h5 + (int64_t) (`1`<<`24`)) >> `25`; h6 += carry5; h5 -= carry5 << `25`;
817	carry7 = (h7 + (int64_t) (`1`<<`24`)) >> `25`; h8 += carry7; h7 -= carry7 << `25`;
818
819	carry0 = (h0 + (int64_t) (`1`<<`25`)) >> `26`; h1 += carry0; h0 -= carry0 << `26`;
820	carry2 = (h2 + (int64_t) (`1`<<`25`)) >> `26`; h3 += carry2; h2 -= carry2 << `26`;
821	carry4 = (h4 + (int64_t) (`1`<<`25`)) >> `26`; h5 += carry4; h4 -= carry4 << `26`;
822	carry6 = (h6 + (int64_t) (`1`<<`25`)) >> `26`; h7 += carry6; h6 -= carry6 << `26`;
823	carry8 = (h8 + (int64_t) (`1`<<`25`)) >> `26`; h9 += carry8; h8 -= carry8 << `26`;
824
825	h[`0`] = h0;
826	h[`1`] = h1;
827	h[`2`] = h2;
828	h[`3`] = h3;
829	h[`4`] = h4;
830	h[`5`] = h5;
831	h[`6`] = h6;
832	h[`7`] = h7;
833	h[`8`] = h8;
834	h[`9`] = h9;
835	}
836
837
838	/*
839	h = -f
840
841	Preconditions:
842	\|f\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
843
844	Postconditions:
845	\|h\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
846	*/
847
848	void fe_neg(fe h, const fe f) {
849	int32_t f0 = f[`0`];
850	int32_t f1 = f[`1`];
851	int32_t f2 = f[`2`];
852	int32_t f3 = f[`3`];
853	int32_t f4 = f[`4`];
854	int32_t f5 = f[`5`];
855	int32_t f6 = f[`6`];
856	int32_t f7 = f[`7`];
857	int32_t f8 = f[`8`];
858	int32_t f9 = f[`9`];
859	int32_t h0 = -f0;
860	int32_t h1 = -f1;
861	int32_t h2 = -f2;
862	int32_t h3 = -f3;
863	int32_t h4 = -f4;
864	int32_t h5 = -f5;
865	int32_t h6 = -f6;
866	int32_t h7 = -f7;
867	int32_t h8 = -f8;
868	int32_t h9 = -f9;
869
870	h[`0`] = h0;
871	h[`1`] = h1;
872	h[`2`] = h2;
873	h[`3`] = h3;
874	h[`4`] = h4;
875	h[`5`] = h5;
876	h[`6`] = h6;
877	h[`7`] = h7;
878	h[`8`] = h8;
879	h[`9`] = h9;
880	}
881
882
883	void fe_pow22523(fe out, const fe z) {
884	fe t0;
885	fe t1;
886	fe t2;
887	int i;
888	fe_sq(h: t0, f: z);
889
890	for (i = `1`; i < `1`; ++i) {
891	fe_sq(h: t0, f: t0);
892	}
893
894	fe_sq(h: t1, f: t0);
895
896	for (i = `1`; i < `2`; ++i) {
897	fe_sq(h: t1, f: t1);
898	}
899
900	fe_mul(h: t1, f: z, g: t1);
901	fe_mul(h: t0, f: t0, g: t1);
902	fe_sq(h: t0, f: t0);
903
904	for (i = `1`; i < `1`; ++i) {
905	fe_sq(h: t0, f: t0);
906	}
907
908	fe_mul(h: t0, f: t1, g: t0);
909	fe_sq(h: t1, f: t0);
910
911	for (i = `1`; i < `5`; ++i) {
912	fe_sq(h: t1, f: t1);
913	}
914
915	fe_mul(h: t0, f: t1, g: t0);
916	fe_sq(h: t1, f: t0);
917
918	for (i = `1`; i < `10`; ++i) {
919	fe_sq(h: t1, f: t1);
920	}
921
922	fe_mul(h: t1, f: t1, g: t0);
923	fe_sq(h: t2, f: t1);
924
925	for (i = `1`; i < `20`; ++i) {
926	fe_sq(h: t2, f: t2);
927	}
928
929	fe_mul(h: t1, f: t2, g: t1);
930	fe_sq(h: t1, f: t1);
931
932	for (i = `1`; i < `10`; ++i) {
933	fe_sq(h: t1, f: t1);
934	}
935
936	fe_mul(h: t0, f: t1, g: t0);
937	fe_sq(h: t1, f: t0);
938
939	for (i = `1`; i < `50`; ++i) {
940	fe_sq(h: t1, f: t1);
941	}
942
943	fe_mul(h: t1, f: t1, g: t0);
944	fe_sq(h: t2, f: t1);
945
946	for (i = `1`; i < `100`; ++i) {
947	fe_sq(h: t2, f: t2);
948	}
949
950	fe_mul(h: t1, f: t2, g: t1);
951	fe_sq(h: t1, f: t1);
952
953	for (i = `1`; i < `50`; ++i) {
954	fe_sq(h: t1, f: t1);
955	}
956
957	fe_mul(h: t0, f: t1, g: t0);
958	fe_sq(h: t0, f: t0);
959
960	for (i = `1`; i < `2`; ++i) {
961	fe_sq(h: t0, f: t0);
962	}
963
964	fe_mul(h: out, f: t0, g: z);
965	return;
966	}
967
968
969	/*
970	h = f f*
971	Can overlap h with f.
972
973	Preconditions:
974	\|f\| bounded by 1.652^26,1.652^25,1.652^26,1.652^25,etc.
975
976	Postconditions:
977	\|h\| bounded by 1.012^25,1.012^24,1.012^25,1.012^24,etc.
978	*/
979
980	/*
981	See fe_mul.c for discussion of implementation strategy.
982	*/
983
984	void fe_sq(fe h, const fe f) {
985	int32_t f0 = f[`0`];
986	int32_t f1 = f[`1`];
987	int32_t f2 = f[`2`];
988	int32_t f3 = f[`3`];
989	int32_t f4 = f[`4`];
990	int32_t f5 = f[`5`];
991	int32_t f6 = f[`6`];
992	int32_t f7 = f[`7`];
993	int32_t f8 = f[`8`];
994	int32_t f9 = f[`9`];
995	int32_t f0_2 = `2` * f0;
996	int32_t f1_2 = `2` * f1;
997	int32_t f2_2 = `2` * f2;
998	int32_t f3_2 = `2` * f3;
999	int32_t f4_2 = `2` * f4;
1000	int32_t f5_2 = `2` * f5;
1001	int32_t f6_2 = `2` * f6;
1002	int32_t f7_2 = `2` * f7;
1003	int32_t f5_38 = `38` * f5; / 1.9593752^30 /*
1004	int32_t f6_19 = `19` * f6; / 1.9593752^30 /*
1005	int32_t f7_38 = `38` * f7; / 1.9593752^30 /*
1006	int32_t f8_19 = `19` * f8; / 1.9593752^30 /*
1007	int32_t f9_38 = `38` * f9; / 1.9593752^30 /*
1008	int64_t f0f0 = f0 * (int64_t) f0;
1009	int64_t f0f1_2 = f0_2 * (int64_t) f1;
1010	int64_t f0f2_2 = f0_2 * (int64_t) f2;
1011	int64_t f0f3_2 = f0_2 * (int64_t) f3;
1012	int64_t f0f4_2 = f0_2 * (int64_t) f4;
1013	int64_t f0f5_2 = f0_2 * (int64_t) f5;
1014	int64_t f0f6_2 = f0_2 * (int64_t) f6;
1015	int64_t f0f7_2 = f0_2 * (int64_t) f7;
1016	int64_t f0f8_2 = f0_2 * (int64_t) f8;
1017	int64_t f0f9_2 = f0_2 * (int64_t) f9;
1018	int64_t f1f1_2 = f1_2 * (int64_t) f1;
1019	int64_t f1f2_2 = f1_2 * (int64_t) f2;
1020	int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1021	int64_t f1f4_2 = f1_2 * (int64_t) f4;
1022	int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1023	int64_t f1f6_2 = f1_2 * (int64_t) f6;
1024	int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1025	int64_t f1f8_2 = f1_2 * (int64_t) f8;
1026	int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1027	int64_t f2f2 = f2 * (int64_t) f2;
1028	int64_t f2f3_2 = f2_2 * (int64_t) f3;
1029	int64_t f2f4_2 = f2_2 * (int64_t) f4;
1030	int64_t f2f5_2 = f2_2 * (int64_t) f5;
1031	int64_t f2f6_2 = f2_2 * (int64_t) f6;
1032	int64_t f2f7_2 = f2_2 * (int64_t) f7;
1033	int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1034	int64_t f2f9_38 = f2 * (int64_t) f9_38;
1035	int64_t f3f3_2 = f3_2 * (int64_t) f3;
1036	int64_t f3f4_2 = f3_2 * (int64_t) f4;
1037	int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1038	int64_t f3f6_2 = f3_2 * (int64_t) f6;
1039	int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1040	int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1041	int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1042	int64_t f4f4 = f4 * (int64_t) f4;
1043	int64_t f4f5_2 = f4_2 * (int64_t) f5;
1044	int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1045	int64_t f4f7_38 = f4 * (int64_t) f7_38;
1046	int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1047	int64_t f4f9_38 = f4 * (int64_t) f9_38;
1048	int64_t f5f5_38 = f5 * (int64_t) f5_38;
1049	int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1050	int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1051	int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1052	int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1053	int64_t f6f6_19 = f6 * (int64_t) f6_19;
1054	int64_t f6f7_38 = f6 * (int64_t) f7_38;
1055	int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1056	int64_t f6f9_38 = f6 * (int64_t) f9_38;
1057	int64_t f7f7_38 = f7 * (int64_t) f7_38;
1058	int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1059	int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1060	int64_t f8f8_19 = f8 * (int64_t) f8_19;
1061	int64_t f8f9_38 = f8 * (int64_t) f9_38;
1062	int64_t f9f9_38 = f9 * (int64_t) f9_38;
1063	int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1064	int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1065	int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1066	int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1067	int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1068	int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1069	int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1070	int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1071	int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1072	int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1073	int64_t carry0;
1074	int64_t carry1;
1075	int64_t carry2;
1076	int64_t carry3;
1077	int64_t carry4;
1078	int64_t carry5;
1079	int64_t carry6;
1080	int64_t carry7;
1081	int64_t carry8;
1082	int64_t carry9;
1083	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
1084	h1 += carry0;
1085	h0 -= carry0 << `26`;
1086	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
1087	h5 += carry4;
1088	h4 -= carry4 << `26`;
1089	carry1 = (h1 + (int64_t) (`1` << `24`)) >> `25`;
1090	h2 += carry1;
1091	h1 -= carry1 << `25`;
1092	carry5 = (h5 + (int64_t) (`1` << `24`)) >> `25`;
1093	h6 += carry5;
1094	h5 -= carry5 << `25`;
1095	carry2 = (h2 + (int64_t) (`1` << `25`)) >> `26`;
1096	h3 += carry2;
1097	h2 -= carry2 << `26`;
1098	carry6 = (h6 + (int64_t) (`1` << `25`)) >> `26`;
1099	h7 += carry6;
1100	h6 -= carry6 << `26`;
1101	carry3 = (h3 + (int64_t) (`1` << `24`)) >> `25`;
1102	h4 += carry3;
1103	h3 -= carry3 << `25`;
1104	carry7 = (h7 + (int64_t) (`1` << `24`)) >> `25`;
1105	h8 += carry7;
1106	h7 -= carry7 << `25`;
1107	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
1108	h5 += carry4;
1109	h4 -= carry4 << `26`;
1110	carry8 = (h8 + (int64_t) (`1` << `25`)) >> `26`;
1111	h9 += carry8;
1112	h8 -= carry8 << `26`;
1113	carry9 = (h9 + (int64_t) (`1` << `24`)) >> `25`;
1114	h0 += carry9 * `19`;
1115	h9 -= carry9 << `25`;
1116	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
1117	h1 += carry0;
1118	h0 -= carry0 << `26`;
1119	h[`0`] = (int32_t) h0;
1120	h[`1`] = (int32_t) h1;
1121	h[`2`] = (int32_t) h2;
1122	h[`3`] = (int32_t) h3;
1123	h[`4`] = (int32_t) h4;
1124	h[`5`] = (int32_t) h5;
1125	h[`6`] = (int32_t) h6;
1126	h[`7`] = (int32_t) h7;
1127	h[`8`] = (int32_t) h8;
1128	h[`9`] = (int32_t) h9;
1129	}
1130
1131
1132	/*
1133	h = 2 f * f*
1134	Can overlap h with f.
1135
1136	Preconditions:
1137	\|f\| bounded by 1.652^26,1.652^25,1.652^26,1.652^25,etc.
1138
1139	Postconditions:
1140	\|h\| bounded by 1.012^25,1.012^24,1.012^25,1.012^24,etc.
1141	*/
1142
1143	/*
1144	See fe_mul.c for discussion of implementation strategy.
1145	*/
1146
1147	void fe_sq2(fe h, const fe f) {
1148	int32_t f0 = f[`0`];
1149	int32_t f1 = f[`1`];
1150	int32_t f2 = f[`2`];
1151	int32_t f3 = f[`3`];
1152	int32_t f4 = f[`4`];
1153	int32_t f5 = f[`5`];
1154	int32_t f6 = f[`6`];
1155	int32_t f7 = f[`7`];
1156	int32_t f8 = f[`8`];
1157	int32_t f9 = f[`9`];
1158	int32_t f0_2 = `2` * f0;
1159	int32_t f1_2 = `2` * f1;
1160	int32_t f2_2 = `2` * f2;
1161	int32_t f3_2 = `2` * f3;
1162	int32_t f4_2 = `2` * f4;
1163	int32_t f5_2 = `2` * f5;
1164	int32_t f6_2 = `2` * f6;
1165	int32_t f7_2 = `2` * f7;
1166	int32_t f5_38 = `38` * f5; / 1.9593752^30 /*
1167	int32_t f6_19 = `19` * f6; / 1.9593752^30 /*
1168	int32_t f7_38 = `38` * f7; / 1.9593752^30 /*
1169	int32_t f8_19 = `19` * f8; / 1.9593752^30 /*
1170	int32_t f9_38 = `38` * f9; / 1.9593752^30 /*
1171	int64_t f0f0 = f0 * (int64_t) f0;
1172	int64_t f0f1_2 = f0_2 * (int64_t) f1;
1173	int64_t f0f2_2 = f0_2 * (int64_t) f2;
1174	int64_t f0f3_2 = f0_2 * (int64_t) f3;
1175	int64_t f0f4_2 = f0_2 * (int64_t) f4;
1176	int64_t f0f5_2 = f0_2 * (int64_t) f5;
1177	int64_t f0f6_2 = f0_2 * (int64_t) f6;
1178	int64_t f0f7_2 = f0_2 * (int64_t) f7;
1179	int64_t f0f8_2 = f0_2 * (int64_t) f8;
1180	int64_t f0f9_2 = f0_2 * (int64_t) f9;
1181	int64_t f1f1_2 = f1_2 * (int64_t) f1;
1182	int64_t f1f2_2 = f1_2 * (int64_t) f2;
1183	int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1184	int64_t f1f4_2 = f1_2 * (int64_t) f4;
1185	int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1186	int64_t f1f6_2 = f1_2 * (int64_t) f6;
1187	int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1188	int64_t f1f8_2 = f1_2 * (int64_t) f8;
1189	int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1190	int64_t f2f2 = f2 * (int64_t) f2;
1191	int64_t f2f3_2 = f2_2 * (int64_t) f3;
1192	int64_t f2f4_2 = f2_2 * (int64_t) f4;
1193	int64_t f2f5_2 = f2_2 * (int64_t) f5;
1194	int64_t f2f6_2 = f2_2 * (int64_t) f6;
1195	int64_t f2f7_2 = f2_2 * (int64_t) f7;
1196	int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1197	int64_t f2f9_38 = f2 * (int64_t) f9_38;
1198	int64_t f3f3_2 = f3_2 * (int64_t) f3;
1199	int64_t f3f4_2 = f3_2 * (int64_t) f4;
1200	int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1201	int64_t f3f6_2 = f3_2 * (int64_t) f6;
1202	int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1203	int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1204	int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1205	int64_t f4f4 = f4 * (int64_t) f4;
1206	int64_t f4f5_2 = f4_2 * (int64_t) f5;
1207	int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1208	int64_t f4f7_38 = f4 * (int64_t) f7_38;
1209	int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1210	int64_t f4f9_38 = f4 * (int64_t) f9_38;
1211	int64_t f5f5_38 = f5 * (int64_t) f5_38;
1212	int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1213	int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1214	int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1215	int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1216	int64_t f6f6_19 = f6 * (int64_t) f6_19;
1217	int64_t f6f7_38 = f6 * (int64_t) f7_38;
1218	int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1219	int64_t f6f9_38 = f6 * (int64_t) f9_38;
1220	int64_t f7f7_38 = f7 * (int64_t) f7_38;
1221	int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1222	int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1223	int64_t f8f8_19 = f8 * (int64_t) f8_19;
1224	int64_t f8f9_38 = f8 * (int64_t) f9_38;
1225	int64_t f9f9_38 = f9 * (int64_t) f9_38;
1226	int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1227	int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1228	int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1229	int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1230	int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1231	int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1232	int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1233	int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1234	int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1235	int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1236	int64_t carry0;
1237	int64_t carry1;
1238	int64_t carry2;
1239	int64_t carry3;
1240	int64_t carry4;
1241	int64_t carry5;
1242	int64_t carry6;
1243	int64_t carry7;
1244	int64_t carry8;
1245	int64_t carry9;
1246	h0 += h0;
1247	h1 += h1;
1248	h2 += h2;
1249	h3 += h3;
1250	h4 += h4;
1251	h5 += h5;
1252	h6 += h6;
1253	h7 += h7;
1254	h8 += h8;
1255	h9 += h9;
1256	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
1257	h1 += carry0;
1258	h0 -= carry0 << `26`;
1259	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
1260	h5 += carry4;
1261	h4 -= carry4 << `26`;
1262	carry1 = (h1 + (int64_t) (`1` << `24`)) >> `25`;
1263	h2 += carry1;
1264	h1 -= carry1 << `25`;
1265	carry5 = (h5 + (int64_t) (`1` << `24`)) >> `25`;
1266	h6 += carry5;
1267	h5 -= carry5 << `25`;
1268	carry2 = (h2 + (int64_t) (`1` << `25`)) >> `26`;
1269	h3 += carry2;
1270	h2 -= carry2 << `26`;
1271	carry6 = (h6 + (int64_t) (`1` << `25`)) >> `26`;
1272	h7 += carry6;
1273	h6 -= carry6 << `26`;
1274	carry3 = (h3 + (int64_t) (`1` << `24`)) >> `25`;
1275	h4 += carry3;
1276	h3 -= carry3 << `25`;
1277	carry7 = (h7 + (int64_t) (`1` << `24`)) >> `25`;
1278	h8 += carry7;
1279	h7 -= carry7 << `25`;
1280	carry4 = (h4 + (int64_t) (`1` << `25`)) >> `26`;
1281	h5 += carry4;
1282	h4 -= carry4 << `26`;
1283	carry8 = (h8 + (int64_t) (`1` << `25`)) >> `26`;
1284	h9 += carry8;
1285	h8 -= carry8 << `26`;
1286	carry9 = (h9 + (int64_t) (`1` << `24`)) >> `25`;
1287	h0 += carry9 * `19`;
1288	h9 -= carry9 << `25`;
1289	carry0 = (h0 + (int64_t) (`1` << `25`)) >> `26`;
1290	h1 += carry0;
1291	h0 -= carry0 << `26`;
1292	h[`0`] = (int32_t) h0;
1293	h[`1`] = (int32_t) h1;
1294	h[`2`] = (int32_t) h2;
1295	h[`3`] = (int32_t) h3;
1296	h[`4`] = (int32_t) h4;
1297	h[`5`] = (int32_t) h5;
1298	h[`6`] = (int32_t) h6;
1299	h[`7`] = (int32_t) h7;
1300	h[`8`] = (int32_t) h8;
1301	h[`9`] = (int32_t) h9;
1302	}
1303
1304
1305	/*
1306	h = f - g
1307	Can overlap h with f or g.
1308
1309	Preconditions:
1310	\|f\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
1311	\|g\| bounded by 1.12^25,1.12^24,1.12^25,1.12^24,etc.
1312
1313	Postconditions:
1314	\|h\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
1315	*/
1316
1317	void fe_sub(fe h, const fe f, const fe g) {
1318	int32_t f0 = f[`0`];
1319	int32_t f1 = f[`1`];
1320	int32_t f2 = f[`2`];
1321	int32_t f3 = f[`3`];
1322	int32_t f4 = f[`4`];
1323	int32_t f5 = f[`5`];
1324	int32_t f6 = f[`6`];
1325	int32_t f7 = f[`7`];
1326	int32_t f8 = f[`8`];
1327	int32_t f9 = f[`9`];
1328	int32_t g0 = g[`0`];
1329	int32_t g1 = g[`1`];
1330	int32_t g2 = g[`2`];
1331	int32_t g3 = g[`3`];
1332	int32_t g4 = g[`4`];
1333	int32_t g5 = g[`5`];
1334	int32_t g6 = g[`6`];
1335	int32_t g7 = g[`7`];
1336	int32_t g8 = g[`8`];
1337	int32_t g9 = g[`9`];
1338	int32_t h0 = f0 - g0;
1339	int32_t h1 = f1 - g1;
1340	int32_t h2 = f2 - g2;
1341	int32_t h3 = f3 - g3;
1342	int32_t h4 = f4 - g4;
1343	int32_t h5 = f5 - g5;
1344	int32_t h6 = f6 - g6;
1345	int32_t h7 = f7 - g7;
1346	int32_t h8 = f8 - g8;
1347	int32_t h9 = f9 - g9;
1348
1349	h[`0`] = h0;
1350	h[`1`] = h1;
1351	h[`2`] = h2;
1352	h[`3`] = h3;
1353	h[`4`] = h4;
1354	h[`5`] = h5;
1355	h[`6`] = h6;
1356	h[`7`] = h7;
1357	h[`8`] = h8;
1358	h[`9`] = h9;
1359	}
1360
1361
1362
1363	/*
1364	Preconditions:
1365	\|h\| bounded by 1.12^26,1.12^25,1.12^26,1.12^25,etc.
1366
1367	Write p=2^255-19; q=floor(h/p).
1368	Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
1369
1370	Proof:
1371	Have \|h\|<=p so \|q\|<=1 so \|19^2 2^(-255) q\|<1/4.
1372	Also have \|h-2^230 h9\|<2^231 so \|19 2^(-255)(h-2^230 h9)\|<1/4.
1373
1374	Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
1375	Then 0<y<1.
1376
1377	Write r=h-pq.
1378	Have 0<=r<=p-1=2^255-20.
1379	Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
1380
1381	Write x=r+19(2^-255)r+y.
1382	Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
1383
1384	Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
1385	so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
1386	*/
1387
1388	void fe_tobytes(unsigned char s, const* fe h) {
1389	int32_t h0 = h[`0`];
1390	int32_t h1 = h[`1`];
1391	int32_t h2 = h[`2`];
1392	int32_t h3 = h[`3`];
1393	int32_t h4 = h[`4`];
1394	int32_t h5 = h[`5`];
1395	int32_t h6 = h[`6`];
1396	int32_t h7 = h[`7`];
1397	int32_t h8 = h[`8`];
1398	int32_t h9 = h[`9`];
1399	int32_t q;
1400	int32_t carry0;
1401	int32_t carry1;
1402	int32_t carry2;
1403	int32_t carry3;
1404	int32_t carry4;
1405	int32_t carry5;
1406	int32_t carry6;
1407	int32_t carry7;
1408	int32_t carry8;
1409	int32_t carry9;
1410	q = (`19` * h9 + (((int32_t) `1`) << `24`)) >> `25`;
1411	q = (h0 + q) >> `26`;
1412	q = (h1 + q) >> `25`;
1413	q = (h2 + q) >> `26`;
1414	q = (h3 + q) >> `25`;
1415	q = (h4 + q) >> `26`;
1416	q = (h5 + q) >> `25`;
1417	q = (h6 + q) >> `26`;
1418	q = (h7 + q) >> `25`;
1419	q = (h8 + q) >> `26`;
1420	q = (h9 + q) >> `25`;
1421	/ Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. /
1422	h0 += `19` * q;
1423	/ Goal: Output h-2^255 q, which is between 0 and 2^255-20. /
1424	carry0 = h0 >> `26`;
1425	h1 += carry0;
1426	h0 -= carry0 << `26`;
1427	carry1 = h1 >> `25`;
1428	h2 += carry1;
1429	h1 -= carry1 << `25`;
1430	carry2 = h2 >> `26`;
1431	h3 += carry2;
1432	h2 -= carry2 << `26`;
1433	carry3 = h3 >> `25`;
1434	h4 += carry3;
1435	h3 -= carry3 << `25`;
1436	carry4 = h4 >> `26`;
1437	h5 += carry4;
1438	h4 -= carry4 << `26`;
1439	carry5 = h5 >> `25`;
1440	h6 += carry5;
1441	h5 -= carry5 << `25`;
1442	carry6 = h6 >> `26`;
1443	h7 += carry6;
1444	h6 -= carry6 << `26`;
1445	carry7 = h7 >> `25`;
1446	h8 += carry7;
1447	h7 -= carry7 << `25`;
1448	carry8 = h8 >> `26`;
1449	h9 += carry8;
1450	h8 -= carry8 << `26`;
1451	carry9 = h9 >> `25`;
1452	h9 -= carry9 << `25`;
1453
1454	/ h10 = carry9 /
1455	/*
1456	Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
1457	Have h0+...+2^230 h9 between 0 and 2^255-1;
1458	evidently 2^255 h10-2^255 q = 0.
1459	Goal: Output h0+...+2^230 h9.
1460	*/
1461	s[`0`] = (unsigned char) (h0 >> `0`);
1462	s[`1`] = (unsigned char) (h0 >> `8`);
1463	s[`2`] = (unsigned char) (h0 >> `16`);
1464	s[`3`] = (unsigned char) ((h0 >> `24`) \| (h1 << `2`));
1465	s[`4`] = (unsigned char) (h1 >> `6`);
1466	s[`5`] = (unsigned char) (h1 >> `14`);
1467	s[`6`] = (unsigned char) ((h1 >> `22`) \| (h2 << `3`));
1468	s[`7`] = (unsigned char) (h2 >> `5`);
1469	s[`8`] = (unsigned char) (h2 >> `13`);
1470	s[`9`] = (unsigned char) ((h2 >> `21`) \| (h3 << `5`));
1471	s[`10`] = (unsigned char) (h3 >> `3`);
1472	s[`11`] = (unsigned char) (h3 >> `11`);
1473	s[`12`] = (unsigned char) ((h3 >> `19`) \| (h4 << `6`));
1474	s[`13`] = (unsigned char) (h4 >> `2`);
1475	s[`14`] = (unsigned char) (h4 >> `10`);
1476	s[`15`] = (unsigned char) (h4 >> `18`);
1477	s[`16`] = (unsigned char) (h5 >> `0`);
1478	s[`17`] = (unsigned char) (h5 >> `8`);
1479	s[`18`] = (unsigned char) (h5 >> `16`);
1480	s[`19`] = (unsigned char) ((h5 >> `24`) \| (h6 << `1`));
1481	s[`20`] = (unsigned char) (h6 >> `7`);
1482	s[`21`] = (unsigned char) (h6 >> `15`);
1483	s[`22`] = (unsigned char) ((h6 >> `23`) \| (h7 << `3`));
1484	s[`23`] = (unsigned char) (h7 >> `5`);
1485	s[`24`] = (unsigned char) (h7 >> `13`);
1486	s[`25`] = (unsigned char) ((h7 >> `21`) \| (h8 << `4`));
1487	s[`26`] = (unsigned char) (h8 >> `4`);
1488	s[`27`] = (unsigned char) (h8 >> `12`);
1489	s[`28`] = (unsigned char) ((h8 >> `20`) \| (h9 << `6`));
1490	s[`29`] = (unsigned char) (h9 >> `2`);
1491	s[`30`] = (unsigned char) (h9 >> `10`);
1492	s[`31`] = (unsigned char) (h9 >> `18`);
1493	}
1494

Browse the source code of olm/lib/ed25519/src/fe.c