crypto_bigint/modular/mul.rs
1use super::reduction::montgomery_reduction;
2use crate::{CtLt, Limb, Odd, Uint, UintRef, WideWord, Word, word};
3
4/// Computes an "almost" Montgomery multiplication of `x` and `y` into `out`, that is
5/// `out = x * y * 2^(-n*W) mod m + am` assuming `k = -1/m mod 2^W`,
6/// where `W` is the bit size of the limb, and `n * W` is the full bit size of the integer.
7///
8/// NOTE: `out` is assumed to be pre-zeroized.
9///
10/// Unlike the standard Montgomery multiplication, we are reducing the final result only if
11/// it overflows `2^(n*W)`, not when it overflows `m`.
12///
13/// This means that this function does not assume `x` and `y` are reduced `mod m`,
14/// and the result will be correct `mod m`, but potentially greater than `m`,
15/// and smaller than `2^(n * W) + m`.
16/// See "Efficient Software Implementations of Modular Exponentiation" by S. Gueron for details
17/// (<https://eprint.iacr.org/2011/239.pdf>).
18///
19/// This function exhibits certain properties which were discovered via randomized tests,
20/// but (to my knowledge at this moment) have not been proven formally.
21/// Hereinafter we denote `f(x) = floor(x / m)`, that is `f` is the number of subtractions
22/// of the modulus required to fully reduce `x`.
23///
24/// 1. In general, if `f(x) = k` and `f(y) = n`, then `f(AMM(x, y)) <= min(k, n) + 1`.
25/// That is the "reduction error" grows with every operation,
26/// but is determined by the argument with the lower error.
27/// 2. To retrieve the number from Montgomery form we MM it by 1. In this case `f(AMM(x, 1)) = 0`,
28/// that is the result is always fully reduced regardless of `f(x)`.
29/// 3. `f(AMM(x, x)) <= 1` regardless of `f(x)`. That is, squaring resets the error to at most 1.
30#[inline]
31pub(crate) fn almost_montgomery_mul(
32 x: &UintRef,
33 y: &UintRef,
34 out: &mut UintRef,
35 modulus: &UintRef,
36 mod_neg_inv: Limb,
37) {
38 let overflow = montgomery_multiply_inner(
39 x.as_limbs(),
40 y.as_limbs(),
41 out.as_mut_limbs(),
42 modulus.as_limbs(),
43 mod_neg_inv,
44 );
45 let overflow = word::choice_from_lsb(overflow.0);
46 out.conditional_borrowing_sub_assign(modulus, overflow);
47}
48
49/// Ensure the output of an "almost" Montgomery multiplication is properly reduced.
50///
51/// Using the properties of `almost_montgomery_mul()` (see its documentation):
52/// - We have an incoming `x` which is fully reduced (`floor(x / modulus) = 0`).
53/// - We build an array of `powers` which are produced by multiplying the previous power by
54/// `x`, so for each power `floor(power / modulus) <= 1`.
55/// - Then we take turns squaring the accumulator `z` (bringing `floor(z / modulus)` to 1
56/// regardless of the previous reduction level) and multiplying by a power of `x`
57/// (bringing `floor(z / modulus)` to at most 2).
58/// - Then we either exit the loop, or square again, which brings `floor(z / modulus)` back
59/// to 1.
60///
61/// We now need to reduce `z` at most twice to bring it within `[0, modulus)`.
62pub(crate) fn almost_montgomery_reduce(z: &mut UintRef, modulus: &UintRef) {
63 z.conditional_borrowing_sub_assign(modulus, !z.ct_lt(modulus));
64 z.conditional_borrowing_sub_assign(modulus, !z.ct_lt(modulus));
65}
66
67/// Based on Algorithm 14.36 in Handbook of Applied Cryptography
68/// <https://cacr.uwaterloo.ca/hac/about/chap14.pdf>
69///
70/// Multiply `x` and `y` in Montgomery form, producing `x•y•R^-1 mod modulus + a•modulus`.
71///
72/// This algorithm roughly corresponds to the Finely Integrated Operand Scanning (FIOS)
73/// method of "Analyzing and Comparing Montgomery Multiplication Algorithms" by Koc et al
74/// <https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf>
75/// but using wide words to track the intermediate products and carry.
76///
77/// The final conditional subtraction of the modulus to produce a result in the range
78/// `[0, modulus)` is not performed here, and must be performed by the caller. In some
79/// cases this may be deferred, as demonstrated by the `almost_montgomery_mul` method.
80#[inline(always)]
81#[allow(clippy::cast_possible_truncation)]
82pub const fn montgomery_multiply_inner(
83 x: &[Limb],
84 y: &[Limb],
85 out: &mut [Limb],
86 modulus: &[Limb],
87 mod_neg_inv: Limb,
88) -> Limb {
89 let nlimbs = modulus.len();
90 assert!(nlimbs == x.len() && nlimbs == y.len() && nlimbs == out.len());
91
92 let mut meta_carry = 0;
93
94 let mut i = 0;
95 while i < nlimbs {
96 let xi = x[i];
97 // A[0] + x[i]y[0] <= (2^64 - 1)^2 + (2^64 - 1) = 2^128 - 2^64
98 let axy = (xi.0 as WideWord) * (y[0].0 as WideWord) + out[0].0 as WideWord;
99 let u = (axy as Word).wrapping_mul(mod_neg_inv.0);
100
101 let mut carry;
102 // A[0] + x[i]y[0] + um[0] <= (2^128 - 1) + (2^128 - 2^64) = 2^129 - 2^64 - 1
103 let (a, c) = ((u as WideWord) * (modulus[0].0 as WideWord)).overflowing_add(axy);
104 // carry <= (2^129 - 2^64 - 1) / 2^64 <= 2^65 - 2
105 carry = ((c as WideWord) << Word::BITS) | (a >> Word::BITS);
106
107 let mut j = 1;
108 while j < nlimbs {
109 // A[j] + x[i]y[j] <= (2^64 - 1)^2 + (2^64 - 1) = 2^128 - 2^64
110 let axy = (xi.0 as WideWord) * (y[j].0 as WideWord) + out[j].0 as WideWord;
111 // um[j] + carry <= (2^64 - 1)^2 + (2^65 - 2) = 2^128 - 1
112 let umc = (u as WideWord) * (modulus[j].0 as WideWord) + carry;
113 let (ab, c) = axy.overflowing_add(umc);
114 out[j - 1] = Limb(ab as Word);
115 // carry <= (2^129 - 2^64 - 1) / 2^64 <= 2^65 - 2
116 carry = ((c as WideWord) << Word::BITS) | (ab >> Word::BITS);
117 j += 1;
118 }
119
120 carry += meta_carry;
121 (out[nlimbs - 1], meta_carry) = (Limb(carry as Word), carry >> Word::BITS);
122
123 i += 1;
124 }
125
126 Limb(meta_carry as Word)
127}
128
129/// Computes the Montgomery product of `a` and `b` modulo `modulus`, where
130/// `a` and `b` are in Montgomery form.
131pub(crate) const fn mul_montgomery_form<const LIMBS: usize>(
132 a: &Uint<LIMBS>,
133 b: &Uint<LIMBS>,
134 modulus: &Odd<Uint<LIMBS>>,
135 mod_neg_inv: Limb,
136) -> Uint<LIMBS> {
137 let mut out = Uint::<LIMBS>::ZERO;
138 let carry = montgomery_multiply_inner(
139 &a.limbs,
140 &b.limbs,
141 &mut out.limbs,
142 &modulus.as_ref().limbs,
143 mod_neg_inv,
144 );
145 out.try_sub_with_carry(carry, modulus.as_ref()).0
146}
147
148/// Computes the Montgomery squaring of `a` modulo `modulus` where
149/// `a` is in Montgomery form.
150pub(crate) const fn square_montgomery_form<const LIMBS: usize>(
151 a: &Uint<LIMBS>,
152 modulus: &Odd<Uint<LIMBS>>,
153 mod_neg_inv: Limb,
154) -> Uint<LIMBS> {
155 // One case in which it appears to be more efficient to perform a wide squaring and reduction.
156 if LIMBS == 4 {
157 let lower_upper = a.widening_square();
158 return montgomery_reduction(&lower_upper, modulus, mod_neg_inv);
159 }
160
161 let mut out = Uint::<LIMBS>::ZERO;
162 let carry = montgomery_multiply_inner(
163 &a.limbs,
164 &a.limbs,
165 &mut out.limbs,
166 &modulus.as_ref().limbs,
167 mod_neg_inv,
168 );
169 out.try_sub_with_carry(carry, modulus.as_ref()).0
170}
171
172/// Computes a repeated Montgomery squaring of `a` modulo `modulus` where
173/// `a` is in Montgomery form.
174///
175/// This method is variable time in `n`.
176#[inline(always)]
177pub(crate) const fn square_repeat_montgomery_form<const LIMBS: usize>(
178 a: &Uint<LIMBS>,
179 n: u32,
180 modulus: &Odd<Uint<LIMBS>>,
181 mod_neg_inv: Limb,
182) -> Uint<LIMBS> {
183 if n == 0 {
184 return *a;
185 }
186 if n == 1 {
187 return square_montgomery_form(a, modulus, mod_neg_inv);
188 }
189
190 let mut i = 0;
191 let mut out = *a;
192 let mut base;
193 let mut carry;
194
195 loop {
196 (base, out) = (out, Uint::ZERO);
197 carry = montgomery_multiply_inner(
198 &base.limbs,
199 &base.limbs,
200 &mut out.limbs,
201 &modulus.as_ref().limbs,
202 mod_neg_inv,
203 );
204 i += 1;
205 if i == n {
206 break;
207 }
208 // intermediate results are in "Almost Montgomery form", which is <= Uint::MAX
209 // but may require the modulus to be subtracted twice.
210 out = out
211 .conditional_borrowing_sub(modulus.as_ref(), carry.is_nonzero())
212 .0;
213 }
214
215 // correct for "Almost Montygomery form"
216 (out, carry) = out.try_sub_with_carry(carry, modulus.as_ref());
217 out.try_sub_with_carry(carry, modulus.as_ref()).0
218}