crypto_bigint/modular/
mul.rs

1use super::reduction::montgomery_reduction;
2use crate::{CtLt, Limb, Odd, Uint, UintRef, WideWord, Word, word};
3
4/// Computes an "almost" Montgomery multiplication of `x` and `y` into `out`, that is
5/// `out = x * y * 2^(-n*W) mod m + am` assuming `k = -1/m mod 2^W`,
6/// where `W` is the bit size of the limb, and `n * W` is the full bit size of the integer.
7///
8/// NOTE: `out` is assumed to be pre-zeroized.
9///
10/// Unlike the standard Montgomery multiplication, we are reducing the final result only if
11/// it overflows `2^(n*W)`, not when it overflows `m`.
12///
13/// This means that this function does not assume `x` and `y` are reduced `mod m`,
14/// and the result will be correct `mod m`, but potentially greater than `m`,
15/// and smaller than `2^(n * W) + m`.
16/// See "Efficient Software Implementations of Modular Exponentiation" by S. Gueron for details
17/// (<https://eprint.iacr.org/2011/239.pdf>).
18///
19/// This function exhibits certain properties which were discovered via randomized tests,
20/// but (to my knowledge at this moment) have not been proven formally.
21/// Hereinafter we denote `f(x) = floor(x / m)`, that is `f` is the number of subtractions
22/// of the modulus required to fully reduce `x`.
23///
24/// 1. In general, if `f(x) = k` and `f(y) = n`, then `f(AMM(x, y)) <= min(k, n) + 1`.
25///    That is the "reduction error" grows with every operation,
26///    but is determined by the argument with the lower error.
27/// 2. To retrieve the number from Montgomery form we MM it by 1. In this case `f(AMM(x, 1)) = 0`,
28///    that is the result is always fully reduced regardless of `f(x)`.
29/// 3. `f(AMM(x, x)) <= 1` regardless of `f(x)`. That is, squaring resets the error to at most 1.
30#[inline]
31pub(crate) fn almost_montgomery_mul(
32    x: &UintRef,
33    y: &UintRef,
34    out: &mut UintRef,
35    modulus: &UintRef,
36    mod_neg_inv: Limb,
37) {
38    let overflow = montgomery_multiply_inner(
39        x.as_limbs(),
40        y.as_limbs(),
41        out.as_mut_limbs(),
42        modulus.as_limbs(),
43        mod_neg_inv,
44    );
45    let overflow = word::choice_from_lsb(overflow.0);
46    out.conditional_borrowing_sub_assign(modulus, overflow);
47}
48
49/// Ensure the output of an "almost" Montgomery multiplication is properly reduced.
50///
51/// Using the properties of `almost_montgomery_mul()` (see its documentation):
52/// - We have an incoming `x` which is fully reduced (`floor(x / modulus) = 0`).
53/// - We build an array of `powers` which are produced by multiplying the previous power by
54///   `x`, so for each power `floor(power / modulus) <= 1`.
55/// - Then we take turns squaring the accumulator `z` (bringing `floor(z / modulus)` to 1
56///   regardless of the previous reduction level) and multiplying by a power of `x`
57///   (bringing `floor(z / modulus)` to at most 2).
58/// - Then we either exit the loop, or square again, which brings `floor(z / modulus)` back
59///   to 1.
60///
61/// We now need to reduce `z` at most twice to bring it within `[0, modulus)`.
62pub(crate) fn almost_montgomery_reduce(z: &mut UintRef, modulus: &UintRef) {
63    z.conditional_borrowing_sub_assign(modulus, !z.ct_lt(modulus));
64    z.conditional_borrowing_sub_assign(modulus, !z.ct_lt(modulus));
65}
66
67/// Based on Algorithm 14.36 in Handbook of Applied Cryptography
68/// <https://cacr.uwaterloo.ca/hac/about/chap14.pdf>
69///
70/// Multiply `x` and `y` in Montgomery form, producing `x•y•R^-1 mod modulus + a•modulus`.
71///
72/// This algorithm roughly corresponds to the Finely Integrated Operand Scanning (FIOS)
73/// method of "Analyzing and Comparing Montgomery Multiplication Algorithms" by Koc et al
74/// <https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf>
75/// but using wide words to track the intermediate products and carry.
76///
77/// The final conditional subtraction of the modulus to produce a result in the range
78/// `[0, modulus)` is not performed here, and must be performed by the caller. In some
79/// cases this may be deferred, as demonstrated by the `almost_montgomery_mul` method.
80#[inline(always)]
81#[allow(clippy::cast_possible_truncation)]
82pub const fn montgomery_multiply_inner(
83    x: &[Limb],
84    y: &[Limb],
85    out: &mut [Limb],
86    modulus: &[Limb],
87    mod_neg_inv: Limb,
88) -> Limb {
89    let nlimbs = modulus.len();
90    assert!(nlimbs == x.len() && nlimbs == y.len() && nlimbs == out.len());
91
92    let mut meta_carry = 0;
93
94    let mut i = 0;
95    while i < nlimbs {
96        let xi = x[i];
97        // A[0] + x[i]y[0] <= (2^64 - 1)^2 + (2^64 - 1) = 2^128 - 2^64
98        let axy = (xi.0 as WideWord) * (y[0].0 as WideWord) + out[0].0 as WideWord;
99        let u = (axy as Word).wrapping_mul(mod_neg_inv.0);
100
101        let mut carry;
102        // A[0] + x[i]y[0] + um[0] <= (2^128 - 1) + (2^128 - 2^64) = 2^129 - 2^64 - 1
103        let (a, c) = ((u as WideWord) * (modulus[0].0 as WideWord)).overflowing_add(axy);
104        // carry <= (2^129 - 2^64 - 1) / 2^64 <= 2^65 - 2
105        carry = ((c as WideWord) << Word::BITS) | (a >> Word::BITS);
106
107        let mut j = 1;
108        while j < nlimbs {
109            // A[j] + x[i]y[j] <= (2^64 - 1)^2 + (2^64 - 1) = 2^128 - 2^64
110            let axy = (xi.0 as WideWord) * (y[j].0 as WideWord) + out[j].0 as WideWord;
111            // um[j] + carry <= (2^64 - 1)^2 + (2^65 - 2) = 2^128 - 1
112            let umc = (u as WideWord) * (modulus[j].0 as WideWord) + carry;
113            let (ab, c) = axy.overflowing_add(umc);
114            out[j - 1] = Limb(ab as Word);
115            // carry <= (2^129 - 2^64 - 1) / 2^64 <= 2^65 - 2
116            carry = ((c as WideWord) << Word::BITS) | (ab >> Word::BITS);
117            j += 1;
118        }
119
120        carry += meta_carry;
121        (out[nlimbs - 1], meta_carry) = (Limb(carry as Word), carry >> Word::BITS);
122
123        i += 1;
124    }
125
126    Limb(meta_carry as Word)
127}
128
129/// Computes the Montgomery product of `a` and `b` modulo `modulus`, where
130/// `a` and `b` are in Montgomery form.
131pub(crate) const fn mul_montgomery_form<const LIMBS: usize>(
132    a: &Uint<LIMBS>,
133    b: &Uint<LIMBS>,
134    modulus: &Odd<Uint<LIMBS>>,
135    mod_neg_inv: Limb,
136) -> Uint<LIMBS> {
137    let mut out = Uint::<LIMBS>::ZERO;
138    let carry = montgomery_multiply_inner(
139        &a.limbs,
140        &b.limbs,
141        &mut out.limbs,
142        &modulus.as_ref().limbs,
143        mod_neg_inv,
144    );
145    out.try_sub_with_carry(carry, modulus.as_ref()).0
146}
147
148/// Computes the Montgomery squaring of `a` modulo `modulus` where
149/// `a` is in Montgomery form.
150pub(crate) const fn square_montgomery_form<const LIMBS: usize>(
151    a: &Uint<LIMBS>,
152    modulus: &Odd<Uint<LIMBS>>,
153    mod_neg_inv: Limb,
154) -> Uint<LIMBS> {
155    // One case in which it appears to be more efficient to perform a wide squaring and reduction.
156    if LIMBS == 4 {
157        let lower_upper = a.widening_square();
158        return montgomery_reduction(&lower_upper, modulus, mod_neg_inv);
159    }
160
161    let mut out = Uint::<LIMBS>::ZERO;
162    let carry = montgomery_multiply_inner(
163        &a.limbs,
164        &a.limbs,
165        &mut out.limbs,
166        &modulus.as_ref().limbs,
167        mod_neg_inv,
168    );
169    out.try_sub_with_carry(carry, modulus.as_ref()).0
170}
171
172/// Computes a repeated Montgomery squaring of `a` modulo `modulus` where
173/// `a` is in Montgomery form.
174///
175/// This method is variable time in `n`.
176#[inline(always)]
177pub(crate) const fn square_repeat_montgomery_form<const LIMBS: usize>(
178    a: &Uint<LIMBS>,
179    n: u32,
180    modulus: &Odd<Uint<LIMBS>>,
181    mod_neg_inv: Limb,
182) -> Uint<LIMBS> {
183    if n == 0 {
184        return *a;
185    }
186    if n == 1 {
187        return square_montgomery_form(a, modulus, mod_neg_inv);
188    }
189
190    let mut i = 0;
191    let mut out = *a;
192    let mut base;
193    let mut carry;
194
195    loop {
196        (base, out) = (out, Uint::ZERO);
197        carry = montgomery_multiply_inner(
198            &base.limbs,
199            &base.limbs,
200            &mut out.limbs,
201            &modulus.as_ref().limbs,
202            mod_neg_inv,
203        );
204        i += 1;
205        if i == n {
206            break;
207        }
208        // intermediate results are in "Almost Montgomery form", which is <= Uint::MAX
209        // but may require the modulus to be subtracted twice.
210        out = out
211            .conditional_borrowing_sub(modulus.as_ref(), carry.is_nonzero())
212            .0;
213    }
214
215    // correct for "Almost Montygomery form"
216    (out, carry) = out.try_sub_with_carry(carry, modulus.as_ref());
217    out.try_sub_with_carry(carry, modulus.as_ref()).0
218}
crypto_bigint/modular/mul.rs

crypto_bigint/modular/
mul.rs