Skip to main content

crypto_bigint/uint/
div_limb.rs

1//! Implementation of constant-time division via reciprocal precomputation, as described in
2//! "Improved Division by Invariant Integers" by Niels Möller and Torbjorn Granlund
3//! (DOI: 10.1109/TC.2010.143, <https://gmplib.org/~tege/division-paper.pdf>).
4
5use crate::{
6    Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word, primitives::widening_mul, word,
7};
8
9cpubits::cpubits! {
10    32 => {
11        /// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set.
12        ///
13        /// This method corresponds to Algorithm 3
14        pub const fn reciprocal(d: Word) -> Word {
15            debug_assert!(d >= (1 << (Word::BITS - 1)));
16
17            let d0 = d & 1;
18            let d10 = d >> 22;
19            let d21 = (d >> 11) + 1;
20            let d31 = (d >> 1) + d0;
21            let v0 = short_div((1 << 24) - (1 << 14) + (1 << 9), 24, d10, 10);
22            let (_lo, hi) = widening_mul(v0 * v0, d21);
23            let v1 = (v0 << 4) - hi - 1;
24
25            // Checks that the expression for `e` can be simplified in the way we did below.
26            debug_assert!(widening_mul(v1, d31).1 == (1 << 16) - 1);
27            let e = v1.wrapping_mul(d31).wrapping_neg() + (v1 >> 1) * d0;
28
29            let (_lo, hi) = widening_mul(v1, e);
30            // Note: the paper does not mention a wrapping add here,
31            // but the 64-bit version has it at this stage, and the function panics without it
32            // when calculating a reciprocal for `Word::MAX`.
33            let v2 = (v1 << 15).wrapping_add(hi >> 1);
34
35            // The paper has `(v2 + 1) * d / 2^32` (there's another 2^32, but it's accounted for later).
36            // If `v2 == 2^32-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
37            // Hence the `word::select()`.
38            let x = v2.wrapping_add(1);
39            let (_lo, hi) = widening_mul(x, d);
40            let hi = word::select(d, hi, Choice::from_u32_nz(x));
41
42            v2.wrapping_sub(hi).wrapping_sub(d)
43        }
44    }
45    64 => {
46        /// Calculates the reciprocal of the given 64-bit divisor with the highmost bit set.
47        ///
48        /// This method corresponds to Algorithm 2
49        pub const fn reciprocal(d: Word) -> Word {
50            debug_assert!(d >= (1 << (Word::BITS - 1)));
51
52            let d0 = d & 1;
53            let d9 = d >> 55;
54            let d40 = (d >> 24) + 1;
55            let d63 = (d >> 1) + d0;
56            let v0 = short_div((1 << 19) - 3 * (1 << 8), 19, d9 as u32, 9) as u64;
57            let v1 = (v0 << 11) - ((v0 * v0 * d40) >> 40) - 1;
58            let v2 = (v1 << 13) + ((v1 * ((1 << 60) - v1 * d40)) >> 47);
59
60            // Checks that the expression for `e` can be simplified in the way we did below.
61            debug_assert!(widening_mul(v2, d63).1 == (1 << 32) - 1);
62            let e = v2.wrapping_mul(d63).wrapping_neg() + (v2 >> 1) * d0;
63
64            let (_lo, hi) = widening_mul(v2, e);
65            let v3 = (v2 << 31).wrapping_add(hi >> 1);
66
67            // The paper has `(v3 + 1) * d / 2^64` (there's another 2^64, but it's accounted for later).
68            // If `v3 == 2^64-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
69            // Hence the `word::select()`.
70            let x = v3.wrapping_add(1);
71            let (_lo, hi) = widening_mul(x, d);
72            let hi = word::select(d, hi, word::choice_from_nz(x));
73
74            v3.wrapping_sub(hi).wrapping_sub(d)
75        }
76    }
77}
78
79/// Calculates `dividend / divisor`, given `dividend` and `divisor`
80/// along with their maximum bitsizes.
81#[inline(always)]
82const fn short_div(mut dividend: u32, dividend_bits: u32, divisor: u32, divisor_bits: u32) -> u32 {
83    // TODO: this may be sped up even more using the fact that `dividend` is a known constant.
84
85    // In the paper this is a table lookup, but since we want it to be constant-time,
86    // we have to access all the elements of the table, which is quite large.
87    // So this shift-and-subtract approach is actually faster.
88
89    // Passing `dividend_bits` and `divisor_bits` because calling `.leading_zeros()`
90    // causes a significant slowdown, and we know those values anyway.
91
92    let mut divisor = divisor << (dividend_bits - divisor_bits);
93    let mut quotient: u32 = 0;
94    let mut i = dividend_bits - divisor_bits + 1;
95
96    while i > 0 {
97        i -= 1;
98        let bit = Choice::from_u32_lt(dividend, divisor);
99        dividend = bit.select_u32(dividend.wrapping_sub(divisor), dividend);
100        divisor >>= 1;
101        quotient |= bit.not().select_u32(0, 1 << i);
102    }
103
104    quotient
105}
106
107/// Calculate the quotient and the remainder of the division of a wide word
108/// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`.
109///
110/// This method corresponds to Algorithm 4
111#[inline(always)]
112pub(crate) const fn div2by1(u0: Word, u1: Word, reciprocal: &Reciprocal) -> (Word, Word) {
113    let d = reciprocal.divisor_normalized;
114    let v = reciprocal.reciprocal;
115
116    debug_assert!(d >= (1 << (Word::BITS - 1)), "divisor top bit unset");
117    debug_assert!(u1 < d, "dividend >= divisor");
118
119    let q = (v as WideWord * u1 as WideWord) + word::join(u0, u1);
120    let (q0, q1) = word::split_wide(q);
121    let q1 = q1.wrapping_add(1);
122    let r = u0.wrapping_sub(q1.wrapping_mul(d));
123
124    let r_gt_q0 = word::choice_to_mask(word::choice_from_lt(q0, r));
125    let q1 = q1.wrapping_sub(r_gt_q0 & 1);
126    let r = r.wrapping_add(r_gt_q0 & d);
127
128    // If this was a normal `if`, we wouldn't need wrapping ops, because there would be no overflow.
129    // But since we calculate both results either way, we have to wrap.
130    // Added an assert to still check the lack of overflow in debug mode.
131    debug_assert!(r < d || q1 < Word::MAX);
132    let r_ge_d = word::choice_to_mask(word::choice_from_le(d, r));
133    let q1 = q1.wrapping_add(r_ge_d & 1);
134    let r = r.wrapping_sub(r_ge_d & d);
135
136    (q1, r)
137}
138
139/// Given two long integers `u = (..., u0, u1, u2)` and `v = (..., v0, v1)`
140/// (where `u2` and `v1` are the most significant limbs), where `floor(u / v) <= Limb::MAX`,
141/// calculates `q` such that `q - 1 <= floor(u / v) <= q`.
142/// In place of `v1` takes its reciprocal, and assumes that `v` was already pre-shifted
143/// so that v1 has its most significant bit set (that is, the reciprocal's `shift` is 0).
144///
145// This method corresponds to Algorithm 5
146#[inline(always)]
147#[allow(clippy::cast_possible_truncation)]
148pub(crate) const fn div3by2(
149    (u0, u1, u2): (Word, Word, Word),
150    (d0, d1): (Word, Word),
151    v: Word,
152) -> (Word, WideWord) {
153    let d = word::join(d0, d1);
154    let u_hi = word::join(u1, u2);
155
156    debug_assert!(d >= (1 << (WideWord::BITS - 1)), "divisor top bit unset");
157    debug_assert!(u_hi <= d, "dividend > divisor");
158
159    let q = (v as WideWord * u2 as WideWord) + u_hi;
160    let (q0, q1) = word::split_wide(q);
161    let r1 = u1.wrapping_sub(q1.wrapping_mul(d1));
162    let t = d0 as WideWord * q1 as WideWord;
163    let r = word::join(u0, r1).wrapping_sub(t).wrapping_sub(d);
164
165    let r1_ge_q0 = word::choice_to_wide_mask(word::choice_from_le(q0, (r >> Word::BITS) as Word));
166    let q1 = q1.wrapping_add(!(r1_ge_q0 as Word) & 1);
167    let r = r.wrapping_add(r1_ge_q0 & d);
168
169    let r_ge_d = word::choice_to_wide_mask(word::choice_from_wide_le(d, r));
170    let q1 = q1.wrapping_add(r_ge_d as Word & 1);
171    let r = r.wrapping_sub(r_ge_d & d);
172
173    // When the leading dividend word equals the leading divisor word, cap the quotient
174    // at Word::MAX and update the remainder. This differs from the original algorithm
175    // but is required for multi-word division.
176    let maxed = word::choice_from_wide_eq(u_hi, d);
177    let q1 = q1 | word::choice_to_mask(maxed);
178    let r = word::select_wide(r, d.saturating_add(u0 as WideWord), maxed);
179
180    (q1, r)
181}
182
183/// A pre-calculated reciprocal for division by a single limb.
184#[derive(Copy, Clone, Debug, PartialEq, Eq)]
185pub struct Reciprocal {
186    divisor_normalized: Word,
187    shift: u32,
188    reciprocal: Word,
189}
190
191impl Reciprocal {
192    /// Pre-calculates a reciprocal for a known divisor,
193    /// to be used in the single-limb division later.
194    #[must_use]
195    pub const fn new(divisor: NonZero<Limb>) -> Self {
196        let divisor = divisor.get_copy();
197
198        // Assuming this is constant-time for primitive types.
199        let shift = divisor.0.leading_zeros();
200
201        // Will not panic since divisor is non-zero
202        let divisor_normalized = divisor.0 << shift;
203
204        Self {
205            divisor_normalized,
206            shift,
207            reciprocal: reciprocal(divisor_normalized),
208        }
209    }
210
211    /// Returns a default instance of this object.
212    /// It is a self-consistent `Reciprocal` that will not cause panics in functions that take it.
213    ///
214    /// NOTE: intended for using it as a placeholder during compile-time array generation,
215    /// don't rely on the contents.
216    #[must_use]
217    pub const fn default() -> Self {
218        Self {
219            divisor_normalized: Word::MAX,
220            shift: 0,
221            // The result of calling `reciprocal(Word::MAX)`
222            // This holds both for 32- and 64-bit versions.
223            reciprocal: 1,
224        }
225    }
226
227    /// Get the shift value
228    #[must_use]
229    pub const fn shift(&self) -> u32 {
230        self.shift
231    }
232
233    /// Adjusted reciprocal for 3x2 division
234    ///
235    /// This method corresponds to Algorithm 6
236    #[must_use]
237    pub const fn reciprocal_3by2(&self, d0: Word, d1: Word) -> Word {
238        debug_assert!(self.shift == 0 && d1 == self.divisor_normalized);
239
240        let v = self.reciprocal;
241        let p = d1.wrapping_mul(v).wrapping_add(d0);
242
243        let p_lt_d0 = word::choice_from_lt(p, d0);
244        let v = word::select(v, v.wrapping_sub(1), p_lt_d0);
245
246        let p_ge_d1 = word::choice_from_le(d1, p).and(p_lt_d0);
247        let v = word::select(v, v.wrapping_sub(1), p_ge_d1);
248        let p = word::select(p, p.wrapping_sub(d1), p_ge_d1);
249        let p = word::select(p, p.wrapping_sub(d1), p_lt_d0);
250
251        let (t0, t1) = widening_mul(v, d0);
252        let p = p.wrapping_add(t1);
253
254        let p_lt_t1 = word::choice_from_lt(p, t1);
255        let v = word::select(v, v.wrapping_sub(1), p_lt_t1);
256        let d = word::join(d0, d1);
257        let t0p = word::join(t0, p);
258        let t0p_ge_d = word::choice_from_wide_le(d, t0p).and(p_lt_t1);
259        word::select(v, v.wrapping_sub(1), t0p_ge_d)
260    }
261}
262
263impl CtSelect for Reciprocal {
264    fn ct_select(&self, other: &Self, choice: Choice) -> Self {
265        Self {
266            divisor_normalized: Word::ct_select(
267                &self.divisor_normalized,
268                &other.divisor_normalized,
269                choice,
270            ),
271            shift: u32::ct_select(&self.shift, &other.shift, choice),
272            reciprocal: Word::ct_select(&self.reciprocal, &other.reciprocal, choice),
273        }
274    }
275}
276
277// `CtOption.map()` needs this; for some reason it doesn't use the value it already has
278// for the `None` branch.
279impl Default for Reciprocal {
280    fn default() -> Self {
281        Self::default()
282    }
283}
284
285/// Divides `u` by the divisor encoded in the `reciprocal`, and returns the remainder.
286#[inline(always)]
287pub(crate) const fn rem_limb_with_reciprocal<const L: usize>(
288    u: &Uint<L>,
289    reciprocal: &Reciprocal,
290) -> Limb {
291    let (u_shifted, u_hi) = u.shl_limb_with_carry(reciprocal.shift, Limb::ZERO);
292    let mut r = u_hi.0;
293
294    let mut j = L;
295    while j > 0 {
296        j -= 1;
297        let (_, rj) = div2by1(u_shifted.as_limbs()[j].0, r, reciprocal);
298        r = rj;
299    }
300    Limb(r >> reciprocal.shift)
301}
302
303/// Computes `(a * b) % d`.
304#[inline(always)]
305pub(crate) const fn mul_rem(a: Limb, b: Limb, d: NonZero<Limb>) -> Limb {
306    let rec = Reciprocal::new(d);
307    let (lo, hi) = widening_mul(a.0, b.0);
308    rem_limb_with_reciprocal(&Uint::from_words([lo, hi]), &rec)
309}
310
311#[cfg(test)]
312mod tests {
313    use super::{Reciprocal, div2by1, reciprocal};
314    use crate::{Limb, NonZero, Uint, WideWord, Word, word};
315
316    #[test]
317    fn reciprocal_valid() {
318        #![allow(clippy::integer_division_remainder_used, reason = "test")]
319        fn test(d: Word) {
320            let v = reciprocal(d);
321
322            // the reciprocal must be equal to floor((β^2 - 1) / d) - β
323            // v = floor((β^2 - 1) / d) - β = floor((β - 1 - d)*β + β - 1>/d)
324            let expected = WideWord::MAX / WideWord::from(d) - WideWord::from(Word::MAX) - 1;
325            assert_eq!(WideWord::from(v), expected);
326        }
327
328        test(Word::MAX);
329        test(1 << (Word::BITS - 1));
330        test((1 << (Word::BITS - 1)) | 1);
331    }
332
333    #[test]
334    fn reciprocal_3by2_valid() {
335        fn test(d: WideWord) {
336            let (d0, d1) = word::split_wide(d);
337            let v0 = Reciprocal::new(NonZero::<Limb>::new_unwrap(Limb(d1)));
338            let v = v0.reciprocal_3by2(d0, d1);
339
340            // the reciprocal must be equal to v = floor((β^3 − 1)/d) − β
341            // (β^3 − βd - 1)/d - 1 < v <= (β^3 − βd - 1)/d
342            // β^3 − βd - 1 - d < v*d <= β^3 − βd - 1
343            // β^3-1 - d < (v+β)d <= β^3-1
344            let actual = (Uint::<3>::from_word(v)
345                + Uint::<3>::ZERO.set_bit_vartime(Word::BITS, true))
346            .checked_mul(&Uint::<3>::from_wide_word(d))
347            .expect("overflow");
348            let min = Uint::<3>::MAX - Uint::<3>::from_wide_word(d);
349            assert!(actual > min, "{actual} <= {min}");
350        }
351
352        test(WideWord::MAX);
353        test(1 << (WideWord::BITS - 1));
354        test((1 << (WideWord::BITS - 1)) | 1);
355    }
356
357    #[test]
358    fn div2by1_overflow() {
359        // A regression test for a situation when in div2by1() an operation (`q1 + 1`)
360        // that is protected from overflowing by a condition in the original paper (`r >= d`)
361        // still overflows because we're calculating the results for both branches.
362        let r = Reciprocal::new(NonZero::new(Limb(Word::MAX - 1)).unwrap());
363        assert_eq!(
364            div2by1(Word::MAX - 63, Word::MAX - 2, &r),
365            (Word::MAX, Word::MAX - 65)
366        );
367    }
368}