Skip to main content

p256/arithmetic/scalar/
scalar64.rs

1//! 64-bit secp256r1 scalar field algorithms.
2
3use super::MODULUS;
4use elliptic_curve::bigint::{Limb, U256};
5
6/// MU = floor(2^512 / n)
7///    = 115792089264276142090721624801893421302707618245269942344307673200490803338238
8///    = 0x100000000fffffffffffffffeffffffff43190552df1a6c21012ffd85eedf9bfe
9const MU: [Limb; 5] = [
10    Limb::from_u64(0x012f_fd85_eedf_9bfe),
11    Limb::from_u64(0x4319_0552_df1a_6c21),
12    Limb::from_u64(0xffff_fffe_ffff_ffff),
13    Limb::from_u64(0x0000_0000_ffff_ffff),
14    Limb::from_u64(0x0000_0000_0000_0001),
15];
16
17/// Barrett Reduction
18///
19/// NOTE: this implementation is specialized to P-256's order! See #1155
20///
21/// The general algorithm is:
22/// ```text
23/// p = n = order of group
24/// b = 2^64 = 64bit machine word
25/// k = 4
26/// a \in [0, 2^512]
27/// mu := floor(b^{2k} / p)
28/// q1 := floor(a / b^{k - 1})
29/// q2 := q1 * mu
30/// q3 := floor(q2 / b^{k + 1})
31/// r1 := a mod b^{k + 1}
32/// r2 := q3 * m mod b^{k + 1}
33/// r := r1 - r2
34///
35/// if r < 0: r := r + b^{k + 1}
36/// while r >= p: do r := r - p (at most twice)
37/// ```
38///
39/// References:
40/// - Handbook of Applied Cryptography, Chapter 14
41///   Algorithm 14.42
42///   <http://cacr.uwaterloo.ca/hac/about/chap14.pdf>
43///
44/// - Efficient and Secure Elliptic Curve Cryptography Implementation of Curve P-256
45///   Algorithm 6) Barrett Reduction modulo p
46///   <https://csrc.nist.gov/csrc/media/events/workshop-on-elliptic-curve-cryptography-standards/documents/papers/session6-adalier-mehmet.pdf>
47#[inline]
48#[allow(clippy::too_many_arguments)]
49pub(super) const fn barrett_reduce(lo: U256, hi: U256) -> U256 {
50    let lo = lo.as_limbs();
51    let hi = hi.as_limbs();
52    let a0 = lo[0];
53    let a1 = lo[1];
54    let a2 = lo[2];
55    let a3 = lo[3];
56    let a4 = hi[0];
57    let a5 = hi[1];
58    let a6 = hi[2];
59    let a7 = hi[3];
60    let q1 = [a3, a4, a5, a6, a7];
61    let q3 = q1_times_mu_shift_five(&q1);
62
63    let r1 = [a0, a1, a2, a3, a4];
64    let r2 = q3_times_n_keep_five(&q3);
65    let r = sub_inner_five(r1, r2);
66
67    // Result is in range (0, 3*n - 1),
68    // and 90% of the time, no subtraction will be needed.
69    let r = subtract_n_if_necessary(r);
70    U256::new([r[0], r[1], r[2], r[3]])
71}
72
73#[inline]
74const fn q1_times_mu_shift_five(q1: &[Limb; 5]) -> [Limb; 5] {
75    // Schoolbook multiplication
76
77    let (_w0, carry) = q1[0].carrying_mul_add(MU[0], Limb::ZERO, Limb::ZERO);
78    let (w1, carry) = q1[0].carrying_mul_add(MU[1], Limb::ZERO, carry);
79    let (w2, carry) = q1[0].carrying_mul_add(MU[2], Limb::ZERO, carry);
80    let (w3, carry) = q1[0].carrying_mul_add(MU[3], Limb::ZERO, carry);
81    // NOTE MU[4] == 1
82    // let (w4, w5) = q1[0].carrying_mul_add(MU[4], Limb::ZERO, carry);
83    let (w4, w5) = Limb::ZERO.carrying_add(q1[0], carry);
84
85    let (_w1, carry) = q1[1].carrying_mul_add(MU[0], w1, Limb::ZERO);
86    let (w2, carry) = q1[1].carrying_mul_add(MU[1], w2, carry);
87    let (w3, carry) = q1[1].carrying_mul_add(MU[2], w3, carry);
88    let (w4, carry) = q1[1].carrying_mul_add(MU[3], w4, carry);
89    // let (w5, w6) = mac(w5, q1[1], MU[4], carry);
90    let (w5, w6) = w5.carrying_add(q1[1], carry);
91
92    let (_w2, carry) = q1[2].carrying_mul_add(MU[0], w2, Limb::ZERO);
93    let (w3, carry) = q1[2].carrying_mul_add(MU[1], w3, carry);
94    let (w4, carry) = q1[2].carrying_mul_add(MU[2], w4, carry);
95    let (w5, carry) = q1[2].carrying_mul_add(MU[3], w5, carry);
96    // let (w6, w7) = q1[2].carrying_mul_add(MU[4], w6, carry);
97    let (w6, w7) = w6.carrying_add(q1[2], carry);
98
99    let (_w3, carry) = q1[3].carrying_mul_add(MU[0], w3, Limb::ZERO);
100    let (w4, carry) = q1[3].carrying_mul_add(MU[1], w4, carry);
101    let (w5, carry) = q1[3].carrying_mul_add(MU[2], w5, carry);
102    let (w6, carry) = q1[3].carrying_mul_add(MU[3], w6, carry);
103    // let (w7, w8) = q1[3].carrying_mul_add(MU[4], w7, carry);
104    let (w7, w8) = w7.carrying_add(q1[3], carry);
105
106    let (_w4, carry) = q1[4].carrying_mul_add(MU[0], w4, Limb::ZERO);
107    let (w5, carry) = q1[4].carrying_mul_add(MU[1], w5, carry);
108    let (w6, carry) = q1[4].carrying_mul_add(MU[2], w6, carry);
109    let (w7, carry) = q1[4].carrying_mul_add(MU[3], w7, carry);
110    // let (w8, w9) = q1[4].carrying_mul_add(MU[4], w8, carry);
111    let (w8, w9) = w8.carrying_add(q1[4], carry);
112
113    // let q2 = [_w0, _w1, _w2, _w3, _w4, w5, w6, w7, w8, w9];
114    [w5, w6, w7, w8, w9]
115}
116
117#[inline]
118const fn q3_times_n_keep_five(q3: &[Limb; 5]) -> [Limb; 5] {
119    // Schoolbook multiplication.
120
121    let modulus = MODULUS.as_ref().as_limbs();
122
123    let (w0, carry) = q3[0].carrying_mul_add(modulus[0], Limb::ZERO, Limb::ZERO);
124    let (w1, carry) = q3[0].carrying_mul_add(modulus[1], Limb::ZERO, carry);
125    let (w2, carry) = q3[0].carrying_mul_add(modulus[2], Limb::ZERO, carry);
126    let (w3, carry) = q3[0].carrying_mul_add(modulus[3], Limb::ZERO, carry);
127    // let (w4, _) = q3[0].carrying_mul_add(0, Limb::ZERO, carry);
128    let (w4, _) = (carry, Limb::ZERO);
129
130    let (w1, carry) = q3[1].carrying_mul_add(modulus[0], w1, Limb::ZERO);
131    let (w2, carry) = q3[1].carrying_mul_add(modulus[1], w2, carry);
132    let (w3, carry) = q3[1].carrying_mul_add(modulus[2], w3, carry);
133    let (w4, _) = q3[1].carrying_mul_add(modulus[3], w4, carry);
134
135    let (w2, carry) = q3[2].carrying_mul_add(modulus[0], w2, Limb::ZERO);
136    let (w3, carry) = q3[2].carrying_mul_add(modulus[1], w3, carry);
137    let (w4, _) = q3[2].carrying_mul_add(modulus[2], w4, carry);
138
139    let (w3, carry) = q3[3].carrying_mul_add(modulus[0], w3, Limb::ZERO);
140    let (w4, _) = q3[3].carrying_mul_add(modulus[1], w4, carry);
141
142    let (w4, _) = q3[4].carrying_mul_add(modulus[0], w4, Limb::ZERO);
143
144    [w0, w1, w2, w3, w4]
145}
146
147#[inline]
148#[allow(clippy::too_many_arguments)]
149const fn sub_inner_five(l: [Limb; 5], r: [Limb; 5]) -> [Limb; 5] {
150    let (w0, borrow) = l[0].borrowing_sub(r[0], Limb::ZERO);
151    let (w1, borrow) = l[1].borrowing_sub(r[1], borrow);
152    let (w2, borrow) = l[2].borrowing_sub(r[2], borrow);
153    let (w3, borrow) = l[3].borrowing_sub(r[3], borrow);
154    let (w4, _borrow) = l[4].borrowing_sub(r[4], borrow);
155
156    // If underflow occurred on the final limb - don't care (= add b^{k+1}).
157    [w0, w1, w2, w3, w4]
158}
159
160#[inline]
161#[allow(clippy::too_many_arguments)]
162const fn subtract_n_if_necessary(r: [Limb; 5]) -> [Limb; 5] {
163    let modulus = MODULUS.as_ref().as_limbs();
164
165    let (w0, borrow) = r[0].borrowing_sub(modulus[0], Limb::ZERO);
166    let (w1, borrow) = r[1].borrowing_sub(modulus[1], borrow);
167    let (w2, borrow) = r[2].borrowing_sub(modulus[2], borrow);
168    let (w3, borrow) = r[3].borrowing_sub(modulus[3], borrow);
169    let (w4, borrow) = r[4].borrowing_sub(Limb::ZERO, borrow);
170
171    // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise
172    // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the
173    // modulus.
174    let (w0, carry) = w0.carrying_add(modulus[0].bitand(borrow), Limb::ZERO);
175    let (w1, carry) = w1.carrying_add(modulus[1].bitand(borrow), carry);
176    let (w2, carry) = w2.carrying_add(modulus[2].bitand(borrow), carry);
177    let (w3, carry) = w3.carrying_add(modulus[3].bitand(borrow), carry);
178    let (w4, _carry) = w4.carrying_add(Limb::ZERO, carry);
179
180    [w0, w1, w2, w3, w4]
181}