1use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
30
/// Output of [`exp_b_range_reduc`]: the two pieces the caller recombines into the final result.
pub(crate) struct ExpBReduc {
    /// Power-of-two scale factor, assembled from an `EXP_2_MID` table entry plus an
    /// offset added directly to its exponent bits.
    pub(crate) hi: f64,
    /// Reduced argument that remains after the rounded multiple has been removed from
    /// the input; this is what the polynomial is evaluated on.
    pub(crate) lo: f64,
}
35
/// Number of low bits of the rounded integer `k` that select an entry of `EXP_2_MID`.
const MID_BITS: u32 = 5;
/// Mask that keeps only the low `MID_BITS` bits of `k` (the table index).
const MID_MASK: usize = (1usize << MID_BITS) - 1;
/// `log2(10)` scaled by `2^MID_BITS`; multiplying the input by it gives the value
/// that is rounded to `k`.
const LOG2_B: f64 = f64::from_bits(0x400a934f0979a371) * (1u32 << MID_BITS) as f64;
/// High part of `-log10(2) / 2^MID_BITS` (its bit pattern ends in zero bits).
const M_LOGB_2_HI: f64 = f64::from_bits(0xbfd34413509f8000) / (1u32 << MID_BITS) as f64;
/// Low-order correction that is added to `M_LOGB_2_HI` to refine `-log10(2) / 2^MID_BITS`.
const M_LOGB_2_LO: f64 = f64::from_bits(0x3d380433b83b532a) / (1u32 << MID_BITS) as f64;
/// IEEE-754 double bit patterns of `2^(i / 32)` for `i = 0..32`.
///
/// Entries are kept as raw bits so that an exponent offset can be added to them with
/// plain integer arithmetic before converting to `f64`.
const EXP_2_MID: [u64; 32] = [
    0x3ff0000000000000, // i = 0
    0x3ff059b0d3158574, // i = 1
    0x3ff0b5586cf9890f, // i = 2
    0x3ff11301d0125b51, // i = 3
    0x3ff172b83c7d517b, // i = 4
    0x3ff1d4873168b9aa, // i = 5
    0x3ff2387a6e756238, // i = 6
    0x3ff29e9df51fdee1, // i = 7
    0x3ff306fe0a31b715, // i = 8
    0x3ff371a7373aa9cb, // i = 9
    0x3ff3dea64c123422, // i = 10
    0x3ff44e086061892d, // i = 11
    0x3ff4bfdad5362a27, // i = 12
    0x3ff5342b569d4f82, // i = 13
    0x3ff5ab07dd485429, // i = 14
    0x3ff6247eb03a5585, // i = 15
    0x3ff6a09e667f3bcd, // i = 16
    0x3ff71f75e8ec5f74, // i = 17
    0x3ff7a11473eb0187, // i = 18
    0x3ff82589994cce13, // i = 19
    0x3ff8ace5422aa0db, // i = 20
    0x3ff93737b0cdc5e5, // i = 21
    0x3ff9c49182a3f090, // i = 22
    0x3ffa5503b23e255d, // i = 23
    0x3ffae89f995ad3ad, // i = 24
    0x3ffb7f76f2fb5e47, // i = 25
    0x3ffc199bdd85529c, // i = 26
    0x3ffcb720dcef9069, // i = 27
    0x3ffd5818dcfba487, // i = 28
    0x3ffdfc97337b9b5f, // i = 29
    0x3ffea4afa2a490da, // i = 30
    0x3fff50765b6e4540, // i = 31
];
75
/// Double-precision bit patterns of the polynomial coefficients used for the reduced
/// argument `dx`:
///
/// `10^dx ~ 1 + C[0]*dx + C[1]*dx^2 + C[2]*dx^3 + C[3]*dx^4 + C[4]*dx^5`.
///
/// The values are close to `ln(10)^n / n!` for `n = 1..=5`.
pub(crate) const EXP10F_COEFFS: [u64; 5] = [
    0x40026bb1bbb55515, // C[0], multiplies dx
    0x40053524c73bd3ea, // C[1], multiplies dx^2
    0x4000470591dff149, // C[2], multiplies dx^3
    0x3ff2bd7c0a9fbc4d, // C[3], multiplies dx^4
    0x3fe1429e74a98f43, // C[4], multiplies dx^5
];
87
88#[inline(always)]
90pub(crate) fn exp_b_range_reduc<B: ExpfBackend>(x: f32, backend: &B) -> ExpBReduc {
91 let xd = x as f64;
92
93 let kd = backend.round(LOG2_B * xd);
95 let k = unsafe { kd.to_int_unchecked::<i32>() }; let exp_hi = (k.wrapping_shr(MID_BITS) as u64).wrapping_shl(52); let mid_index = (k as usize) & MID_MASK;
102 let mh_bits = EXP_2_MID[mid_index].wrapping_add(exp_hi);
103 let mh = f64::from_bits(mh_bits);
104
105 let z0 = backend.fma(kd, M_LOGB_2_HI, xd);
107 let dx = backend.fma(kd, M_LOGB_2_LO, z0);
108
109 ExpBReduc { lo: dx, hi: mh }
110}
111
112#[inline(always)]
113fn exp10f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
114 let x_u = x.to_bits();
115 let x_abs = x_u & 0x7fff_ffff;
116
117 if x_abs >= 0x421a209bu32 {
119 if x_u.wrapping_shl(1) >= 0xffu32 << 24 {
120 if x.is_sign_negative() && x.is_infinite() {
122 return 0.0;
123 } else if x.is_infinite() {
124 return f32::INFINITY;
125 }
126 return x + f32::NAN; }
128 if x_u > 0xc2349e35u32 {
130 if x.is_infinite() {
132 return 0.0;
133 }
134 if x.is_nan() {
136 return x;
137 }
138 return 0.0;
139 }
140 if x > 0. && (x_u >= 0x421a209bu32) {
142 return x + f32::INFINITY;
144 }
145 }
146
147 if x_abs <= 0x3d000000u32 {
148 if x_abs <= 0x3b9a209bu32 {
150 if x_u == 0xb25e5bd9u32 {
151 return 1.;
153 }
154 if x_abs <= 0x32800000u32 {
157 return backend.fmaf(x, f32::from_bits(0x40135da2), 1.0);
158 }
159 }
160
161 let xd = x as f64;
162
163 let p = backend.polyeval7(
171 xd,
172 f64::from_bits(0x40026bb1bbb55516),
173 f64::from_bits(0x40053524c73cfbf6),
174 f64::from_bits(0x4000470591de0b07),
175 f64::from_bits(0x3ff2bd760599f3a5),
176 f64::from_bits(0x3fe142a001511a6f),
177 f64::from_bits(0x3fca7feffa781d53),
178 f64::from_bits(0x3fb16e53492c0f0e),
179 );
180 return backend.fma(p, xd, 1.) as f32;
181 }
182
183 let rr = exp_b_range_reduc(x, &backend);
186
187 let lo2 = rr.lo * rr.lo;
190 let c0 = backend.fma(rr.lo, f64::from_bits(EXP10F_COEFFS[0]), 1.0);
192 let c1 = backend.fma(
194 rr.lo,
195 f64::from_bits(EXP10F_COEFFS[2]),
196 f64::from_bits(EXP10F_COEFFS[1]),
197 );
198 let c2 = backend.fma(
200 rr.lo,
201 f64::from_bits(EXP10F_COEFFS[4]),
202 f64::from_bits(EXP10F_COEFFS[3]),
203 );
204 let p = backend.fma(lo2, c2, c1);
207 backend.fma(p, lo2 * rr.hi, c0 * rr.hi) as f32
212}
213
214#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
215#[target_feature(enable = "avx", enable = "fma")]
216unsafe fn exp10f_fma_impl(x: f32) -> f32 {
217 use crate::exponents::expf::FmaBackend;
218 exp10f_gen(x, FmaBackend {})
219}
220
221#[inline]
225pub fn f_exp10f(x: f32) -> f32 {
226 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
227 {
228 exp10f_gen(x, GenericExpfBackend {})
229 }
230 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
231 {
232 use std::sync::OnceLock;
233 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
234 let q = EXECUTOR.get_or_init(|| {
235 if std::arch::is_x86_feature_detected!("avx")
236 && std::arch::is_x86_feature_detected!("fma")
237 {
238 exp10f_fma_impl
239 } else {
240 fn def_exp10f(x: f32) -> f32 {
241 exp10f_gen(x, GenericExpfBackend {})
242 }
243 def_exp10f
244 }
245 });
246 unsafe { q(x) }
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_exp10f` on NaN, small arguments, exact powers of ten and infinities.
    #[test]
    fn test_exp10f() {
        // NaN in, NaN out.
        for nan in [f32::from_bits(0x7fc0_0000), f32::NAN] {
            assert!(f_exp10f(nan).is_nan());
        }

        // (input, expected result) pairs compared for exact equality.
        let cases: [(f32, f32); 7] = [
            (-1. / 64., 0.9646616),
            (1. / 64., 1.0366329),
            (1., 10.0),
            (2., 100.0),
            (3., 1000.0),
            (f32::INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, 0.),
        ];
        for (input, expected) in cases {
            assert_eq!(f_exp10f(input), expected);
        }
    }
}