Skip to main content

pxfm/compound/
compoundf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, is_integerf};
30use crate::double_double::DoubleDouble;
31use crate::rounding::CpuRoundTiesEven;
32use std::hint::black_box;
33
34#[cold]
35#[inline(never)]
36fn as_compoundf_special(x: f32, y: f32) -> f32 {
37    let nx = x.to_bits();
38    let ny = y.to_bits();
39    let ax: u32 = nx.wrapping_shl(1);
40    let ay: u32 = ny.wrapping_shl(1);
41
42    if ax == 0 || ay == 0 {
43        // x or y is 0
44        if ax == 0 {
45            // compound(0,y) = 1 except for y = sNaN
46            return if y.is_nan() { x + y } else { 1.0 };
47        }
48
49        if ay == 0 {
50            // compound (x, 0)
51            if x.is_nan() {
52                return x + y;
53            } // x = sNaN
54            return if x < -1.0 {
55                f32::NAN // rule (g)
56            } else {
57                1.0
58            }; // rule (a)
59        }
60    }
61
62    let mone = (-1.0f32).to_bits();
63    if ay >= 0xffu32 << 24 {
64        // y=Inf/NaN
65        // the case x=0 was already checked above
66        if ax > 0xffu32 << 24 {
67            return x + y;
68        } // x=NaN
69        if ay == 0xffu32 << 24 {
70            // y = +/-Inf
71            if nx > mone {
72                return f32::NAN;
73            } // rule (g)
74            let sy = ny >> 31; // sign bit of y
75            if nx == mone {
76                return if sy == 0 {
77                    0.0 // Rule (c)
78                } else {
79                    f32::INFINITY // Rule (b)
80                };
81            }
82            if x < 0.0 {
83                return if sy == 0 { 0.0 } else { f32::INFINITY };
84            }
85            if x > 0.0 {
86                return if sy != 0 { 0.0 } else { f32::INFINITY };
87            }
88            return 1.0;
89        }
90        return x + y; // case y=NaN
91    }
92
93    if nx >= mone || nx >= 0xffu32 << 23 {
94        // x is Inf, NaN or <= -1
95        if ax == 0xffu32 << 24 {
96            // x is +Inf or -Inf
97            if (nx >> 31) != 0 {
98                return f32::NAN;
99            } // x = -Inf, rule (g)
100            // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0
101            return if (ny >> 31) != 0 { 1.0 / x } else { x };
102        }
103        if ax > 0xffu32 << 24 {
104            return x + y;
105        } // x is NaN
106        if nx > mone {
107            return f32::NAN; // x < -1.0: rule (g)
108        }
109        // now x = -1
110        return if (ny >> 31) != 0 {
111            // y < 0
112            f32::INFINITY
113        } else {
114            // y > 0
115            0.0
116        };
117    }
118    0.0
119}
120
121#[inline]
122pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
123    // we include P[0] = 0 so that P[i] corresponds to degree i
124    // this degree-8 polynomial generated by Sollya (cf p1.sollya)
125    // has relative error < 2^-50.98
126    const P: [u64; 8] = [
127        0x0000000000000000,
128        0x3ff71547652b82fe,
129        0xbfe71547652b8d11,
130        0x3fdec709dc3a5014,
131        0xbfd715475b144983,
132        0x3fd2776c3fda300e,
133        0xbfcec990162358ce,
134        0x3fca645337c29e27,
135    ];
136
137    let z2 = z * z;
138    let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
139    let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
140    let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
141    let z4 = z2 * z2;
142    c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
143    c1 = dd_fmla(c3, z2, c1);
144    c1 = dd_fmla(c5, z4, c1);
145    z * c1
146}
147
148// for 0<=i<46, inv[i] approximates 1/t for 1/2+(i+13)/64 <= t < 1/2+(i+14)/64
149pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
150    0x3ff6800000000000,
151    0x3ff6000000000000,
152    0x3ff5800000000000,
153    0x3ff5000000000000,
154    0x3ff4c00000000000,
155    0x3ff4400000000000,
156    0x3ff4000000000000,
157    0x3ff3800000000000,
158    0x3ff3400000000000,
159    0x3ff2c00000000000,
160    0x3ff2800000000000,
161    0x3ff2000000000000,
162    0x3ff1c00000000000,
163    0x3ff1800000000000,
164    0x3ff1400000000000,
165    0x3ff1000000000000,
166    0x3ff0c00000000000,
167    0x3ff0800000000000,
168    0x3ff0000000000000,
169    0x3ff0000000000000,
170    0x3fef400000000000,
171    0x3feec00000000000,
172    0x3fee400000000000,
173    0x3fee000000000000,
174    0x3fed800000000000,
175    0x3fed000000000000,
176    0x3feca00000000000,
177    0x3fec400000000000,
178    0x3febe00000000000,
179    0x3feb800000000000,
180    0x3feb200000000000,
181    0x3feac00000000000,
182    0x3fea800000000000,
183    0x3fea200000000000,
184    0x3fe9c00000000000,
185    0x3fe9800000000000,
186    0x3fe9200000000000,
187    0x3fe8c00000000000,
188    0x3fe8800000000000,
189    0x3fe8400000000000,
190    0x3fe8000000000000,
191    0x3fe7c00000000000,
192    0x3fe7600000000000,
193    0x3fe7200000000000,
194    0x3fe6e00000000000,
195    0x3fe6a00000000000,
196];
197
198/* log2inv[i][0]+log2inv[i][1] is a double-double approximation of
199-log2(inv[i]), with log2inv[i][0] having absolute error < 2^-54.462,
200and log2inv[i][0]+log2inv[i][1] absolute error < 2^-109.101 */
201pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
202    (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf),
203    (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
204    (0x3c76fae441c09d76, 0xbfdb47ebf73882a1),
205    (0x3c72d352bea51e59, 0xbfd91bba891f1709),
206    (0xbc69575b04fa6fbd, 0xbfd800a563161c54),
207    (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
208    (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b),
209    (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
210    (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76),
211    (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
212    (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94),
213    (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
214    (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a),
215    (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
216    (0xbc58ecb169b9465f, 0xbfbbc84240adabba),
217    (0xbc5f3314e0985116, 0xbfb663f6fac91316),
218    (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b),
219    (0xbc389b03784b5be1, 0xbfa6bad3758efd87),
220    (0x0000000000000000, 0x0000000000000000),
221    (0x0000000000000000, 0x0000000000000000),
222    (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8),
223    (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
224    (0x3c2c141e66faaaad, 0x3fb4c560fe68af88),
225    (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
226    (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56),
227    (0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
228    (0xbc61562d61af73f8, 0x3fc494f863b8df35),
229    (0xbc60798d1aa21694, 0x3fc7046031c79f85),
230    (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1),
231    (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
232    (0xbc6086fce864a1f6, 0x3fce857d3d361368),
233    (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
234    (0x3c7c8d43e017579b, 0x3fd169c05363f158),
235    (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
236    (0xbc7c658d602e66b0, 0x3fd4106017c3eca3),
237    (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
238    (0x3c7ac9080333c605, 0x3fd6552b49986277),
239    (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
240    (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32),
241    (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
242    (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e),
243    (0xbc501d98c3531027, 0x3fdb877c57b1b070),
244    (0x3c78a38b4175d665, 0x3fdcffae611ad12b),
245    (0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
246    (0x3c76d261f1753e0b, 0x3fdefec61b011f85),
247    (0xbc87398fe685f171, 0x3fe0014332be0033),
248];
249
250/* for |z| <= 2^-6, returns an approximation of 2^z
251with absolute error < 2^-43.540  */
252#[inline]
253fn compoundf_expf_poly(z: f64) -> f64 {
254    /* Q is a degree-4 polynomial generated by Sollya (cf q1.sollya)
255    with absolute error < 2^-43.549 */
256    const Q: [u64; 5] = [
257        0x3ff0000000000000,
258        0x3fe62e42fef6d01a,
259        0x3fcebfbdff7feeba,
260        0x3fac6b167e579bee,
261        0x3f83b2b3428d06de,
262    ];
263    let z2 = z * z;
264    let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
265    let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
266    let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
267    dd_fmla(c2, z2, c0)
268}
269
270pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
271    /* for x > 0, 1+x is exact when 2^-29 <=  x < 2^53
272    for x < 0, 1+x is exact when -1 < x <= 2^-30 */
273
274    //  double u = (x >= 0x1p53) ? x : 1.0 + x;
275    let u = 1.0 + x;
276    /* For x < 0x1p53, x + 1 is exact thus u = x+1.
277    For x >= 2^53, we estimate log2(x) instead of log2(1+x),
278    since log2(1+x) = log2(x) + log2(1+1/x),
279    log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
280    error is bounded by 2^-52.471/53 < 2^-58.198 */
281
282    let mut v = u.to_bits();
283    let m: u64 = v & 0xfffffffffffffu64;
284    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
285    // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
286    v = v.wrapping_sub((e << 52) as u64);
287    let t = f64::from_bits(v);
288    // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
289    // thus log2(u) = e + log2(t)
290    v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
291    let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
292    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
293    let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
294    // we approximates log2(t) by -log2(r) + log2(r*t)
295    let p = log2p1_polyeval_1(z);
296    // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
297    e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
298}
299
300pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
301    0xbfe0000000000000,
302    0xbfde000000000000,
303    0xbfdc000000000000,
304    0xbfda000000000000,
305    0xbfd8000000000000,
306    0xbfd6000000000000,
307    0xbfd4000000000000,
308    0xbfd2000000000000,
309    0xbfd0000000000000,
310    0xbfcc000000000000,
311    0xbfc8000000000000,
312    0xbfc4000000000000,
313    0xbfc0000000000000,
314    0xbfb8000000000000,
315    0xbfb0000000000000,
316    0xbfa0000000000000,
317    0x0000000000000000,
318    0x3fa0000000000000,
319    0x3fb0000000000000,
320    0x3fb8000000000000,
321    0x3fc0000000000000,
322    0x3fc4000000000000,
323    0x3fc8000000000000,
324    0x3fcc000000000000,
325    0x3fd0000000000000,
326    0x3fd2000000000000,
327    0x3fd4000000000000,
328    0x3fd6000000000000,
329    0x3fd8000000000000,
330    0x3fda000000000000,
331    0x3fdc000000000000,
332    0x3fde000000000000,
333    0x3fe0000000000000,
334];
335
336/* exp2_U[i] is a double-double approximation h+l of 2^exp2_T[i]
337so that h approximates 2^exp2_T[i] with absolute error < 2^-53.092,
338and h+l approximates 2^exp2_T[i] with absolute error < 2^-107.385 */
339pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
340    (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd),
341    (0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
342    (0xbc741577ee04992f, 0x3fe7a11473eb0187),
343    (0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
344    (0x3c86e9f156864b27, 0x3fe8ace5422aa0db),
345    (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
346    (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090),
347    (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
348    (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad),
349    (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
350    (0x3c711065895048dd, 0x3fec199bdd85529c),
351    (0x3c6503cbd1e949db, 0x3fecb720dcef9069),
352    (0x3c72ed02d75b3707, 0x3fed5818dcfba487),
353    (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
354    (0xbc8e9c23179c2893, 0x3feea4afa2a490da),
355    (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
356    (0x0000000000000000, 0x3ff0000000000000),
357    (0x3c8d73e2a475b465, 0x3ff059b0d3158574),
358    (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
359    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
360    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
361    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
362    (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
363    (0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
364    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
365    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
366    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
367    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
368    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
369    (0xbc807abe1db13cad, 0x3ff5342b569d4f82),
370    (0x3c96324c054647ad, 0x3ff5ab07dd485429),
371    (0xbc9383c17e40b497, 0x3ff6247eb03a5585),
372    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
373];
374
375/* return the correct rounding of (1+x)^y, otherwise -1.0
376where t is an approximation of y*log2(1+x) with absolute error < 2^-40.680,
377assuming 0x1.7154759a0df53p-24 <= |t| <= 150
378exact is non-zero iff (1+x)^y is exact or midpoint */
379fn exp2_fast(t: f64) -> f64 {
380    let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150
381    let mut r = t - k; // |r| <= 1/2, exact
382    let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
383    // we add 2^-6 so that i is rounded to nearest
384    let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
385    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
386    // now |r| <= 2^-6
387    // 2^t = 2^k * exp2_U[i][0] * 2^r
388    v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
389    /* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
390    exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
391    with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
392    bounded by ulp(v) + 2^0.5 * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
393    Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
394    2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
395    The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
396    < 2^-40.849. */
397    let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
398    let ik = k as i64;
399    v = v.wrapping_add(ik.wrapping_shl(52) as u64); // scale v by 2^k, k is already integer
400
401    // in case of potential underflow, we defer to the accurate path
402    if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
403        return -1.0;
404    }
405    err = err.wrapping_add((ik << 52) as u64); // scale the error by 2^k too
406    let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
407    let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
408    if lb != rb {
409        return -1.0;
410    } // rounding test failed
411
412    f64::from_bits(v)
413}
414
415// 2^e/sqrt(2) < h < 2^e*sqrt(2), with -29 <= e <= 128
416// divide h, l by 2^e
417pub(crate) static LOG2P1_SCALE: [u64; 158] = [
418    0x41c0000000000000,
419    0x41b0000000000000,
420    0x41a0000000000000,
421    0x4190000000000000,
422    0x4180000000000000,
423    0x4170000000000000,
424    0x4160000000000000,
425    0x4150000000000000,
426    0x4140000000000000,
427    0x4130000000000000,
428    0x4120000000000000,
429    0x4110000000000000,
430    0x4100000000000000,
431    0x40f0000000000000,
432    0x40e0000000000000,
433    0x40d0000000000000,
434    0x40c0000000000000,
435    0x40b0000000000000,
436    0x40a0000000000000,
437    0x4090000000000000,
438    0x4080000000000000,
439    0x4070000000000000,
440    0x4060000000000000,
441    0x4050000000000000,
442    0x4040000000000000,
443    0x4030000000000000,
444    0x4020000000000000,
445    0x4010000000000000,
446    0x4000000000000000,
447    0x3ff0000000000000,
448    0x3fe0000000000000,
449    0x3fd0000000000000,
450    0x3fc0000000000000,
451    0x3fb0000000000000,
452    0x3fa0000000000000,
453    0x3f90000000000000,
454    0x3f80000000000000,
455    0x3f70000000000000,
456    0x3f60000000000000,
457    0x3f50000000000000,
458    0x3f40000000000000,
459    0x3f30000000000000,
460    0x3f20000000000000,
461    0x3f10000000000000,
462    0x3f00000000000000,
463    0x3ef0000000000000,
464    0x3ee0000000000000,
465    0x3ed0000000000000,
466    0x3ec0000000000000,
467    0x3eb0000000000000,
468    0x3ea0000000000000,
469    0x3e90000000000000,
470    0x3e80000000000000,
471    0x3e70000000000000,
472    0x3e60000000000000,
473    0x3e50000000000000,
474    0x3e40000000000000,
475    0x3e30000000000000,
476    0x3e20000000000000,
477    0x3e10000000000000,
478    0x3e00000000000000,
479    0x3df0000000000000,
480    0x3de0000000000000,
481    0x3dd0000000000000,
482    0x3dc0000000000000,
483    0x3db0000000000000,
484    0x3da0000000000000,
485    0x3d90000000000000,
486    0x3d80000000000000,
487    0x3d70000000000000,
488    0x3d60000000000000,
489    0x3d50000000000000,
490    0x3d40000000000000,
491    0x3d30000000000000,
492    0x3d20000000000000,
493    0x3d10000000000000,
494    0x3d00000000000000,
495    0x3cf0000000000000,
496    0x3ce0000000000000,
497    0x3cd0000000000000,
498    0x3cc0000000000000,
499    0x3cb0000000000000,
500    0x3ca0000000000000,
501    0x3c90000000000000,
502    0x3c80000000000000,
503    0x3c70000000000000,
504    0x3c60000000000000,
505    0x3c50000000000000,
506    0x3c40000000000000,
507    0x3c30000000000000,
508    0x3c20000000000000,
509    0x3c10000000000000,
510    0x3c00000000000000,
511    0x3bf0000000000000,
512    0x3be0000000000000,
513    0x3bd0000000000000,
514    0x3bc0000000000000,
515    0x3bb0000000000000,
516    0x3ba0000000000000,
517    0x3b90000000000000,
518    0x3b80000000000000,
519    0x3b70000000000000,
520    0x3b60000000000000,
521    0x3b50000000000000,
522    0x3b40000000000000,
523    0x3b30000000000000,
524    0x3b20000000000000,
525    0x3b10000000000000,
526    0x3b00000000000000,
527    0x3af0000000000000,
528    0x3ae0000000000000,
529    0x3ad0000000000000,
530    0x3ac0000000000000,
531    0x3ab0000000000000,
532    0x3aa0000000000000,
533    0x3a90000000000000,
534    0x3a80000000000000,
535    0x3a70000000000000,
536    0x3a60000000000000,
537    0x3a50000000000000,
538    0x3a40000000000000,
539    0x3a30000000000000,
540    0x3a20000000000000,
541    0x3a10000000000000,
542    0x3a00000000000000,
543    0x39f0000000000000,
544    0x39e0000000000000,
545    0x39d0000000000000,
546    0x39c0000000000000,
547    0x39b0000000000000,
548    0x39a0000000000000,
549    0x3990000000000000,
550    0x3980000000000000,
551    0x3970000000000000,
552    0x3960000000000000,
553    0x3950000000000000,
554    0x3940000000000000,
555    0x3930000000000000,
556    0x3920000000000000,
557    0x3910000000000000,
558    0x3900000000000000,
559    0x38f0000000000000,
560    0x38e0000000000000,
561    0x38d0000000000000,
562    0x38c0000000000000,
563    0x38b0000000000000,
564    0x38a0000000000000,
565    0x3890000000000000,
566    0x3880000000000000,
567    0x3870000000000000,
568    0x3860000000000000,
569    0x3850000000000000,
570    0x3840000000000000,
571    0x3830000000000000,
572    0x3820000000000000,
573    0x3810000000000000,
574    0x3800000000000000,
575    0x37f0000000000000,
576];
577
578/* put in h+l an approximation of log2(1+zh+zl)
579   for |zh| <= 1/64 + 2^-51.508, |zl| < 2^-58 and |zl| < ulp(zh).
580   We have |h|, |h+l| < 2^-5.459, |l| < 2^-56.162,
581   the relative error is bounded by 2^-91.196,
582   and |l| < 2^-50.523 |h| (see analyze_p2() in compoundf.sage).
583*/
584
585/* degree-13 polynomial generated by Sollya which approximates
586   log2(1+z) for |z| <= 1/64 with relative error < 2^-93.777
587   (cf file p2.sollya)
588*/
589static LOG2P1_LOG2_POLY: [u64; 18] = [
590    0x3ff71547652b82fe,
591    0x3c7777d0ffda0d80,
592    0xbfe71547652b82fe,
593    0xbc6777d0fd20b49c,
594    0x3fdec709dc3a03fd,
595    0x3c7d27f05171b74a,
596    0xbfd71547652b82fe,
597    0xbc57814e70b828b0,
598    0x3fd2776c50ef9bfe,
599    0x3c7e4f63e12bff83,
600    0xbfcec709dc3a03f4,
601    0x3fca61762a7adecc,
602    0xbfc71547652d8849,
603    0x3fc484b13d7e7029,
604    0xbfc2776c1b2a40fd,
605    0x3fc0c9a80f9b7c1c,
606    0xbfbecc6801121200,
607    0x3fbc6e4b91fd10e5,
608];
609
610fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
611    /* since we can't expect a relative accuracy better than 2^-93.777,
612    the lower part of the double-double approximation only needs to
613    have about 94-53 = 41 accurate bits. Since |p7*z^7/p1| < 2^-44,
614    we evaluate terms of degree 7 or more in double precision only. */
615    let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); // degree 13
616
617    for i in 7..=12 {
618        h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
619    }
620
621    let mut v = DoubleDouble::quick_mult_f64(z, h);
622    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
623    v.hi = t.hi;
624    v.lo += t.lo;
625
626    v = DoubleDouble::quick_mult(v, z);
627
628    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
629    v.hi = t.hi;
630    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);
631
632    v = DoubleDouble::quick_mult(v, z);
633
634    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
635    v.hi = t.hi;
636    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);
637
638    v = DoubleDouble::quick_mult(v, z);
639
640    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
641    v.hi = t.hi;
642    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);
643
644    v = DoubleDouble::quick_mult(v, z);
645
646    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
647    v.hi = t.hi;
648    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);
649
650    v = DoubleDouble::quick_mult(v, z);
651
652    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
653    v.hi = t.hi;
654    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);
655
656    v = DoubleDouble::quick_mult(v, z);
657
658    v
659}
660
661/* assuming -1 < x < 2^128, and x is representable in binary32,
662put in h+l a double-double approximation of log2(1+x),
663with relative error bounded by 2^-91.123, and |l| < 2^-48.574 |h|
664(see analyze_log2p1_accurate() in compoundf.sage) */
665pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
666    let mut v_dd = if 1.0 >= x {
667        // then 1.0 >= |x| since x > -1
668        if (x as f32).abs() >= f32::from_bits(0x25000000) {
669            DoubleDouble::from_exact_add(1.0, x)
670        } else {
671            DoubleDouble::new(x, 1.0)
672        }
673    } else {
674        // fast_two_sum() is exact when |x| < 2^54 by Lemma 1 condition (ii) of [1]
675        DoubleDouble::from_exact_add(x, 1.0)
676    };
677
678    // now h + l = 1 + x + eps with |eps| <= 2^-105 |h| and |l| <= ulp(h)
679    let mut v = v_dd.hi.to_bits();
680    let m = v & 0xfffffffffffffu64;
681    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
682
683    let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
684    v_dd.hi *= scale;
685    v_dd.lo *= scale;
686
687    // now |h| < sqrt(2) and |l| <= ulp(h) <= 2^-52
688
689    // now 1 + x ~ 2^e * (h + l) thus log2(1+x) ~ e + log2(h+l)
690
691    v = (2.0 + v_dd.hi).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
692    let i: i32 = (v >> 45) as i32 - 0x2002d; // h is near 1/2+(i+13)/64
693    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
694    let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); // exact, -1/64 <= zh <= 1/64
695    // since |r| <= 0x1.68p+0 and |l| <= 2^-52, |zl| <= 2^-51.508
696    // zh + zl = r*(h+l)-1
697    // log2(h+l) = -log2(r) + log2(r*(h+l)) = -log2(r) + log2(1+zh+zl)
698    z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
699    // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
700    /* the relative error of fast_two_sum() is bounded by 2^-105,
701    this amplified the relative error on p2() as follows:
702    (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */
703
704    // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
705    /* the relative error of fast_two_sum() is bounded by 2^-105,
706    this amplified the relative error on p2() as follows:
707    (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */
708    let log_p = log2_poly2(z_dd);
709    // ph + pl approximates log2(1+zh+zl) with relative error < 2^-93.471
710
711    /* since |log2inv[i][0]| < 1 and e is integer, the precondition of
712    fast_two_sum is fulfilled: either |e| >= 1, or e=0 and fast_two_sum
713    is exact */
714    let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
715    v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
716    v_dd.lo += f64::from_bits(log2_inv.0);
717    let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
718    p.lo += v_dd.lo + log_p.lo;
719    p
720}
721
722pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
723    /* Q2 is a degree-8 polynomial generated by Sollya (cf q2.sollya)
724    with absolute error < 2^-85.218 */
725
726    static Q2: [u64; 12] = [
727        0x3ff0000000000000,
728        0x3fe62e42fefa39ef,
729        0x3c7abc9d45534d06,
730        0x3fcebfbdff82c58f,
731        0xbc65e4383cf9ddf7,
732        0x3fac6b08d704a0c0,
733        0xbc46cbc55586c8f1,
734        0x3f83b2ab6fba4e77,
735        0x3f55d87fe789aec5,
736        0x3f2430912f879daa,
737        0x3eeffcc774b2367a,
738        0x3eb62c017b9bdfe6,
739    ];
740    let h2 = z.hi * z.hi;
741    let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
742    let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
743    c5 = dd_fmla(c7, h2, c5);
744    // since ulp(c5*h^5) <= 2^-86, we still compute c5*z as double
745    let z_vqh = c5 * z.hi;
746    let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
747    // multiply by z
748    q = DoubleDouble::quick_mult(q, z);
749    // add coefficient of degree 3
750    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
751    q.hi = t.hi;
752    q.lo += t.lo + f64::from_bits(Q2[6]);
753    // multiply by z and add coefficient of degree 2
754    q = DoubleDouble::quick_mult(q, z);
755    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
756    q.hi = t.hi;
757    q.lo += t.lo + f64::from_bits(Q2[4]);
758    // multiply by h+l and add coefficient of degree 1
759    q = DoubleDouble::quick_mult(q, z);
760    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
761    q.hi = t.hi;
762    q.lo += t.lo + f64::from_bits(Q2[2]);
763    // multiply by h+l and add coefficient of degree 0
764    q = DoubleDouble::quick_mult(q, z);
765    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
766    q.hi = t.hi;
767    q.lo += t.lo;
768    q
769}
770
771/* return the correct rounding of (1+x)^y or -1 if the rounding test failed,
772where t is an approximation of y*log2(1+x).
773We assume |h+l| < 150, |l/h| < 2^-48.445 |h|,
774and the relative error between h+l and y*log2(1+x) is < 2^-91.120.
775x and y are the original inputs of compound. */
776fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
777    if y == 1.0 {
778        let res = 1.0 + x;
779        return res;
780    }
781    let k = x_dd.hi.cpu_round_ties_even(); // |k| <= 150
782
783    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
784    if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
785        /* the relative error between h and y*log2(1+x) is bounded by
786        (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
787        2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
788        The above threshold is such that h*(1+2^-48.444) < H0. */
789        return (1.0 + x_dd.hi * 0.5) as f32;
790    }
791
792    let r = x_dd.hi - k; // |r| <= 1/2, exact
793    // since r is an integer multiple of ulp(h), fast_two_sum() below is exact
794    let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
795    let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
796    // we add 2^-6 so that i is rounded to nearest
797    let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
798    // h is near (i-16)/2^5
799    v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
800
801    // now |h| <= 2^-6
802    // 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
803    v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
804    let q = compoundf_exp2_poly2(v_dd);
805
806    /* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
807    |qh + ql - 2^(h+l)| < 2^-85.210 */
808    let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
809    let mut q = DoubleDouble::quick_mult(exp2u, q);
810    q = DoubleDouble::from_exact_add(q.hi, q.lo);
811    /* Total error:
812    * at input we have a relative error between h+l and y*log2(1+x) bounded
813      by 2^-91.120: h + l = y*log2(1+x) * (1 + eps1) with |eps1| < 2^-91.120.
814      Since |h+l| <= 150, this yields an absolute error bounded
815      by 150*2^-91.120 < 2^-83.891:
816      h + l = y*log2(1+x) + eps2 with |eps2| <= 150*2^-91.120 < 2^-83.891.
817    * the absolute error in q2() is bounded by 2^-85.210
818      and is multiplied by exp2_U[i] < 1.415
819    * the absolute d_mul() error is bounded by 2^-102.199
820    * the fast_two_sum() error is bounded by 2^-105
821    All this yields an absolute error on qh+ql bounded by:
822      2^-83.891 + 2^-85.210*1.415 + 2^-102.199 + 2^-105 < 2^-83.242.
823
824    We distinguish the "small" case when at input |h+l| <= 2^-9.
825    In this case k=0, i=16, thus exp2_T[i]=0, exp2_U[i]=1,
826    and absolute error in q2() is bounded by 2^-102.646,
827    and remains unchanged since the d_mul() call does not change qh, ql.
828    */
829
830    /* Rounding test: since |ql| < ulp(qh), and the error is less than ulp(qh),
831    the rounding test can fail only when the last 53-25 = 28 bits of qh
832    represent a signed number in [-1,1] (when it is -2 or 2, adding ql and
833    the error cannot cross a rounding boundary). */
834    let mut w = q.hi.to_bits();
835    if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
836        static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
837        let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
838        let err = f64::from_bits(ERR[small as usize]);
839
840        w = (q.hi + (q.lo + err)).to_bits();
841        w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
842    }
843
844    /* multiply qh+ql by 2^k: since 0.989 < qh_in < 1.011 and
845    0.707 < exp2_U[i] < 1.415, we have 0.69 < qh+ql < 1.44 */
846    v = (q.hi + q.lo).to_bits();
847    /* For RNDN, if qh fits exactly in 25 bits, and ql is tiny, so that
848    qh + ql rounds to qh, then we might have a double-rounding issue. */
849    if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
850        v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); // simulate round to odd
851    }
852    v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
853    // there is no underflow/overflow in the scaling by 2^k since |k| <= 150
854    f64::from_bits(v) as f32
855}
856
857// at input, exact is non-zero iff (1+x)^y is exact
858// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
859// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
860// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
861#[cold]
862#[inline(never)]
863fn compoundf_accurate(x: f32, y: f32) -> f32 {
864    let mut v = compoundf_log2p1_accurate(x as f64);
865    /* h + l is a double-double approximation of log(1+x),
866    with relative error bounded by 2^-91.123,
867    and |l| < 2^-48.574 |h| */
868    v = DoubleDouble::quick_mult_f64(v, y as f64);
869    /* h + l is a double-double approximation of y*log(1+x).
870    Since 2^-149 <= |h_in+l_in| < 128 and 2^-149 <= |y| < 2^128, we have
871    2^-298 <= |h+l| < 2^135, thus no underflow/overflow in double is possible.
872    The s_mul() error is bounded by ulp(l). Since |l_in| < 2^-48.574 |h_in|,
873    and the intermediate variable lo in s_mul() satisfies |lo| < ulp(h),
874    we have |l| < ulp(h) + |y l_in| <= ulp(h) + 2^-48.574 |y h_in|
875    < (2^-52 + 2^-48.574) |h| < 2^-48.445 |h|. The s_mul() error is thus
876    bounded by 2^-48.445*2^-52 = 2^-100.445 |h|. This yields a total relative
877    error bounded by (1+2^-91.123)*(1+2^-100.445)-1 < 2^-91.120. */
878    compoundf_exp2_accurate(v, x, y)
879}
880
881/// Computes compound function (1.0 + x)^y
882///
883/// Max ULP 0.5
884#[inline]
885pub fn f_compoundf(x: f32, y: f32) -> f32 {
886    /* Rules from IEEE 754-2019 for compound (x, n) with n integer:
887       (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
888       (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
889       (c) compound (-1, n) is +0 for n > 0
890       (d) compound (+/-0, n) is 1
891       (e) compound (+Inf, n) is +Inf for n > 0
892       (f) compound (+Inf, n) is +0 for n < 0
893       (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
894       (h) compound (qNaN, n) is qNaN for n <> 0.
895    */
896    let mone = (-1.0f32).to_bits();
897    let nx = x.to_bits();
898    let ny = y.to_bits();
899    if nx >= mone {
900        return as_compoundf_special(x, y);
901    } // x <= -1 
902    // now x > -1
903
904    let ax: u32 = nx.wrapping_shl(1);
905    let ay: u32 = ny.wrapping_shl(1);
906    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
907        return as_compoundf_special(x, y);
908    } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan
909
910    // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
911    if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
912        if ax <= 0x62000000u32 {
913            return 1.0 + y * x;
914        } // does it work for |x|<2^-29 and |y|<=16?
915        let mut s = x as f64 + 1.;
916        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
917
918        // exponentiation by squaring: O(log(y)) complexity
919        let mut acc = if iter_count % 2 != 0 { s } else { 1. };
920
921        while {
922            iter_count >>= 1;
923            iter_count
924        } != 0
925        {
926            s = s * s;
927            if iter_count % 2 != 0 {
928                acc *= s;
929            }
930        }
931
932        let dz = if y.is_sign_negative() { 1. / acc } else { acc };
933        return dz as f32;
934    }
935
936    let xd = x as f64;
937    let yd = y as f64;
938    let tx = xd.to_bits();
939    let ty = yd.to_bits();
940
941    let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));
942
943    /* l approximates log2(1+x) with relative error < 2^-47.997,
944    and 2^-149 <= |l| < 128 */
945
946    let t: u64 = (l * f64::from_bits(ty)).to_bits();
947    /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
948    2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
949    The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
950
951    // detect overflow/underflow
952    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
953        // |t| >= 128
954        if t >= 0x3018bu64 << 46 {
955            // t <= -150
956            return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
957        } else if (t >> 63) == 0 {
958            // t >= 128: overflow
959            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
960        }
961    }
962
963    /* since |t| < 150, the absolute error on t is bounded by
964    150*2^-47.909 < 2^-40.680 */
965
966    // 2^t rounds to 1 to nearest when |t| <= 0x1.715476ba97f14p-25
967    if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
968        return if (t >> 63) != 0 {
969            black_box(1.0) - black_box(f32::from_bits(0x33000000))
970        } else {
971            black_box(1.0) + black_box(f32::from_bits(0x33000000))
972        };
973    }
974
975    let res = exp2_fast(f64::from_bits(t));
976    if res != -1.0 {
977        return res as f32;
978    }
979    compoundf_accurate(x, y)
980}
981
982#[cfg(test)]
983mod tests {
984    use super::*;
985
986    #[test]
987    fn test_compoundf() {
988        assert_eq!(
989            f_compoundf(
990                0.000000000000000000000000000000000000011754944,
991                -170502050000000000000000000000000000000.
992            ),
993            1.
994        );
995        assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
996        assert_eq!(f_compoundf(2., 3.0), 27.);
997        assert!(f_compoundf(-2., 5.0).is_nan());
998        assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
999        assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
1000    }
1001}