pxfm/
double_double.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::get_exponent_f64;
30#[allow(unused_imports)]
31use crate::common::*;
32use std::ops::{Mul, Neg};
33// https://hal.science/hal-01351529v3/document
34
35#[derive(Copy, Clone, Default, Debug)]
36pub(crate) struct DoubleDouble {
37    pub(crate) lo: f64,
38    pub(crate) hi: f64,
39}
40
41impl Neg for DoubleDouble {
42    type Output = Self;
43
44    #[inline]
45    fn neg(self) -> Self::Output {
46        Self {
47            hi: -self.hi,
48            lo: -self.lo,
49        }
50    }
51}
52
53impl DoubleDouble {
54    #[inline]
55    pub(crate) const fn from_bit_pair(pair: (u64, u64)) -> Self {
56        Self {
57            lo: f64::from_bits(pair.0),
58            hi: f64::from_bits(pair.1),
59        }
60    }
61
62    #[inline]
63    pub(crate) const fn new(lo: f64, hi: f64) -> Self {
64        DoubleDouble { lo, hi }
65    }
66
67    // Non FMA helper
68    #[allow(dead_code)]
69    #[inline]
70    pub(crate) const fn split(a: f64) -> DoubleDouble {
71        // CN = 2^N.
72        const CN: f64 = (1 << 27) as f64;
73        const C: f64 = CN + 1.0;
74        let t1 = C * a;
75        let t2 = a - t1;
76        let r_hi = t1 + t2;
77        let r_lo = a - r_hi;
78        DoubleDouble::new(r_lo, r_hi)
79    }
80
81    // Non FMA helper
82    #[allow(dead_code)]
83    #[inline]
84    fn from_exact_mult_impl_non_fma(asz: DoubleDouble, a: f64, b: f64) -> Self {
85        let bs = DoubleDouble::split(b);
86
87        let r_hi = a * b;
88        let t1 = asz.hi * bs.hi - r_hi;
89        let t2 = asz.hi * bs.lo + t1;
90        let t3 = asz.lo * bs.hi + t2;
91        let r_lo = asz.lo * bs.lo + t3;
92        DoubleDouble::new(r_lo, r_hi)
93    }
94
95    // valid only for |a| > b
96    #[inline]
97    pub(crate) const fn from_exact_add(a: f64, b: f64) -> DoubleDouble {
98        let r_hi = a + b;
99        let t = r_hi - a;
100        let r_lo = b - t;
101        DoubleDouble::new(r_lo, r_hi)
102    }
103
104    // valid only for |a| > b
105    #[inline]
106    pub(crate) const fn from_exact_sub(a: f64, b: f64) -> DoubleDouble {
107        let r_hi = a - b;
108        let t = a - r_hi;
109        let r_lo = t - b;
110        DoubleDouble::new(r_lo, r_hi)
111    }
112
113    #[inline]
114    pub(crate) const fn from_full_exact_add(a: f64, b: f64) -> DoubleDouble {
115        let r_hi = a + b;
116        let t1 = r_hi - a;
117        let t2 = r_hi - t1;
118        let t3 = b - t1;
119        let t4 = a - t2;
120        let r_lo = t3 + t4;
121        DoubleDouble::new(r_lo, r_hi)
122    }
123
124    #[allow(unused)]
125    #[inline]
126    pub(crate) fn dd_f64_mul_add(a: f64, b: f64, c: f64) -> f64 {
127        let ddx2 = DoubleDouble::from_exact_mult(a, b);
128        let zv = DoubleDouble::full_add_f64(ddx2, c);
129        zv.to_f64()
130    }
131
132    #[inline]
133    pub(crate) const fn from_full_exact_sub(a: f64, b: f64) -> Self {
134        let r_hi = a - b;
135        let t1 = r_hi - a;
136        let t2 = r_hi - t1;
137        let t3 = -b - t1;
138        let t4 = a - t2;
139        let r_lo = t3 + t4;
140        DoubleDouble::new(r_lo, r_hi)
141    }
142
143    #[inline]
144    pub(crate) fn add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
145        let s = a.hi + b.hi;
146        let d = s - a.hi;
147        let l = ((b.hi - d) + (a.hi + (d - s))) + (a.lo + b.lo);
148        DoubleDouble::new(l, s)
149    }
150
151    #[inline]
152    pub(crate) fn quick_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
153        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
154        let v = a.lo + b.lo;
155        let w = sl + v;
156        DoubleDouble::from_exact_add(sh, w)
157    }
158
159    #[inline]
160    pub(crate) fn quick_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
161        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_sub(a.hi, b.hi);
162        let v = a.lo - b.lo;
163        let w = sl + v;
164        DoubleDouble::from_exact_add(sh, w)
165    }
166
167    #[inline]
168    pub(crate) fn full_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
169        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
170        let DoubleDouble { hi: th, lo: tl } = DoubleDouble::from_full_exact_add(a.lo, b.lo);
171        let c = sl + th;
172        let v = DoubleDouble::from_exact_add(sh, c);
173        let w = tl + v.lo;
174        DoubleDouble::from_exact_add(v.hi, w)
175    }
176
177    #[inline]
178    pub(crate) fn full_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
179        DoubleDouble::full_dd_add(a, -b)
180    }
181
182    #[inline]
183    pub(crate) fn sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
184        let s = a.hi - b.hi;
185        let d = s - a.hi;
186        let l = ((-b.hi - d) + (a.hi + (d - s))) + (a.lo - b.lo);
187        DoubleDouble::new(l, s)
188    }
189
190    /// DoubleDouble-style square root for a double-double number
191    #[inline]
192    pub(crate) fn sqrt(self) -> DoubleDouble {
193        let a = self.hi + self.lo;
194
195        if a == 0.0 {
196            return DoubleDouble { hi: 0.0, lo: 0.0 };
197        }
198        if a < 0.0 || a.is_nan() {
199            return DoubleDouble {
200                hi: f64::NAN,
201                lo: 0.0,
202            };
203        }
204        if a.is_infinite() {
205            return DoubleDouble {
206                hi: f64::INFINITY,
207                lo: 0.0,
208            };
209        }
210
211        let x = a.sqrt();
212
213        let x2 = DoubleDouble::from_exact_mult(x, x);
214
215        // Residual = self - x²
216        let mut r = self.hi - x2.hi;
217        r += self.lo;
218        r -= x2.lo;
219
220        let dx = r / (2.0 * x);
221        let hi = x + dx;
222        let lo = (x - hi) + dx;
223
224        DoubleDouble { hi, lo }
225    }
226
227    /// DoubleDouble-style square root for a double-double number
228    #[inline]
229    pub(crate) fn fast_sqrt(self) -> DoubleDouble {
230        let a = self.hi + self.lo;
231        let x = a.sqrt();
232
233        let x2 = DoubleDouble::from_exact_mult(x, x);
234
235        // Residual = self - x²
236        let mut r = self.hi - x2.hi;
237        r += self.lo;
238        r -= x2.lo;
239
240        let dx = r / (2.0 * x);
241        let hi = x + dx;
242        let lo = (x - hi) + dx;
243
244        DoubleDouble { hi, lo }
245    }
246
247    /// `a*b+c`
248    ///
249    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
250    #[inline]
251    pub(crate) fn mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
252        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
253        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
254        DoubleDouble::new(r + q, p)
255    }
256
257    /// `a*b+c`
258    ///
259    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
260    #[inline(always)]
261    #[allow(unused)]
262    pub(crate) fn mul_add_f64_fma(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
263        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
264        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
265        DoubleDouble::new(r + q, p)
266    }
267
268    /// `a*b+c`
269    ///
270    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
271    #[inline]
272    pub(crate) fn quick_mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
273        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
274        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
275        DoubleDouble::new(r + q, p)
276    }
277
278    /// `a*b+c`
279    ///
280    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
281    #[inline]
282    pub(crate) fn mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> DoubleDouble {
283        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
284        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
285        DoubleDouble::new(r + q, p)
286    }
287
288    // /// Accurate reciprocal: 1 / self
289    // #[inline]
290    // pub(crate) fn recip_raphson(self) -> DoubleDouble {
291    //     let y0 = DoubleDouble::recip(self);
292    //     let z = DoubleDouble::mul_add_f64(-self, y0, 1.0);
293    //     DoubleDouble::mul_add(y0, z, y0)
294    // }
295
296    /// Accurate reciprocal: 1 / self
297    #[inline]
298    pub(crate) fn recip(self) -> DoubleDouble {
299        #[cfg(any(
300            all(
301                any(target_arch = "x86", target_arch = "x86_64"),
302                target_feature = "fma"
303            ),
304            target_arch = "aarch64"
305        ))]
306        {
307            let y = 1. / self.hi;
308            let e1 = f_fmla(-self.hi, y, 1.0);
309            let e2 = f_fmla(-self.lo, y, e1);
310            let e = y * e2;
311            DoubleDouble::new(e, y)
312        }
313        #[cfg(not(any(
314            all(
315                any(target_arch = "x86", target_arch = "x86_64"),
316                target_feature = "fma"
317            ),
318            target_arch = "aarch64"
319        )))]
320        {
321            let y = 1.0 / self.hi;
322
323            let DoubleDouble { hi: p1, lo: err1 } = DoubleDouble::from_exact_mult(self.hi, y);
324            let e1 = (1.0 - p1) - err1;
325            let DoubleDouble { hi: p2, lo: err2 } = DoubleDouble::from_exact_mult(self.lo, y);
326            let e2 = (e1 - p2) - err2;
327            let e = y * e2;
328
329            DoubleDouble::new(e, y)
330        }
331    }
332
333    #[inline]
334    pub(crate) fn from_recip(b: f64) -> Self {
335        #[cfg(any(
336            all(
337                any(target_arch = "x86", target_arch = "x86_64"),
338                target_feature = "fma"
339            ),
340            target_arch = "aarch64"
341        ))]
342        {
343            let x_hi = 1.0 / b;
344            let err = f_fmla(-x_hi, b, 1.0);
345            let x_lo = err / b;
346            Self::new(x_lo, x_hi)
347        }
348        #[cfg(not(any(
349            all(
350                any(target_arch = "x86", target_arch = "x86_64"),
351                target_feature = "fma"
352            ),
353            target_arch = "aarch64"
354        )))]
355        {
356            let x_hi = 1.0 / b;
357            let prod = Self::from_exact_mult(x_hi, b);
358            let err = (1.0 - prod.hi) - prod.lo;
359            let x_lo = err / b;
360            Self::new(x_lo, x_hi)
361        }
362    }
363
364    #[inline(always)]
365    #[allow(unused)]
366    pub(crate) fn from_quick_recip_fma(b: f64) -> Self {
367        let h = 1.0 / b;
368        let hl = f64::mul_add(h, -b, 1.) * h;
369        DoubleDouble::new(hl, h)
370    }
371
372    #[inline]
373    pub(crate) fn from_quick_recip(b: f64) -> Self {
374        #[cfg(any(
375            all(
376                any(target_arch = "x86", target_arch = "x86_64"),
377                target_feature = "fma"
378            ),
379            target_arch = "aarch64"
380        ))]
381        {
382            let h = 1.0 / b;
383            let hl = f_fmla(h, -b, 1.) * h;
384            DoubleDouble::new(hl, h)
385        }
386        #[cfg(not(any(
387            all(
388                any(target_arch = "x86", target_arch = "x86_64"),
389                target_feature = "fma"
390            ),
391            target_arch = "aarch64"
392        )))]
393        {
394            let h = 1.0 / b;
395            let pr = DoubleDouble::from_exact_mult(h, b);
396            let err = (1.0 - pr.hi) - pr.lo;
397            let hl = err * h;
398            DoubleDouble::new(hl, h)
399        }
400    }
401
402    #[inline]
403    pub(crate) fn from_exact_div(a: f64, b: f64) -> Self {
404        #[cfg(any(
405            all(
406                any(target_arch = "x86", target_arch = "x86_64"),
407                target_feature = "fma"
408            ),
409            target_arch = "aarch64"
410        ))]
411        {
412            let q_hi = a / b;
413            let r = f_fmla(-q_hi, b, a);
414            let q_lo = r / b;
415            Self::new(q_lo, q_hi)
416        }
417
418        #[cfg(not(any(
419            all(
420                any(target_arch = "x86", target_arch = "x86_64"),
421                target_feature = "fma"
422            ),
423            target_arch = "aarch64"
424        )))]
425        {
426            let q_hi = a / b;
427
428            let p = DoubleDouble::from_exact_mult(q_hi, b);
429            let r = DoubleDouble::from_exact_sub(a, p.hi);
430            let r = r.hi + (r.lo - p.lo);
431            let q_lo = r / b;
432
433            Self::new(q_lo, q_hi)
434        }
435    }
436
437    // Resistant to overflow without FMA
438    #[inline]
439    pub(crate) fn from_exact_div_fma(a: f64, b: f64) -> Self {
440        let q_hi = a / b;
441        let r = f64::mul_add(-q_hi, b, a);
442        let q_lo = r / b;
443        Self::new(q_lo, q_hi)
444    }
445
446    #[inline]
447    pub(crate) fn from_sqrt(x: f64) -> Self {
448        #[cfg(any(
449            all(
450                any(target_arch = "x86", target_arch = "x86_64"),
451                target_feature = "fma"
452            ),
453            target_arch = "aarch64"
454        ))]
455        {
456            let h = x.sqrt();
457            /* h = sqrt(x) * (1 + e1) with |e1| < 2^-52
458            thus h^2 = x * (1 + e2) with |e2| < 2^-50.999 */
459            let e = -f_fmla(h, h, -x); // exact
460
461            /* e = x - h^2 */
462            let l = e / (h + h);
463            DoubleDouble::new(l, h)
464        }
465        #[cfg(not(any(
466            all(
467                any(target_arch = "x86", target_arch = "x86_64"),
468                target_feature = "fma"
469            ),
470            target_arch = "aarch64"
471        )))]
472        {
473            let h = x.sqrt();
474            let prod_hh = DoubleDouble::from_exact_mult(h, h);
475            let e = (x - prod_hh.hi) - prod_hh.lo; // exact
476
477            /* e = x - h^2 */
478            let l = e / (h + h);
479            DoubleDouble::new(l, h)
480        }
481    }
482
483    /// Safe to overflow underflow division using mandatory FMA.
484    #[inline]
485    #[allow(dead_code)]
486    pub(crate) fn div_safe_dd_f64(a: DoubleDouble, b: f64) -> Self {
487        let q1 = a.hi / b;
488        let r = f64::mul_add(-q1, b, a.hi);
489        let r = r + a.lo;
490        let q2 = r / b;
491
492        DoubleDouble::new(q2, q1)
493    }
494
495    #[inline]
496    pub(crate) fn div_dd_f64(a: DoubleDouble, b: f64) -> Self {
497        #[cfg(any(
498            all(
499                any(target_arch = "x86", target_arch = "x86_64"),
500                target_feature = "fma"
501            ),
502            target_arch = "aarch64"
503        ))]
504        {
505            let q1 = a.hi / b;
506            let r = f_fmla(-q1, b, a.hi);
507            let r = r + a.lo;
508            let q2 = r / b;
509
510            DoubleDouble::new(q2, q1)
511        }
512        #[cfg(not(any(
513            all(
514                any(target_arch = "x86", target_arch = "x86_64"),
515                target_feature = "fma"
516            ),
517            target_arch = "aarch64"
518        )))]
519        {
520            let th = a.hi / b;
521            let prod = DoubleDouble::from_exact_mult(th, b);
522            let beta_h = a.hi - prod.hi;
523            let beta_l = beta_h - prod.lo;
524            let beta = beta_l + a.lo;
525            let tl = beta / b;
526            DoubleDouble::new(tl, th)
527        }
528    }
529
530    // /// Dekker division with one refinement step
531    // #[inline]
532    // pub(crate) fn div_dd_f64_newton_raphson(a: DoubleDouble, b: f64) -> Self {
533    //     // Initial estimate q = a / b
534    //     let q = DoubleDouble::div_dd_f64(a, b);
535    //
536    //     // One Newton-Raphson refinement step:
537    //     // e = a - q * b
538    //     let qb = DoubleDouble::quick_mult_f64(q, b);
539    //     let e = DoubleDouble::sub(a, qb);
540    //     let e_div_b = DoubleDouble::div_dd_f64(e, b);
541    //
542    //     DoubleDouble::add(q, e_div_b)
543    // }
544
545    // /// Dekker division with two Newton-Raphson refinement steps
546    // #[inline]
547    // pub(crate) fn div_dd_f64_newton_raphson_2(a: Dekker, b: f64) -> Self {
548    //     // First estimate: q = a / b (one round of Dekker division)
549    //     let q1 = Dekker::div_dd_f64(a, b);
550    //
551    //     // First refinement: q2 = q1 + (a - q1 * b) / b
552    //     let qb1 = Dekker::quick_mult_f64(q1, b);
553    //     let e1 = Dekker::sub(a, qb1);
554    //     let dq1 = Dekker::div_dd_f64(e1, b);
555    //     let q2 = Dekker::add(q1, dq1);
556    //
557    //     // Second refinement: q3 = q2 + (a - q2 * b) / b
558    //     let qb2 = Dekker::quick_mult_f64(q2, b);
559    //     let e2 = Dekker::sub(a, qb2);
560    //     let dq2 = Dekker::div_dd_f64(e2, b);
561    //
562    //     Dekker::add(q2, dq2)
563    // }
564
565    // #[inline]
566    // pub(crate) fn neg(self) -> Self {
567    //     Self {
568    //         lo: -self.lo, hi: -self.hi,
569    //     }
570    // }
571
572    #[inline]
573    pub(crate) fn from_f64_div_dd(a: f64, b: DoubleDouble) -> Self {
574        let q1 = a / b.hi;
575
576        let prod = DoubleDouble::from_exact_mult(q1, b.hi);
577        let prod_lo = f_fmla(q1, b.lo, prod.lo);
578        let rem = f_fmla(-1.0, prod.hi, a) - prod_lo;
579
580        let q2 = rem / b.hi;
581
582        DoubleDouble::new(q2, q1)
583    }
584
585    #[inline(always)]
586    #[allow(unused)]
587    pub(crate) fn from_f64_div_dd_fma(a: f64, b: DoubleDouble) -> Self {
588        let q1 = a / b.hi;
589
590        let prod = DoubleDouble::from_exact_mult_fma(q1, b.hi);
591        let prod_lo = f64::mul_add(q1, b.lo, prod.lo);
592        let rem = f64::mul_add(-1.0, prod.hi, a) - prod_lo;
593
594        let q2 = rem / b.hi;
595
596        DoubleDouble::new(q2, q1)
597    }
598
599    #[inline(always)]
600    #[allow(unused)]
601    pub(crate) fn div_fma(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
602        let q = 1.0 / b.hi;
603        let r_hi = a.hi * q;
604        let e_hi = f64::mul_add(b.hi, -r_hi, a.hi);
605        let e_lo = f64::mul_add(b.lo, -r_hi, a.lo);
606        let r_lo = q * (e_hi + e_lo);
607        DoubleDouble::new(r_lo, r_hi)
608    }
609
610    #[inline]
611    pub(crate) fn div(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
612        let q = 1.0 / b.hi;
613        let r_hi = a.hi * q;
614        #[cfg(any(
615            all(
616                any(target_arch = "x86", target_arch = "x86_64"),
617                target_feature = "fma"
618            ),
619            target_arch = "aarch64"
620        ))]
621        {
622            let e_hi = f_fmla(b.hi, -r_hi, a.hi);
623            let e_lo = f_fmla(b.lo, -r_hi, a.lo);
624            let r_lo = q * (e_hi + e_lo);
625            DoubleDouble::new(r_lo, r_hi)
626        }
627
628        #[cfg(not(any(
629            all(
630                any(target_arch = "x86", target_arch = "x86_64"),
631                target_feature = "fma"
632            ),
633            target_arch = "aarch64"
634        )))]
635        {
636            let b_hi_r_hi = DoubleDouble::from_exact_mult(b.hi, -r_hi);
637            let b_lo_r_hi = DoubleDouble::from_exact_mult(b.lo, -r_hi);
638            let e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
639            let e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
640            let r_lo = q * (e_hi + e_lo);
641            DoubleDouble::new(r_lo, r_hi)
642        }
643    }
644
645    #[inline]
646    pub(crate) fn from_exact_mult(a: f64, b: f64) -> Self {
647        #[cfg(any(
648            all(
649                any(target_arch = "x86", target_arch = "x86_64"),
650                target_feature = "fma"
651            ),
652            target_arch = "aarch64"
653        ))]
654        {
655            let r_hi = a * b;
656            let r_lo = f_fmla(a, b, -r_hi);
657            DoubleDouble::new(r_lo, r_hi)
658        }
659        #[cfg(not(any(
660            all(
661                any(target_arch = "x86", target_arch = "x86_64"),
662                target_feature = "fma"
663            ),
664            target_arch = "aarch64"
665        )))]
666        {
667            let splat = DoubleDouble::split(a);
668            DoubleDouble::from_exact_mult_impl_non_fma(splat, a, b)
669        }
670    }
671
672    #[inline(always)]
673    #[allow(unused)]
674    pub(crate) fn from_exact_mult_fma(a: f64, b: f64) -> Self {
675        let r_hi = a * b;
676        let r_lo = f64::mul_add(a, b, -r_hi);
677        DoubleDouble::new(r_lo, r_hi)
678    }
679
680    // #[inline]
681    // pub(crate) fn add_f64(&self, other: f64) -> DoubleDouble {
682    //     let r = DoubleDouble::from_exact_add(self.hi, other);
683    //     Dekker::from_exact_add(r.hi, r.lo + self.lo)
684    // }
685
686    // #[inline]
687    // pub(crate) fn to_triple(self) -> TripleDouble {
688    //     TripleDouble::new(0., self.lo, self.hi)
689    // }
690
691    /// Computes `a * b + c`
692    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
693    ///
694    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
695    #[inline]
696    pub(crate) fn mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
697        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
698        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
699        DoubleDouble::new(r + q, p)
700    }
701
702    /// Computes `a * b + c`
703    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
704    ///
705    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
706    ///
707    /// *Correctness*
708    /// |c.hi| > |a.hi * b.hi|
709    #[inline]
710    pub(crate) fn quick_mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
711        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
712        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
713        DoubleDouble::new(r + q, p)
714    }
715
716    /// Computes `a * b + c`
717    ///
718    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
719    ///
720    /// *Correctness*
721    /// |c.hi| > |a.hi * b.hi|
722    #[inline]
723    pub(crate) fn quick_mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> Self {
724        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
725        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
726        DoubleDouble::new(r + q, p)
727    }
728
729    // #[inline]
730    // pub(crate) fn mul_f64_add_full(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
731    //     /*
732    //         double _t1, _t2, _t3, _t4, _t5, _t6, _t7, _t8;   \
733    //                                                  \
734    //         Mul12(&_t1,&_t2,(a),(bh));                       \
735    //         Add12(_t3,_t4,(ch),_t1);                         \
736    //         _t5 = (bl) * (a);                                \
737    //         _t6 = (cl) + _t2;                                \
738    //         _t7 = _t5 + _t6;                                 \
739    //         _t8 = _t7 + _t4;                                 \
740    //         Add12((*(resh)),(*(resl)),_t3,_t8);              \
741    //     */
742    //     let DoubleDouble { hi: t1, lo: t2 } = DoubleDouble::from_exact_mult(a.hi, b);
743    //     let DoubleDouble { hi: t3, lo: t4 } = DoubleDouble::from_full_exact_add(c.hi, t1);
744    //     let t5 = a.lo * b;
745    //     let t6 = c.lo + t2;
746    //     let t7 = t5 + t6;
747    //     let t8 = t7 + t4;
748    //     DoubleDouble::from_full_exact_add(t3, t8)
749    // }
750
751    /// Computes `a * b + c`
752    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
753    ///
754    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
755    #[inline]
756    pub(crate) fn f64_mul_f64_add(a: f64, b: f64, c: DoubleDouble) -> Self {
757        let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
758        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
759        DoubleDouble::new(r + q, p)
760    }
761
762    // /// Computes `a * b + c`
763    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
764    // ///
765    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
766    // #[inline]
767    // pub(crate) fn single_mul_add(a: f64, b: f64, c: f64) -> Self {
768    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
769    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
770    //     DoubleDouble::new(r + q, p)
771    // }
772
773    // /// Computes `a * b + c` safe to overflow without FMA
774    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
775    // ///
776    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
777    // #[inline]
778    // pub(crate) fn mul_f64_safe_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
779    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_safe_f64(a, b);
780    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
781    //     DoubleDouble::new(r + q, p)
782    // }
783
784    /// `a*b+c`
785    ///
786    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
787    #[inline]
788    pub(crate) fn mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
789        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
790        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
791        DoubleDouble::new(r + q, p)
792    }
793
794    /// `a*b+c`
795    ///
796    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
797    #[inline(always)]
798    #[allow(unused)]
799    pub(crate) fn mul_add_fma(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
800        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
801        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
802        DoubleDouble::new(r + q, p)
803    }
804
805    /// `a*b+c`
806    ///
807    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
808    ///
809    /// *Correctness*
810    /// |c.hi| > |a.hi * b.hi|
811    #[inline]
812    pub(crate) fn quick_mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
813        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
814        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
815        DoubleDouble::new(r + q, p)
816    }
817
818    /// `a*b+c`
819    ///
820    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
821    ///
822    /// *Correctness*
823    /// |c.hi| > |a.hi * b.hi|
824    #[inline(always)]
825    #[allow(unused)]
826    pub(crate) fn quick_mul_add_fma(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
827        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
828        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
829        DoubleDouble::new(r + q, p)
830    }
831
832    #[inline]
833    pub(crate) fn quick_mult(a: DoubleDouble, b: DoubleDouble) -> Self {
834        #[cfg(any(
835            all(
836                any(target_arch = "x86", target_arch = "x86_64"),
837                target_feature = "fma"
838            ),
839            target_arch = "aarch64"
840        ))]
841        {
842            let mut r = DoubleDouble::from_exact_mult(a.hi, b.hi);
843            let t1 = f_fmla(a.hi, b.lo, r.lo);
844            let t2 = f_fmla(a.lo, b.hi, t1);
845            r.lo = t2;
846            r
847        }
848        #[cfg(not(any(
849            all(
850                any(target_arch = "x86", target_arch = "x86_64"),
851                target_feature = "fma"
852            ),
853            target_arch = "aarch64"
854        )))]
855        {
856            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
857            let tl1 = a.hi * b.lo;
858            let tl2 = a.lo * b.hi;
859            let cl2 = tl1 + tl2;
860            let cl3 = cl1 + cl2;
861            DoubleDouble::new(cl3, ch)
862        }
863    }
864
865    #[inline(always)]
866    #[allow(unused)]
867    pub(crate) fn quick_mult_fma(a: DoubleDouble, b: DoubleDouble) -> Self {
868        let mut r = DoubleDouble::from_exact_mult_fma(a.hi, b.hi);
869        let t1 = f64::mul_add(a.hi, b.lo, r.lo);
870        let t2 = f64::mul_add(a.lo, b.hi, t1);
871        r.lo = t2;
872        r
873    }
874
875    #[inline]
876    pub(crate) fn mult(a: DoubleDouble, b: DoubleDouble) -> Self {
877        #[cfg(any(
878            all(
879                any(target_arch = "x86", target_arch = "x86_64"),
880                target_feature = "fma"
881            ),
882            target_arch = "aarch64"
883        ))]
884        {
885            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
886            let tl0 = a.lo * b.lo;
887            let tl1 = f_fmla(a.hi, b.lo, tl0);
888            let cl2 = f_fmla(a.lo, b.hi, tl1);
889            let cl3 = cl1 + cl2;
890            DoubleDouble::from_exact_add(ch, cl3)
891        }
892        #[cfg(not(any(
893            all(
894                any(target_arch = "x86", target_arch = "x86_64"),
895                target_feature = "fma"
896            ),
897            target_arch = "aarch64"
898        )))]
899        {
900            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
901            let tl1 = a.hi * b.lo;
902            let tl2 = a.lo * b.hi;
903            let cl2 = tl1 + tl2;
904            let cl3 = cl1 + cl2;
905            DoubleDouble::from_exact_add(ch, cl3)
906        }
907    }
908
909    #[inline]
910    pub(crate) fn mult_f64(a: DoubleDouble, b: f64) -> Self {
911        #[cfg(any(
912            all(
913                any(target_arch = "x86", target_arch = "x86_64"),
914                target_feature = "fma"
915            ),
916            target_arch = "aarch64"
917        ))]
918        {
919            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
920            let cl3 = f_fmla(a.lo, b, cl1);
921            DoubleDouble::from_exact_add(ch, cl3)
922        }
923        #[cfg(not(any(
924            all(
925                any(target_arch = "x86", target_arch = "x86_64"),
926                target_feature = "fma"
927            ),
928            target_arch = "aarch64"
929        )))]
930        {
931            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
932            let cl2 = a.lo * b;
933            let t = DoubleDouble::from_exact_add(ch, cl2);
934            let tl2 = t.lo + cl1;
935            DoubleDouble::from_exact_add(t.hi, tl2)
936        }
937    }
938
939    #[inline(always)]
940    pub(crate) fn quick_f64_mult(a: f64, b: DoubleDouble) -> DoubleDouble {
941        DoubleDouble::quick_mult_f64(b, a)
942    }
943
944    #[inline]
945    pub(crate) fn quick_mult_f64(a: DoubleDouble, b: f64) -> Self {
946        #[cfg(any(
947            all(
948                any(target_arch = "x86", target_arch = "x86_64"),
949                target_feature = "fma"
950            ),
951            target_arch = "aarch64"
952        ))]
953        {
954            let h = b * a.hi;
955            let l = f_fmla(b, a.lo, f_fmla(b, a.hi, -h));
956            Self { lo: l, hi: h }
957        }
958        #[cfg(not(any(
959            all(
960                any(target_arch = "x86", target_arch = "x86_64"),
961                target_feature = "fma"
962            ),
963            target_arch = "aarch64"
964        )))]
965        {
966            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
967            let cl2 = a.lo * b;
968            let cl3 = cl1 + cl2;
969            DoubleDouble::new(cl3, ch)
970        }
971    }
972
973    #[inline(always)]
974    #[allow(unused)]
975    pub(crate) fn quick_mult_f64_fma(a: DoubleDouble, b: f64) -> Self {
976        let h = b * a.hi;
977        let l = f64::mul_add(b, a.lo, f64::mul_add(b, a.hi, -h));
978        Self { lo: l, hi: h }
979    }
980
981    // /// Double-double multiplication safe to overflow without FMA
982    // #[inline]
983    // pub(crate) fn quick_mult_safe_f64(a: DoubleDouble, b: f64) -> Self {
984    //     let h = b * a.hi;
985    //     let l = f64::mul_add(b, a.lo, f64::mul_add(b, a.hi, -h));
986    //     Self { lo: l, hi: h }
987    // }
988
989    /// Valid only |a.hi| > |b|
990    #[inline]
991    pub(crate) fn add_f64(a: DoubleDouble, b: f64) -> Self {
992        let t = DoubleDouble::from_exact_add(a.hi, b);
993        let l = a.lo + t.lo;
994        Self { lo: l, hi: t.hi }
995    }
996
997    #[inline]
998    pub(crate) fn full_add_f64(a: DoubleDouble, b: f64) -> Self {
999        let t = DoubleDouble::from_full_exact_add(a.hi, b);
1000        let l = a.lo + t.lo;
1001        Self { lo: l, hi: t.hi }
1002    }
1003
1004    /// Valid only |b| > |a.hi|
1005    #[inline]
1006    pub(crate) fn f64_add(b: f64, a: DoubleDouble) -> Self {
1007        let t = DoubleDouble::from_exact_add(b, a.hi);
1008        let l = a.lo + t.lo;
1009        Self { lo: l, hi: t.hi }
1010    }
1011
1012    #[inline]
1013    pub(crate) const fn to_f64(self) -> f64 {
1014        self.lo + self.hi
1015    }
1016
1017    // #[inline]
1018    // pub(crate) fn from_rsqrt(x: f64) -> DoubleDouble {
1019    //     let r = DoubleDouble::div_dd_f64(DoubleDouble::from_sqrt(x), x);
1020    //     let rx = DoubleDouble::quick_mult_safe_f64(r, x);
1021    //     let drx = DoubleDouble::mul_f64_safe_add(r, x, -rx);
1022    //     let h = DoubleDouble::mul_add(r, drx, DoubleDouble::mul_add_f64(r, rx, -1.0));
1023    //     let dr = DoubleDouble::quick_mult(DoubleDouble::quick_mult_f64(r, 0.5), h);
1024    //     DoubleDouble::add(r, dr)
1025    // }
1026
1027    #[inline]
1028    pub(crate) fn from_rsqrt_fast(x: f64) -> DoubleDouble {
1029        let sqrt_x = DoubleDouble::from_sqrt(x);
1030        sqrt_x.recip()
1031    }
1032}
1033
1034impl Mul<DoubleDouble> for DoubleDouble {
1035    type Output = Self;
1036
1037    #[inline]
1038    fn mul(self, rhs: DoubleDouble) -> Self::Output {
1039        DoubleDouble::quick_mult(self, rhs)
1040    }
1041}
1042
1043/// check if number is valid for Exact mult
1044#[allow(dead_code)]
1045#[inline]
1046pub(crate) fn two_product_compatible(x: f64) -> bool {
1047    let exp = get_exponent_f64(x);
1048    !(exp >= 970 || exp <= -970)
1049}
1050
1051#[cfg(test)]
1052mod tests {
1053    use super::*;
1054    #[test]
1055    fn test_f64_mult() {
1056        let d1 = 1.1231;
1057        let d2 = DoubleDouble::new(1e-22, 3.2341);
1058        let p = DoubleDouble::quick_f64_mult(d1, d2);
1059        assert_eq!(p.hi, 3.6322177100000004);
1060        assert_eq!(p.lo, -1.971941841373783e-16);
1061    }
1062
1063    #[test]
1064    fn test_mult_64() {
1065        let d1 = 1.1231;
1066        let d2 = DoubleDouble::new(1e-22, 3.2341);
1067        let p = DoubleDouble::mult_f64(d2, d1);
1068        assert_eq!(p.hi, 3.6322177100000004);
1069        assert_eq!(p.lo, -1.971941841373783e-16);
1070    }
1071
1072    #[test]
1073    fn recip_test() {
1074        let d1 = 1.54352432142;
1075        let recip = DoubleDouble::new(0., d1).recip();
1076        assert_eq!(recip.hi, d1.recip());
1077        assert_ne!(recip.lo, 0.);
1078    }
1079
1080    #[test]
1081    fn from_recip_test() {
1082        let d1 = 1.54352432142;
1083        let recip = DoubleDouble::from_recip(d1);
1084        assert_eq!(recip.hi, d1.recip());
1085        assert_ne!(recip.lo, 0.);
1086    }
1087
1088    #[test]
1089    fn from_quick_recip_test() {
1090        let d1 = 1.54352432142;
1091        let recip = DoubleDouble::from_quick_recip(d1);
1092        assert_eq!(recip.hi, d1.recip());
1093        assert_ne!(recip.lo, 0.);
1094    }
1095}
pxfm/double_double.rs

pxfm/
double_double.rs