1use crate::bits::get_exponent_f64;
30#[allow(unused_imports)]
31use crate::common::*;
32use std::ops::{Mul, Neg};
33#[derive(Copy, Clone, Default, Debug)]
36pub(crate) struct DoubleDouble {
37 pub(crate) lo: f64,
38 pub(crate) hi: f64,
39}
40
41impl Neg for DoubleDouble {
42 type Output = Self;
43
44 #[inline]
45 fn neg(self) -> Self::Output {
46 Self {
47 hi: -self.hi,
48 lo: -self.lo,
49 }
50 }
51}
52
53impl DoubleDouble {
54 #[inline]
55 pub(crate) const fn from_bit_pair(pair: (u64, u64)) -> Self {
56 Self {
57 lo: f64::from_bits(pair.0),
58 hi: f64::from_bits(pair.1),
59 }
60 }
61
62 #[inline]
63 pub(crate) const fn new(lo: f64, hi: f64) -> Self {
64 DoubleDouble { lo, hi }
65 }
66
67 #[allow(dead_code)]
69 #[inline]
70 pub(crate) const fn split(a: f64) -> DoubleDouble {
71 const CN: f64 = (1 << 27) as f64;
73 const C: f64 = CN + 1.0;
74 let t1 = C * a;
75 let t2 = a - t1;
76 let r_hi = t1 + t2;
77 let r_lo = a - r_hi;
78 DoubleDouble::new(r_lo, r_hi)
79 }
80
81 #[allow(dead_code)]
83 #[inline]
84 fn from_exact_mult_impl_non_fma(asz: DoubleDouble, a: f64, b: f64) -> Self {
85 let bs = DoubleDouble::split(b);
86
87 let r_hi = a * b;
88 let t1 = asz.hi * bs.hi - r_hi;
89 let t2 = asz.hi * bs.lo + t1;
90 let t3 = asz.lo * bs.hi + t2;
91 let r_lo = asz.lo * bs.lo + t3;
92 DoubleDouble::new(r_lo, r_hi)
93 }
94
95 #[inline]
97 pub(crate) const fn from_exact_add(a: f64, b: f64) -> DoubleDouble {
98 let r_hi = a + b;
99 let t = r_hi - a;
100 let r_lo = b - t;
101 DoubleDouble::new(r_lo, r_hi)
102 }
103
104 #[inline]
106 pub(crate) const fn from_exact_sub(a: f64, b: f64) -> DoubleDouble {
107 let r_hi = a - b;
108 let t = a - r_hi;
109 let r_lo = t - b;
110 DoubleDouble::new(r_lo, r_hi)
111 }
112
113 #[inline]
114 pub(crate) const fn from_full_exact_add(a: f64, b: f64) -> DoubleDouble {
115 let r_hi = a + b;
116 let t1 = r_hi - a;
117 let t2 = r_hi - t1;
118 let t3 = b - t1;
119 let t4 = a - t2;
120 let r_lo = t3 + t4;
121 DoubleDouble::new(r_lo, r_hi)
122 }
123
124 #[allow(unused)]
125 #[inline]
126 pub(crate) fn dd_f64_mul_add(a: f64, b: f64, c: f64) -> f64 {
127 let ddx2 = DoubleDouble::from_exact_mult(a, b);
128 let zv = DoubleDouble::full_add_f64(ddx2, c);
129 zv.to_f64()
130 }
131
132 #[inline]
133 pub(crate) const fn from_full_exact_sub(a: f64, b: f64) -> Self {
134 let r_hi = a - b;
135 let t1 = r_hi - a;
136 let t2 = r_hi - t1;
137 let t3 = -b - t1;
138 let t4 = a - t2;
139 let r_lo = t3 + t4;
140 DoubleDouble::new(r_lo, r_hi)
141 }
142
143 #[inline]
144 pub(crate) fn add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
145 let s = a.hi + b.hi;
146 let d = s - a.hi;
147 let l = ((b.hi - d) + (a.hi + (d - s))) + (a.lo + b.lo);
148 DoubleDouble::new(l, s)
149 }
150
151 #[inline]
152 pub(crate) fn quick_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
153 let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
154 let v = a.lo + b.lo;
155 let w = sl + v;
156 DoubleDouble::from_exact_add(sh, w)
157 }
158
159 #[inline]
160 pub(crate) fn quick_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
161 let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_sub(a.hi, b.hi);
162 let v = a.lo - b.lo;
163 let w = sl + v;
164 DoubleDouble::from_exact_add(sh, w)
165 }
166
167 #[inline]
168 pub(crate) fn full_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
169 let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
170 let DoubleDouble { hi: th, lo: tl } = DoubleDouble::from_full_exact_add(a.lo, b.lo);
171 let c = sl + th;
172 let v = DoubleDouble::from_exact_add(sh, c);
173 let w = tl + v.lo;
174 DoubleDouble::from_exact_add(v.hi, w)
175 }
176
177 #[inline]
178 pub(crate) fn full_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
179 DoubleDouble::full_dd_add(a, -b)
180 }
181
182 #[inline]
183 pub(crate) fn sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
184 let s = a.hi - b.hi;
185 let d = s - a.hi;
186 let l = ((-b.hi - d) + (a.hi + (d - s))) + (a.lo - b.lo);
187 DoubleDouble::new(l, s)
188 }
189
190 #[inline]
192 pub(crate) fn sqrt(self) -> DoubleDouble {
193 let a = self.hi + self.lo;
194
195 if a == 0.0 {
196 return DoubleDouble { hi: 0.0, lo: 0.0 };
197 }
198 if a < 0.0 || a.is_nan() {
199 return DoubleDouble {
200 hi: f64::NAN,
201 lo: 0.0,
202 };
203 }
204 if a.is_infinite() {
205 return DoubleDouble {
206 hi: f64::INFINITY,
207 lo: 0.0,
208 };
209 }
210
211 let x = a.sqrt();
212
213 let x2 = DoubleDouble::from_exact_mult(x, x);
214
215 let mut r = self.hi - x2.hi;
217 r += self.lo;
218 r -= x2.lo;
219
220 let dx = r / (2.0 * x);
221 let hi = x + dx;
222 let lo = (x - hi) + dx;
223
224 DoubleDouble { hi, lo }
225 }
226
227 #[inline]
229 pub(crate) fn fast_sqrt(self) -> DoubleDouble {
230 let a = self.hi + self.lo;
231 let x = a.sqrt();
232
233 let x2 = DoubleDouble::from_exact_mult(x, x);
234
235 let mut r = self.hi - x2.hi;
237 r += self.lo;
238 r -= x2.lo;
239
240 let dx = r / (2.0 * x);
241 let hi = x + dx;
242 let lo = (x - hi) + dx;
243
244 DoubleDouble { hi, lo }
245 }
246
247 #[inline]
251 pub(crate) fn mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
252 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
253 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
254 DoubleDouble::new(r + q, p)
255 }
256
257 #[inline(always)]
261 #[allow(unused)]
262 pub(crate) fn mul_add_f64_fma(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
263 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
264 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
265 DoubleDouble::new(r + q, p)
266 }
267
268 #[inline]
272 pub(crate) fn quick_mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
273 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
274 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
275 DoubleDouble::new(r + q, p)
276 }
277
278 #[inline]
282 pub(crate) fn mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> DoubleDouble {
283 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
284 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
285 DoubleDouble::new(r + q, p)
286 }
287
288 #[inline]
298 pub(crate) fn recip(self) -> DoubleDouble {
299 #[cfg(any(
300 all(
301 any(target_arch = "x86", target_arch = "x86_64"),
302 target_feature = "fma"
303 ),
304 target_arch = "aarch64"
305 ))]
306 {
307 let y = 1. / self.hi;
308 let e1 = f_fmla(-self.hi, y, 1.0);
309 let e2 = f_fmla(-self.lo, y, e1);
310 let e = y * e2;
311 DoubleDouble::new(e, y)
312 }
313 #[cfg(not(any(
314 all(
315 any(target_arch = "x86", target_arch = "x86_64"),
316 target_feature = "fma"
317 ),
318 target_arch = "aarch64"
319 )))]
320 {
321 let y = 1.0 / self.hi;
322
323 let DoubleDouble { hi: p1, lo: err1 } = DoubleDouble::from_exact_mult(self.hi, y);
324 let e1 = (1.0 - p1) - err1;
325 let DoubleDouble { hi: p2, lo: err2 } = DoubleDouble::from_exact_mult(self.lo, y);
326 let e2 = (e1 - p2) - err2;
327 let e = y * e2;
328
329 DoubleDouble::new(e, y)
330 }
331 }
332
333 #[inline]
334 pub(crate) fn from_recip(b: f64) -> Self {
335 #[cfg(any(
336 all(
337 any(target_arch = "x86", target_arch = "x86_64"),
338 target_feature = "fma"
339 ),
340 target_arch = "aarch64"
341 ))]
342 {
343 let x_hi = 1.0 / b;
344 let err = f_fmla(-x_hi, b, 1.0);
345 let x_lo = err / b;
346 Self::new(x_lo, x_hi)
347 }
348 #[cfg(not(any(
349 all(
350 any(target_arch = "x86", target_arch = "x86_64"),
351 target_feature = "fma"
352 ),
353 target_arch = "aarch64"
354 )))]
355 {
356 let x_hi = 1.0 / b;
357 let prod = Self::from_exact_mult(x_hi, b);
358 let err = (1.0 - prod.hi) - prod.lo;
359 let x_lo = err / b;
360 Self::new(x_lo, x_hi)
361 }
362 }
363
364 #[inline(always)]
365 #[allow(unused)]
366 pub(crate) fn from_quick_recip_fma(b: f64) -> Self {
367 let h = 1.0 / b;
368 let hl = f64::mul_add(h, -b, 1.) * h;
369 DoubleDouble::new(hl, h)
370 }
371
372 #[inline]
373 pub(crate) fn from_quick_recip(b: f64) -> Self {
374 #[cfg(any(
375 all(
376 any(target_arch = "x86", target_arch = "x86_64"),
377 target_feature = "fma"
378 ),
379 target_arch = "aarch64"
380 ))]
381 {
382 let h = 1.0 / b;
383 let hl = f_fmla(h, -b, 1.) * h;
384 DoubleDouble::new(hl, h)
385 }
386 #[cfg(not(any(
387 all(
388 any(target_arch = "x86", target_arch = "x86_64"),
389 target_feature = "fma"
390 ),
391 target_arch = "aarch64"
392 )))]
393 {
394 let h = 1.0 / b;
395 let pr = DoubleDouble::from_exact_mult(h, b);
396 let err = (1.0 - pr.hi) - pr.lo;
397 let hl = err * h;
398 DoubleDouble::new(hl, h)
399 }
400 }
401
402 #[inline]
403 pub(crate) fn from_exact_div(a: f64, b: f64) -> Self {
404 #[cfg(any(
405 all(
406 any(target_arch = "x86", target_arch = "x86_64"),
407 target_feature = "fma"
408 ),
409 target_arch = "aarch64"
410 ))]
411 {
412 let q_hi = a / b;
413 let r = f_fmla(-q_hi, b, a);
414 let q_lo = r / b;
415 Self::new(q_lo, q_hi)
416 }
417
418 #[cfg(not(any(
419 all(
420 any(target_arch = "x86", target_arch = "x86_64"),
421 target_feature = "fma"
422 ),
423 target_arch = "aarch64"
424 )))]
425 {
426 let q_hi = a / b;
427
428 let p = DoubleDouble::from_exact_mult(q_hi, b);
429 let r = DoubleDouble::from_exact_sub(a, p.hi);
430 let r = r.hi + (r.lo - p.lo);
431 let q_lo = r / b;
432
433 Self::new(q_lo, q_hi)
434 }
435 }
436
437 #[inline]
439 pub(crate) fn from_exact_div_fma(a: f64, b: f64) -> Self {
440 let q_hi = a / b;
441 let r = f64::mul_add(-q_hi, b, a);
442 let q_lo = r / b;
443 Self::new(q_lo, q_hi)
444 }
445
446 #[inline]
447 pub(crate) fn from_sqrt(x: f64) -> Self {
448 #[cfg(any(
449 all(
450 any(target_arch = "x86", target_arch = "x86_64"),
451 target_feature = "fma"
452 ),
453 target_arch = "aarch64"
454 ))]
455 {
456 let h = x.sqrt();
457 let e = -f_fmla(h, h, -x); let l = e / (h + h);
463 DoubleDouble::new(l, h)
464 }
465 #[cfg(not(any(
466 all(
467 any(target_arch = "x86", target_arch = "x86_64"),
468 target_feature = "fma"
469 ),
470 target_arch = "aarch64"
471 )))]
472 {
473 let h = x.sqrt();
474 let prod_hh = DoubleDouble::from_exact_mult(h, h);
475 let e = (x - prod_hh.hi) - prod_hh.lo; let l = e / (h + h);
479 DoubleDouble::new(l, h)
480 }
481 }
482
483 #[inline]
485 #[allow(dead_code)]
486 pub(crate) fn div_safe_dd_f64(a: DoubleDouble, b: f64) -> Self {
487 let q1 = a.hi / b;
488 let r = f64::mul_add(-q1, b, a.hi);
489 let r = r + a.lo;
490 let q2 = r / b;
491
492 DoubleDouble::new(q2, q1)
493 }
494
495 #[inline]
496 pub(crate) fn div_dd_f64(a: DoubleDouble, b: f64) -> Self {
497 #[cfg(any(
498 all(
499 any(target_arch = "x86", target_arch = "x86_64"),
500 target_feature = "fma"
501 ),
502 target_arch = "aarch64"
503 ))]
504 {
505 let q1 = a.hi / b;
506 let r = f_fmla(-q1, b, a.hi);
507 let r = r + a.lo;
508 let q2 = r / b;
509
510 DoubleDouble::new(q2, q1)
511 }
512 #[cfg(not(any(
513 all(
514 any(target_arch = "x86", target_arch = "x86_64"),
515 target_feature = "fma"
516 ),
517 target_arch = "aarch64"
518 )))]
519 {
520 let th = a.hi / b;
521 let prod = DoubleDouble::from_exact_mult(th, b);
522 let beta_h = a.hi - prod.hi;
523 let beta_l = beta_h - prod.lo;
524 let beta = beta_l + a.lo;
525 let tl = beta / b;
526 DoubleDouble::new(tl, th)
527 }
528 }
529
530 #[inline]
573 pub(crate) fn from_f64_div_dd(a: f64, b: DoubleDouble) -> Self {
574 let q1 = a / b.hi;
575
576 let prod = DoubleDouble::from_exact_mult(q1, b.hi);
577 let prod_lo = f_fmla(q1, b.lo, prod.lo);
578 let rem = f_fmla(-1.0, prod.hi, a) - prod_lo;
579
580 let q2 = rem / b.hi;
581
582 DoubleDouble::new(q2, q1)
583 }
584
585 #[inline(always)]
586 #[allow(unused)]
587 pub(crate) fn from_f64_div_dd_fma(a: f64, b: DoubleDouble) -> Self {
588 let q1 = a / b.hi;
589
590 let prod = DoubleDouble::from_exact_mult_fma(q1, b.hi);
591 let prod_lo = f64::mul_add(q1, b.lo, prod.lo);
592 let rem = f64::mul_add(-1.0, prod.hi, a) - prod_lo;
593
594 let q2 = rem / b.hi;
595
596 DoubleDouble::new(q2, q1)
597 }
598
599 #[inline(always)]
600 #[allow(unused)]
601 pub(crate) fn div_fma(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
602 let q = 1.0 / b.hi;
603 let r_hi = a.hi * q;
604 let e_hi = f64::mul_add(b.hi, -r_hi, a.hi);
605 let e_lo = f64::mul_add(b.lo, -r_hi, a.lo);
606 let r_lo = q * (e_hi + e_lo);
607 DoubleDouble::new(r_lo, r_hi)
608 }
609
610 #[inline]
611 pub(crate) fn div(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
612 let q = 1.0 / b.hi;
613 let r_hi = a.hi * q;
614 #[cfg(any(
615 all(
616 any(target_arch = "x86", target_arch = "x86_64"),
617 target_feature = "fma"
618 ),
619 target_arch = "aarch64"
620 ))]
621 {
622 let e_hi = f_fmla(b.hi, -r_hi, a.hi);
623 let e_lo = f_fmla(b.lo, -r_hi, a.lo);
624 let r_lo = q * (e_hi + e_lo);
625 DoubleDouble::new(r_lo, r_hi)
626 }
627
628 #[cfg(not(any(
629 all(
630 any(target_arch = "x86", target_arch = "x86_64"),
631 target_feature = "fma"
632 ),
633 target_arch = "aarch64"
634 )))]
635 {
636 let b_hi_r_hi = DoubleDouble::from_exact_mult(b.hi, -r_hi);
637 let b_lo_r_hi = DoubleDouble::from_exact_mult(b.lo, -r_hi);
638 let e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
639 let e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
640 let r_lo = q * (e_hi + e_lo);
641 DoubleDouble::new(r_lo, r_hi)
642 }
643 }
644
645 #[inline]
646 pub(crate) fn from_exact_mult(a: f64, b: f64) -> Self {
647 #[cfg(any(
648 all(
649 any(target_arch = "x86", target_arch = "x86_64"),
650 target_feature = "fma"
651 ),
652 target_arch = "aarch64"
653 ))]
654 {
655 let r_hi = a * b;
656 let r_lo = f_fmla(a, b, -r_hi);
657 DoubleDouble::new(r_lo, r_hi)
658 }
659 #[cfg(not(any(
660 all(
661 any(target_arch = "x86", target_arch = "x86_64"),
662 target_feature = "fma"
663 ),
664 target_arch = "aarch64"
665 )))]
666 {
667 let splat = DoubleDouble::split(a);
668 DoubleDouble::from_exact_mult_impl_non_fma(splat, a, b)
669 }
670 }
671
672 #[inline(always)]
673 #[allow(unused)]
674 pub(crate) fn from_exact_mult_fma(a: f64, b: f64) -> Self {
675 let r_hi = a * b;
676 let r_lo = f64::mul_add(a, b, -r_hi);
677 DoubleDouble::new(r_lo, r_hi)
678 }
679
680 #[inline]
696 pub(crate) fn mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
697 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
698 let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
699 DoubleDouble::new(r + q, p)
700 }
701
702 #[inline]
710 pub(crate) fn quick_mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
711 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
712 let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
713 DoubleDouble::new(r + q, p)
714 }
715
716 #[inline]
723 pub(crate) fn quick_mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> Self {
724 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
725 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
726 DoubleDouble::new(r + q, p)
727 }
728
729 #[inline]
756 pub(crate) fn f64_mul_f64_add(a: f64, b: f64, c: DoubleDouble) -> Self {
757 let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
758 let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
759 DoubleDouble::new(r + q, p)
760 }
761
762 #[inline]
788 pub(crate) fn mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
789 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
790 let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
791 DoubleDouble::new(r + q, p)
792 }
793
794 #[inline(always)]
798 #[allow(unused)]
799 pub(crate) fn mul_add_fma(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
800 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
801 let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
802 DoubleDouble::new(r + q, p)
803 }
804
805 #[inline]
812 pub(crate) fn quick_mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
813 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
814 let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
815 DoubleDouble::new(r + q, p)
816 }
817
818 #[inline(always)]
825 #[allow(unused)]
826 pub(crate) fn quick_mul_add_fma(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
827 let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_fma(a, b);
828 let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
829 DoubleDouble::new(r + q, p)
830 }
831
832 #[inline]
833 pub(crate) fn quick_mult(a: DoubleDouble, b: DoubleDouble) -> Self {
834 #[cfg(any(
835 all(
836 any(target_arch = "x86", target_arch = "x86_64"),
837 target_feature = "fma"
838 ),
839 target_arch = "aarch64"
840 ))]
841 {
842 let mut r = DoubleDouble::from_exact_mult(a.hi, b.hi);
843 let t1 = f_fmla(a.hi, b.lo, r.lo);
844 let t2 = f_fmla(a.lo, b.hi, t1);
845 r.lo = t2;
846 r
847 }
848 #[cfg(not(any(
849 all(
850 any(target_arch = "x86", target_arch = "x86_64"),
851 target_feature = "fma"
852 ),
853 target_arch = "aarch64"
854 )))]
855 {
856 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
857 let tl1 = a.hi * b.lo;
858 let tl2 = a.lo * b.hi;
859 let cl2 = tl1 + tl2;
860 let cl3 = cl1 + cl2;
861 DoubleDouble::new(cl3, ch)
862 }
863 }
864
865 #[inline(always)]
866 #[allow(unused)]
867 pub(crate) fn quick_mult_fma(a: DoubleDouble, b: DoubleDouble) -> Self {
868 let mut r = DoubleDouble::from_exact_mult_fma(a.hi, b.hi);
869 let t1 = f64::mul_add(a.hi, b.lo, r.lo);
870 let t2 = f64::mul_add(a.lo, b.hi, t1);
871 r.lo = t2;
872 r
873 }
874
875 #[inline]
876 pub(crate) fn mult(a: DoubleDouble, b: DoubleDouble) -> Self {
877 #[cfg(any(
878 all(
879 any(target_arch = "x86", target_arch = "x86_64"),
880 target_feature = "fma"
881 ),
882 target_arch = "aarch64"
883 ))]
884 {
885 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
886 let tl0 = a.lo * b.lo;
887 let tl1 = f_fmla(a.hi, b.lo, tl0);
888 let cl2 = f_fmla(a.lo, b.hi, tl1);
889 let cl3 = cl1 + cl2;
890 DoubleDouble::from_exact_add(ch, cl3)
891 }
892 #[cfg(not(any(
893 all(
894 any(target_arch = "x86", target_arch = "x86_64"),
895 target_feature = "fma"
896 ),
897 target_arch = "aarch64"
898 )))]
899 {
900 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
901 let tl1 = a.hi * b.lo;
902 let tl2 = a.lo * b.hi;
903 let cl2 = tl1 + tl2;
904 let cl3 = cl1 + cl2;
905 DoubleDouble::from_exact_add(ch, cl3)
906 }
907 }
908
909 #[inline]
910 pub(crate) fn mult_f64(a: DoubleDouble, b: f64) -> Self {
911 #[cfg(any(
912 all(
913 any(target_arch = "x86", target_arch = "x86_64"),
914 target_feature = "fma"
915 ),
916 target_arch = "aarch64"
917 ))]
918 {
919 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
920 let cl3 = f_fmla(a.lo, b, cl1);
921 DoubleDouble::from_exact_add(ch, cl3)
922 }
923 #[cfg(not(any(
924 all(
925 any(target_arch = "x86", target_arch = "x86_64"),
926 target_feature = "fma"
927 ),
928 target_arch = "aarch64"
929 )))]
930 {
931 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
932 let cl2 = a.lo * b;
933 let t = DoubleDouble::from_exact_add(ch, cl2);
934 let tl2 = t.lo + cl1;
935 DoubleDouble::from_exact_add(t.hi, tl2)
936 }
937 }
938
939 #[inline(always)]
940 pub(crate) fn quick_f64_mult(a: f64, b: DoubleDouble) -> DoubleDouble {
941 DoubleDouble::quick_mult_f64(b, a)
942 }
943
944 #[inline]
945 pub(crate) fn quick_mult_f64(a: DoubleDouble, b: f64) -> Self {
946 #[cfg(any(
947 all(
948 any(target_arch = "x86", target_arch = "x86_64"),
949 target_feature = "fma"
950 ),
951 target_arch = "aarch64"
952 ))]
953 {
954 let h = b * a.hi;
955 let l = f_fmla(b, a.lo, f_fmla(b, a.hi, -h));
956 Self { lo: l, hi: h }
957 }
958 #[cfg(not(any(
959 all(
960 any(target_arch = "x86", target_arch = "x86_64"),
961 target_feature = "fma"
962 ),
963 target_arch = "aarch64"
964 )))]
965 {
966 let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
967 let cl2 = a.lo * b;
968 let cl3 = cl1 + cl2;
969 DoubleDouble::new(cl3, ch)
970 }
971 }
972
973 #[inline(always)]
974 #[allow(unused)]
975 pub(crate) fn quick_mult_f64_fma(a: DoubleDouble, b: f64) -> Self {
976 let h = b * a.hi;
977 let l = f64::mul_add(b, a.lo, f64::mul_add(b, a.hi, -h));
978 Self { lo: l, hi: h }
979 }
980
981 #[inline]
991 pub(crate) fn add_f64(a: DoubleDouble, b: f64) -> Self {
992 let t = DoubleDouble::from_exact_add(a.hi, b);
993 let l = a.lo + t.lo;
994 Self { lo: l, hi: t.hi }
995 }
996
997 #[inline]
998 pub(crate) fn full_add_f64(a: DoubleDouble, b: f64) -> Self {
999 let t = DoubleDouble::from_full_exact_add(a.hi, b);
1000 let l = a.lo + t.lo;
1001 Self { lo: l, hi: t.hi }
1002 }
1003
1004 #[inline]
1006 pub(crate) fn f64_add(b: f64, a: DoubleDouble) -> Self {
1007 let t = DoubleDouble::from_exact_add(b, a.hi);
1008 let l = a.lo + t.lo;
1009 Self { lo: l, hi: t.hi }
1010 }
1011
1012 #[inline]
1013 pub(crate) const fn to_f64(self) -> f64 {
1014 self.lo + self.hi
1015 }
1016
1017 #[inline]
1028 pub(crate) fn from_rsqrt_fast(x: f64) -> DoubleDouble {
1029 let sqrt_x = DoubleDouble::from_sqrt(x);
1030 sqrt_x.recip()
1031 }
1032}
1033
1034impl Mul<DoubleDouble> for DoubleDouble {
1035 type Output = Self;
1036
1037 #[inline]
1038 fn mul(self, rhs: DoubleDouble) -> Self::Output {
1039 DoubleDouble::quick_mult(self, rhs)
1040 }
1041}
1042
1043#[allow(dead_code)]
1045#[inline]
1046pub(crate) fn two_product_compatible(x: f64) -> bool {
1047 let exp = get_exponent_f64(x);
1048 !(exp >= 970 || exp <= -970)
1049}
1050
1051#[cfg(test)]
1052mod tests {
1053 use super::*;
1054 #[test]
1055 fn test_f64_mult() {
1056 let d1 = 1.1231;
1057 let d2 = DoubleDouble::new(1e-22, 3.2341);
1058 let p = DoubleDouble::quick_f64_mult(d1, d2);
1059 assert_eq!(p.hi, 3.6322177100000004);
1060 assert_eq!(p.lo, -1.971941841373783e-16);
1061 }
1062
1063 #[test]
1064 fn test_mult_64() {
1065 let d1 = 1.1231;
1066 let d2 = DoubleDouble::new(1e-22, 3.2341);
1067 let p = DoubleDouble::mult_f64(d2, d1);
1068 assert_eq!(p.hi, 3.6322177100000004);
1069 assert_eq!(p.lo, -1.971941841373783e-16);
1070 }
1071
1072 #[test]
1073 fn recip_test() {
1074 let d1 = 1.54352432142;
1075 let recip = DoubleDouble::new(0., d1).recip();
1076 assert_eq!(recip.hi, d1.recip());
1077 assert_ne!(recip.lo, 0.);
1078 }
1079
1080 #[test]
1081 fn from_recip_test() {
1082 let d1 = 1.54352432142;
1083 let recip = DoubleDouble::from_recip(d1);
1084 assert_eq!(recip.hi, d1.recip());
1085 assert_ne!(recip.lo, 0.);
1086 }
1087
1088 #[test]
1089 fn from_quick_recip_test() {
1090 let d1 = 1.54352432142;
1091 let recip = DoubleDouble::from_quick_recip(d1);
1092 assert_eq!(recip.hi, d1.recip());
1093 assert_ne!(recip.lo, 0.);
1094 }
1095}