1use crate::common::{dd_fmla, is_integerf};
30use crate::double_double::DoubleDouble;
31use crate::rounding::CpuRoundTiesEven;
32use std::hint::black_box;
33
34#[cold]
35#[inline(never)]
36fn as_compoundf_special(x: f32, y: f32) -> f32 {
37 let nx = x.to_bits();
38 let ny = y.to_bits();
39 let ax: u32 = nx.wrapping_shl(1);
40 let ay: u32 = ny.wrapping_shl(1);
41
42 if ax == 0 || ay == 0 {
43 if ax == 0 {
45 return if y.is_nan() { x + y } else { 1.0 };
47 }
48
49 if ay == 0 {
50 if x.is_nan() {
52 return x + y;
53 } return if x < -1.0 {
55 f32::NAN } else {
57 1.0
58 }; }
60 }
61
62 let mone = (-1.0f32).to_bits();
63 if ay >= 0xffu32 << 24 {
64 if ax > 0xffu32 << 24 {
67 return x + y;
68 } if ay == 0xffu32 << 24 {
70 if nx > mone {
72 return f32::NAN;
73 } let sy = ny >> 31; if nx == mone {
76 return if sy == 0 {
77 0.0 } else {
79 f32::INFINITY };
81 }
82 if x < 0.0 {
83 return if sy == 0 { 0.0 } else { f32::INFINITY };
84 }
85 if x > 0.0 {
86 return if sy != 0 { 0.0 } else { f32::INFINITY };
87 }
88 return 1.0;
89 }
90 return x + y; }
92
93 if nx >= mone || nx >= 0xffu32 << 23 {
94 if ax == 0xffu32 << 24 {
96 if (nx >> 31) != 0 {
98 return f32::NAN;
99 } return if (ny >> 31) != 0 { 1.0 / x } else { x };
102 }
103 if ax > 0xffu32 << 24 {
104 return x + y;
105 } if nx > mone {
107 return f32::NAN; }
109 return if (ny >> 31) != 0 {
111 f32::INFINITY
113 } else {
114 0.0
116 };
117 }
118 0.0
119}
120
121#[inline]
122pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
123 const P: [u64; 8] = [
127 0x0000000000000000,
128 0x3ff71547652b82fe,
129 0xbfe71547652b8d11,
130 0x3fdec709dc3a5014,
131 0xbfd715475b144983,
132 0x3fd2776c3fda300e,
133 0xbfcec990162358ce,
134 0x3fca645337c29e27,
135 ];
136
137 let z2 = z * z;
138 let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
139 let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
140 let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
141 let z4 = z2 * z2;
142 c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
143 c1 = dd_fmla(c3, z2, c1);
144 c1 = dd_fmla(c5, z4, c1);
145 z * c1
146}
147
148pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
150 0x3ff6800000000000,
151 0x3ff6000000000000,
152 0x3ff5800000000000,
153 0x3ff5000000000000,
154 0x3ff4c00000000000,
155 0x3ff4400000000000,
156 0x3ff4000000000000,
157 0x3ff3800000000000,
158 0x3ff3400000000000,
159 0x3ff2c00000000000,
160 0x3ff2800000000000,
161 0x3ff2000000000000,
162 0x3ff1c00000000000,
163 0x3ff1800000000000,
164 0x3ff1400000000000,
165 0x3ff1000000000000,
166 0x3ff0c00000000000,
167 0x3ff0800000000000,
168 0x3ff0000000000000,
169 0x3ff0000000000000,
170 0x3fef400000000000,
171 0x3feec00000000000,
172 0x3fee400000000000,
173 0x3fee000000000000,
174 0x3fed800000000000,
175 0x3fed000000000000,
176 0x3feca00000000000,
177 0x3fec400000000000,
178 0x3febe00000000000,
179 0x3feb800000000000,
180 0x3feb200000000000,
181 0x3feac00000000000,
182 0x3fea800000000000,
183 0x3fea200000000000,
184 0x3fe9c00000000000,
185 0x3fe9800000000000,
186 0x3fe9200000000000,
187 0x3fe8c00000000000,
188 0x3fe8800000000000,
189 0x3fe8400000000000,
190 0x3fe8000000000000,
191 0x3fe7c00000000000,
192 0x3fe7600000000000,
193 0x3fe7200000000000,
194 0x3fe6e00000000000,
195 0x3fe6a00000000000,
196];
197
198pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
202 (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf),
203 (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
204 (0x3c76fae441c09d76, 0xbfdb47ebf73882a1),
205 (0x3c72d352bea51e59, 0xbfd91bba891f1709),
206 (0xbc69575b04fa6fbd, 0xbfd800a563161c54),
207 (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
208 (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b),
209 (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
210 (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76),
211 (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
212 (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94),
213 (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
214 (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a),
215 (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
216 (0xbc58ecb169b9465f, 0xbfbbc84240adabba),
217 (0xbc5f3314e0985116, 0xbfb663f6fac91316),
218 (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b),
219 (0xbc389b03784b5be1, 0xbfa6bad3758efd87),
220 (0x0000000000000000, 0x0000000000000000),
221 (0x0000000000000000, 0x0000000000000000),
222 (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8),
223 (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
224 (0x3c2c141e66faaaad, 0x3fb4c560fe68af88),
225 (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
226 (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56),
227 (0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
228 (0xbc61562d61af73f8, 0x3fc494f863b8df35),
229 (0xbc60798d1aa21694, 0x3fc7046031c79f85),
230 (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1),
231 (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
232 (0xbc6086fce864a1f6, 0x3fce857d3d361368),
233 (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
234 (0x3c7c8d43e017579b, 0x3fd169c05363f158),
235 (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
236 (0xbc7c658d602e66b0, 0x3fd4106017c3eca3),
237 (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
238 (0x3c7ac9080333c605, 0x3fd6552b49986277),
239 (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
240 (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32),
241 (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
242 (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e),
243 (0xbc501d98c3531027, 0x3fdb877c57b1b070),
244 (0x3c78a38b4175d665, 0x3fdcffae611ad12b),
245 (0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
246 (0x3c76d261f1753e0b, 0x3fdefec61b011f85),
247 (0xbc87398fe685f171, 0x3fe0014332be0033),
248];
249
250#[inline]
253fn compoundf_expf_poly(z: f64) -> f64 {
254 const Q: [u64; 5] = [
257 0x3ff0000000000000,
258 0x3fe62e42fef6d01a,
259 0x3fcebfbdff7feeba,
260 0x3fac6b167e579bee,
261 0x3f83b2b3428d06de,
262 ];
263 let z2 = z * z;
264 let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
265 let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
266 let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
267 dd_fmla(c2, z2, c0)
268}
269
270pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
271 let u = 1.0 + x;
276 let mut v = u.to_bits();
283 let m: u64 = v & 0xfffffffffffffu64;
284 let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
285 v = v.wrapping_sub((e << 52) as u64);
287 let t = f64::from_bits(v);
288 v = (f64::from_bits(v) + 2.0).to_bits(); let i = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
293 let z = dd_fmla(r, t, -1.0); let p = log2p1_polyeval_1(z);
296 e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
298}
299
300pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
301 0xbfe0000000000000,
302 0xbfde000000000000,
303 0xbfdc000000000000,
304 0xbfda000000000000,
305 0xbfd8000000000000,
306 0xbfd6000000000000,
307 0xbfd4000000000000,
308 0xbfd2000000000000,
309 0xbfd0000000000000,
310 0xbfcc000000000000,
311 0xbfc8000000000000,
312 0xbfc4000000000000,
313 0xbfc0000000000000,
314 0xbfb8000000000000,
315 0xbfb0000000000000,
316 0xbfa0000000000000,
317 0x0000000000000000,
318 0x3fa0000000000000,
319 0x3fb0000000000000,
320 0x3fb8000000000000,
321 0x3fc0000000000000,
322 0x3fc4000000000000,
323 0x3fc8000000000000,
324 0x3fcc000000000000,
325 0x3fd0000000000000,
326 0x3fd2000000000000,
327 0x3fd4000000000000,
328 0x3fd6000000000000,
329 0x3fd8000000000000,
330 0x3fda000000000000,
331 0x3fdc000000000000,
332 0x3fde000000000000,
333 0x3fe0000000000000,
334];
335
336pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
340 (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd),
341 (0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
342 (0xbc741577ee04992f, 0x3fe7a11473eb0187),
343 (0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
344 (0x3c86e9f156864b27, 0x3fe8ace5422aa0db),
345 (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
346 (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090),
347 (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
348 (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad),
349 (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
350 (0x3c711065895048dd, 0x3fec199bdd85529c),
351 (0x3c6503cbd1e949db, 0x3fecb720dcef9069),
352 (0x3c72ed02d75b3707, 0x3fed5818dcfba487),
353 (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
354 (0xbc8e9c23179c2893, 0x3feea4afa2a490da),
355 (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
356 (0x0000000000000000, 0x3ff0000000000000),
357 (0x3c8d73e2a475b465, 0x3ff059b0d3158574),
358 (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
359 (0xbc96c51039449b3a, 0x3ff11301d0125b51),
360 (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
361 (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
362 (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
363 (0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
364 (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
365 (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
366 (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
367 (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
368 (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
369 (0xbc807abe1db13cad, 0x3ff5342b569d4f82),
370 (0x3c96324c054647ad, 0x3ff5ab07dd485429),
371 (0xbc9383c17e40b497, 0x3ff6247eb03a5585),
372 (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
373];
374
375fn exp2_fast(t: f64) -> f64 {
380 let k = t.cpu_round_ties_even(); let mut r = t - k; let mut v: u64 = (3.015625 + r).to_bits(); let i: i32 = (v >> 46) as i32 - 0x10010; r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
389 let mut err: u64 = 0x3d61d00000000000; let ik = k as i64;
399 v = v.wrapping_add(ik.wrapping_shl(52) as u64); if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
403 return -1.0;
404 }
405 err = err.wrapping_add((ik << 52) as u64); let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
407 let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
408 if lb != rb {
409 return -1.0;
410 } f64::from_bits(v)
413}
414
415pub(crate) static LOG2P1_SCALE: [u64; 158] = [
418 0x41c0000000000000,
419 0x41b0000000000000,
420 0x41a0000000000000,
421 0x4190000000000000,
422 0x4180000000000000,
423 0x4170000000000000,
424 0x4160000000000000,
425 0x4150000000000000,
426 0x4140000000000000,
427 0x4130000000000000,
428 0x4120000000000000,
429 0x4110000000000000,
430 0x4100000000000000,
431 0x40f0000000000000,
432 0x40e0000000000000,
433 0x40d0000000000000,
434 0x40c0000000000000,
435 0x40b0000000000000,
436 0x40a0000000000000,
437 0x4090000000000000,
438 0x4080000000000000,
439 0x4070000000000000,
440 0x4060000000000000,
441 0x4050000000000000,
442 0x4040000000000000,
443 0x4030000000000000,
444 0x4020000000000000,
445 0x4010000000000000,
446 0x4000000000000000,
447 0x3ff0000000000000,
448 0x3fe0000000000000,
449 0x3fd0000000000000,
450 0x3fc0000000000000,
451 0x3fb0000000000000,
452 0x3fa0000000000000,
453 0x3f90000000000000,
454 0x3f80000000000000,
455 0x3f70000000000000,
456 0x3f60000000000000,
457 0x3f50000000000000,
458 0x3f40000000000000,
459 0x3f30000000000000,
460 0x3f20000000000000,
461 0x3f10000000000000,
462 0x3f00000000000000,
463 0x3ef0000000000000,
464 0x3ee0000000000000,
465 0x3ed0000000000000,
466 0x3ec0000000000000,
467 0x3eb0000000000000,
468 0x3ea0000000000000,
469 0x3e90000000000000,
470 0x3e80000000000000,
471 0x3e70000000000000,
472 0x3e60000000000000,
473 0x3e50000000000000,
474 0x3e40000000000000,
475 0x3e30000000000000,
476 0x3e20000000000000,
477 0x3e10000000000000,
478 0x3e00000000000000,
479 0x3df0000000000000,
480 0x3de0000000000000,
481 0x3dd0000000000000,
482 0x3dc0000000000000,
483 0x3db0000000000000,
484 0x3da0000000000000,
485 0x3d90000000000000,
486 0x3d80000000000000,
487 0x3d70000000000000,
488 0x3d60000000000000,
489 0x3d50000000000000,
490 0x3d40000000000000,
491 0x3d30000000000000,
492 0x3d20000000000000,
493 0x3d10000000000000,
494 0x3d00000000000000,
495 0x3cf0000000000000,
496 0x3ce0000000000000,
497 0x3cd0000000000000,
498 0x3cc0000000000000,
499 0x3cb0000000000000,
500 0x3ca0000000000000,
501 0x3c90000000000000,
502 0x3c80000000000000,
503 0x3c70000000000000,
504 0x3c60000000000000,
505 0x3c50000000000000,
506 0x3c40000000000000,
507 0x3c30000000000000,
508 0x3c20000000000000,
509 0x3c10000000000000,
510 0x3c00000000000000,
511 0x3bf0000000000000,
512 0x3be0000000000000,
513 0x3bd0000000000000,
514 0x3bc0000000000000,
515 0x3bb0000000000000,
516 0x3ba0000000000000,
517 0x3b90000000000000,
518 0x3b80000000000000,
519 0x3b70000000000000,
520 0x3b60000000000000,
521 0x3b50000000000000,
522 0x3b40000000000000,
523 0x3b30000000000000,
524 0x3b20000000000000,
525 0x3b10000000000000,
526 0x3b00000000000000,
527 0x3af0000000000000,
528 0x3ae0000000000000,
529 0x3ad0000000000000,
530 0x3ac0000000000000,
531 0x3ab0000000000000,
532 0x3aa0000000000000,
533 0x3a90000000000000,
534 0x3a80000000000000,
535 0x3a70000000000000,
536 0x3a60000000000000,
537 0x3a50000000000000,
538 0x3a40000000000000,
539 0x3a30000000000000,
540 0x3a20000000000000,
541 0x3a10000000000000,
542 0x3a00000000000000,
543 0x39f0000000000000,
544 0x39e0000000000000,
545 0x39d0000000000000,
546 0x39c0000000000000,
547 0x39b0000000000000,
548 0x39a0000000000000,
549 0x3990000000000000,
550 0x3980000000000000,
551 0x3970000000000000,
552 0x3960000000000000,
553 0x3950000000000000,
554 0x3940000000000000,
555 0x3930000000000000,
556 0x3920000000000000,
557 0x3910000000000000,
558 0x3900000000000000,
559 0x38f0000000000000,
560 0x38e0000000000000,
561 0x38d0000000000000,
562 0x38c0000000000000,
563 0x38b0000000000000,
564 0x38a0000000000000,
565 0x3890000000000000,
566 0x3880000000000000,
567 0x3870000000000000,
568 0x3860000000000000,
569 0x3850000000000000,
570 0x3840000000000000,
571 0x3830000000000000,
572 0x3820000000000000,
573 0x3810000000000000,
574 0x3800000000000000,
575 0x37f0000000000000,
576];
577
578static LOG2P1_LOG2_POLY: [u64; 18] = [
590 0x3ff71547652b82fe,
591 0x3c7777d0ffda0d80,
592 0xbfe71547652b82fe,
593 0xbc6777d0fd20b49c,
594 0x3fdec709dc3a03fd,
595 0x3c7d27f05171b74a,
596 0xbfd71547652b82fe,
597 0xbc57814e70b828b0,
598 0x3fd2776c50ef9bfe,
599 0x3c7e4f63e12bff83,
600 0xbfcec709dc3a03f4,
601 0x3fca61762a7adecc,
602 0xbfc71547652d8849,
603 0x3fc484b13d7e7029,
604 0xbfc2776c1b2a40fd,
605 0x3fc0c9a80f9b7c1c,
606 0xbfbecc6801121200,
607 0x3fbc6e4b91fd10e5,
608];
609
610fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
611 let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); for i in 7..=12 {
618 h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
619 }
620
621 let mut v = DoubleDouble::quick_mult_f64(z, h);
622 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
623 v.hi = t.hi;
624 v.lo += t.lo;
625
626 v = DoubleDouble::quick_mult(v, z);
627
628 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
629 v.hi = t.hi;
630 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);
631
632 v = DoubleDouble::quick_mult(v, z);
633
634 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
635 v.hi = t.hi;
636 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);
637
638 v = DoubleDouble::quick_mult(v, z);
639
640 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
641 v.hi = t.hi;
642 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);
643
644 v = DoubleDouble::quick_mult(v, z);
645
646 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
647 v.hi = t.hi;
648 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);
649
650 v = DoubleDouble::quick_mult(v, z);
651
652 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
653 v.hi = t.hi;
654 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);
655
656 v = DoubleDouble::quick_mult(v, z);
657
658 v
659}
660
661pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
666 let mut v_dd = if 1.0 >= x {
667 if (x as f32).abs() >= f32::from_bits(0x25000000) {
669 DoubleDouble::from_exact_add(1.0, x)
670 } else {
671 DoubleDouble::new(x, 1.0)
672 }
673 } else {
674 DoubleDouble::from_exact_add(x, 1.0)
676 };
677
678 let mut v = v_dd.hi.to_bits();
680 let m = v & 0xfffffffffffffu64;
681 let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
682
683 let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
684 v_dd.hi *= scale;
685 v_dd.lo *= scale;
686
687 v = (2.0 + v_dd.hi).to_bits(); let i: i32 = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
694 let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
699 let log_p = log2_poly2(z_dd);
709 let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
715 v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
716 v_dd.lo += f64::from_bits(log2_inv.0);
717 let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
718 p.lo += v_dd.lo + log_p.lo;
719 p
720}
721
722pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
723 static Q2: [u64; 12] = [
727 0x3ff0000000000000,
728 0x3fe62e42fefa39ef,
729 0x3c7abc9d45534d06,
730 0x3fcebfbdff82c58f,
731 0xbc65e4383cf9ddf7,
732 0x3fac6b08d704a0c0,
733 0xbc46cbc55586c8f1,
734 0x3f83b2ab6fba4e77,
735 0x3f55d87fe789aec5,
736 0x3f2430912f879daa,
737 0x3eeffcc774b2367a,
738 0x3eb62c017b9bdfe6,
739 ];
740 let h2 = z.hi * z.hi;
741 let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
742 let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
743 c5 = dd_fmla(c7, h2, c5);
744 let z_vqh = c5 * z.hi;
746 let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
747 q = DoubleDouble::quick_mult(q, z);
749 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
751 q.hi = t.hi;
752 q.lo += t.lo + f64::from_bits(Q2[6]);
753 q = DoubleDouble::quick_mult(q, z);
755 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
756 q.hi = t.hi;
757 q.lo += t.lo + f64::from_bits(Q2[4]);
758 q = DoubleDouble::quick_mult(q, z);
760 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
761 q.hi = t.hi;
762 q.lo += t.lo + f64::from_bits(Q2[2]);
763 q = DoubleDouble::quick_mult(q, z);
765 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
766 q.hi = t.hi;
767 q.lo += t.lo;
768 q
769}
770
771fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
777 if y == 1.0 {
778 let res = 1.0 + x;
779 return res;
780 }
781 let k = x_dd.hi.cpu_round_ties_even(); if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
785 return (1.0 + x_dd.hi * 0.5) as f32;
790 }
791
792 let r = x_dd.hi - k; let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
795 let mut v = (3.015625 + v_dd.hi).to_bits(); let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
804 let q = compoundf_exp2_poly2(v_dd);
805
806 let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
809 let mut q = DoubleDouble::quick_mult(exp2u, q);
810 q = DoubleDouble::from_exact_add(q.hi, q.lo);
811 let mut w = q.hi.to_bits();
835 if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
836 static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
837 let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
838 let err = f64::from_bits(ERR[small as usize]);
839
840 w = (q.hi + (q.lo + err)).to_bits();
841 w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
842 }
843
844 v = (q.hi + q.lo).to_bits();
847 if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
850 v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); }
852 v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
853 f64::from_bits(v) as f32
855}
856
857#[cold]
862#[inline(never)]
863fn compoundf_accurate(x: f32, y: f32) -> f32 {
864 let mut v = compoundf_log2p1_accurate(x as f64);
865 v = DoubleDouble::quick_mult_f64(v, y as f64);
869 compoundf_exp2_accurate(v, x, y)
879}
880
881#[inline]
885pub fn f_compoundf(x: f32, y: f32) -> f32 {
886 let mone = (-1.0f32).to_bits();
897 let nx = x.to_bits();
898 let ny = y.to_bits();
899 if nx >= mone {
900 return as_compoundf_special(x, y);
901 } let ax: u32 = nx.wrapping_shl(1);
905 let ay: u32 = ny.wrapping_shl(1);
906 if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
907 return as_compoundf_special(x, y);
908 } if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
912 if ax <= 0x62000000u32 {
913 return 1.0 + y * x;
914 } let mut s = x as f64 + 1.;
916 let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
917
918 let mut acc = if iter_count % 2 != 0 { s } else { 1. };
920
921 while {
922 iter_count >>= 1;
923 iter_count
924 } != 0
925 {
926 s = s * s;
927 if iter_count % 2 != 0 {
928 acc *= s;
929 }
930 }
931
932 let dz = if y.is_sign_negative() { 1. / acc } else { acc };
933 return dz as f32;
934 }
935
936 let xd = x as f64;
937 let yd = y as f64;
938 let tx = xd.to_bits();
939 let ty = yd.to_bits();
940
941 let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));
942
943 let t: u64 = (l * f64::from_bits(ty)).to_bits();
947 if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
953 if t >= 0x3018bu64 << 46 {
955 return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
957 } else if (t >> 63) == 0 {
958 return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
960 }
961 }
962
963 if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
968 return if (t >> 63) != 0 {
969 black_box(1.0) - black_box(f32::from_bits(0x33000000))
970 } else {
971 black_box(1.0) + black_box(f32::from_bits(0x33000000))
972 };
973 }
974
975 let res = exp2_fast(f64::from_bits(t));
976 if res != -1.0 {
977 return res as f32;
978 }
979 compoundf_accurate(x, y)
980}
981
982#[cfg(test)]
983mod tests {
984 use super::*;
985
986 #[test]
987 fn test_compoundf() {
988 assert_eq!(
989 f_compoundf(
990 0.000000000000000000000000000000000000011754944,
991 -170502050000000000000000000000000000000.
992 ),
993 1.
994 );
995 assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
996 assert_eq!(f_compoundf(2., 3.0), 27.);
997 assert!(f_compoundf(-2., 5.0).is_nan());
998 assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
999 assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
1000 }
1001}