//! Common numeric helpers shared across the `pxfm` crate (`common.rs`).
/*
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/// Returns `true` when `x` holds an integral value (f32 variant).
///
/// On targets with a native rounding instruction (x86 SSE4.1, aarch64) this
/// is a round-and-compare; elsewhere it falls back to bit arithmetic.
///
/// NOTE(review): the two paths disagree for `x == 0.0` and subnormals — the
/// bit path returns `false` (0 + 23 < 150) while the rounding path returns
/// `true`. Callers presumably screen out zero beforehand — confirm at call
/// sites.
#[inline(always)]
pub(crate) fn is_integerf(x: f32) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        // An integral value rounds to itself; NaN compares unequal, so
        // NaN yields false here.
        x.round_ties_even() == x
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // x is integral iff its lowest set mantissa bit has weight >= 1,
        // i.e. biased_exponent + trailing_zeros >= bias + 23. OR-ing in the
        // exponent mask caps trailing_zeros at 23 when the mantissa is all
        // zeros (powers of two).
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK_F32) >> 23;
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        x_e + lsb >= UNIT_EXPONENT
    }
}

/// Returns `true` when `x` is an odd integer (f32 variant).
///
/// NOTE(review): the aarch64 path truncates with `as i32` and only tests the
/// low bit, so it reports `true` for non-integers whose integral part is odd
/// (e.g. 5.5) and saturates for |x| >= 2^31, while the generic path performs
/// a full odd-integer test. Presumably callers only use this after an
/// integrality/range check — confirm at call sites.
#[inline(always)]
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    #[cfg(target_arch = "aarch64")]
    {
        // Truncating (saturating) cast + parity test; relies on caller-side
        // guarantees described above.
        (x as i32 & 1) != 0
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        // x is an odd integer iff its lowest set mantissa bit has weight
        // exactly 1: biased_exponent + trailing_zeros == bias + 23.
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK_F32) >> 23;
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;

        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        x_e + lsb == UNIT_EXPONENT
    }
}

/// Returns `true` when `n` holds an integral value (f64 variant).
///
/// On targets with a native rounding instruction (x86 SSE4.1, aarch64) this
/// is a round-and-compare; elsewhere it falls back to bit arithmetic.
///
/// NOTE(review): as with `is_integerf`, the two paths disagree for
/// `n == 0.0` and subnormals (bit path: false; rounding path: true) —
/// confirm callers handle zero separately.
#[inline(always)]
pub(crate) fn is_integer(n: f64) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        // An integral value rounds to itself; NaN compares unequal.
        n == n.round_ties_even()
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // n is integral iff its lowest set mantissa bit has weight >= 1,
        // i.e. biased_exponent + trailing_zeros >= bias + 52.
        use crate::bits::EXP_MASK;
        let x_u = n.to_bits();
        let x_e = (x_u & EXP_MASK) >> 52;
        let lsb = (x_u | EXP_MASK).trailing_zeros();
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        x_e + lsb as u64 >= UNIT_EXPONENT
    }
}

/// Parity test assuming `x` is already known to be a finite integral value
/// representable as `i64` (the `_fast` suffix marks that caller contract).
///
/// NOTE(review): `to_int_unchecked` is UB for NaN, infinities, or values
/// outside the `i64` range — every caller must guarantee the contract above.
#[inline(always)]
#[allow(unused)]
pub(crate) fn is_odd_integer_fast(x: f64) -> bool {
    // SAFETY: caller contract — `x` is a finite integer in i64 range.
    unsafe { (x.to_int_unchecked::<i64>() & 1) != 0 }
}

/// Parity test assuming `x` is already known to be a finite integral value
/// representable as `i32` (the `_fast` suffix marks that caller contract).
///
/// NOTE(review): `to_int_unchecked` is UB for NaN, infinities, or values
/// outside the `i32` range — every caller must guarantee the contract above.
#[inline(always)]
#[allow(unused)]
pub(crate) fn is_odd_integerf_fast(x: f32) -> bool {
    // SAFETY: caller contract — `x` is a finite integer in i32 range.
    unsafe { (x.to_int_unchecked::<i32>() & 1) != 0 }
}

/// Returns `true` when `x` is an odd integer (f64 variant).
///
/// NOTE(review): the fast path truncates with `as i64` and only tests the
/// low bit, so it reports `true` for non-integers whose integral part is odd
/// (e.g. 5.5) and saturates for |x| >= 2^63, while the generic path performs
/// a full odd-integer test. Presumably callers only use this after an
/// integrality/range check — confirm at call sites.
#[inline(always)]
pub(crate) fn is_odd_integer(x: f64) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Truncating (saturating) cast + parity test; relies on caller-side
        // guarantees described above.
        (x as i64 & 1) != 0
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // x is an odd integer iff its lowest set mantissa bit has weight
        // exactly 1: biased_exponent + trailing_zeros == bias + 52.
        use crate::bits::EXP_MASK;
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK) >> 52;
        let lsb = (x_u | EXP_MASK).trailing_zeros();
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        x_e + lsb as u64 == UNIT_EXPONENT
    }
}

/// Rounds `x` to the nearest integer with ties away from zero, via an `i32`
/// round trip (the `as` cast truncates toward zero and saturates out of
/// range, so this is only meaningful for |x| well inside the i32 range).
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    // Shift half a unit toward the sign of x, then truncate.
    let shifted = if x < 0. { x - 0.5 } else { x + 0.5 };
    shifted as i32 as f32
}

/// Unfused multiply-add: evaluates `c + a * b` with two roundings.
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    let product = a * b;
    product + c
}

/// Multiply-add for `f32`: uses hardware FMA (single rounding) when the
/// target guarantees it, otherwise the plain two-rounding `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // No guaranteed FMA instruction: separate multiply and add.
        a * b + c
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Fused path: single rounding of a * b + c.
        a.mul_add(b, c)
    }
}

/// Optional FMA: uses the hardware fused multiply-add when the target
/// guarantees it, otherwise falls back to the scalar `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // No guaranteed FMA instruction: separate multiply and add.
        a * b + c
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Fused path: single rounding of a * b + c.
        a.mul_add(b, c)
    }
}

/// Unfused multiply-add: evaluates `c + a * b` with two roundings.
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    let product = a * b;
    product + c
}

/// Executes a mandatory FMA (single-rounding semantics required by callers).
/// If hardware FMA is not available it is emulated with double-double
/// (Dekker/Veltkamp) arithmetic.
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware FMA path.
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Software path: double-double emulation of fused multiply-add.
        use crate::double_double::DoubleDouble;
        DoubleDouble::dd_f64_mul_add(a, b, c)
    }
}

// Executes a mandatory FMA (single-rounding semantics required by callers).
// If hardware FMA is not available it is emulated through 128-bit dyadic
// floats, which hold the product and sum to extra precision before the
// final conversion back to f64 (see `DyadicFloat128::fast_as_f64`).
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware FMA path.
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Software path: widen each operand to a 128-bit dyadic float,
        // form a*b + c in extended precision, then convert back once.
        use crate::dyadic_float::DyadicFloat128;
        let z = DyadicFloat128::new_from_f64(a);
        let k = DyadicFloat128::new_from_f64(b);
        let p = z * k + DyadicFloat128::new_from_f64(c);
        p.fast_as_f64()
    }
}

// Executes a mandatory FMA for f32 operands.
// If hardware FMA is not available it is emulated by widening to f64
// (the comment previously said "Dekker and Veltkamp", but the code widens).
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware FMA path.
        f_fmlaf(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // The f32 product is exact in f64 (24+24 significand bits fit in 53).
        // NOTE(review): the final `as f32` adds a second rounding, which can
        // differ from a true fused result in rare cases — confirm call sites
        // tolerate this.
        (a as f64 * b as f64 + c as f64) as f32
    }
}

/// Copies the sign of `y` onto the magnitude of `x` (f32 variant).
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    const SIGN_BIT: u32 = 1u32 << 31;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f32::from_bits(magnitude | sign)
}

// #[inline]
// // Finds n in ln(x) = ln(a) + n*ln(2)
// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
//     (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
// }
//
// #[inline]
// // Finds a in x = a + n*ln(2)
// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
//     f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
// }

/// Computes `2^q` as an `f32` by building the bit pattern directly:
/// place the biased exponent `q + 127` in the exponent field.
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    let biased = q.wrapping_add(0x7f) as u32;
    f32::from_bits(biased << 23)
}

/// Rounds `x` to the nearest integer with ties away from zero, via an `i64`
/// round trip (the `as` cast truncates toward zero and saturates out of
/// range, so this is only meaningful for |x| well inside the i64 range).
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    // Shift half a unit toward the sign of x, then truncate.
    let shifted = if x < 0. { x - 0.5 } else { x + 0.5 };
    shifted as i64 as f64
}

/// Computes `2^q` as an `f64` by building the bit pattern directly:
/// place the biased exponent `q + 1023` in the exponent field.
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    let biased = q.wrapping_add(0x3ff) as u64;
    f64::from_bits(biased << 52)
}

// #[inline]
// pub(crate) const fn ilogb2k(d: f64) -> i32 {
//     (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
// }
//
// #[inline]
// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
//     f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
// }

/// Copies the sign of `y` onto the magnitude of `x` (f64 variant).
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    const SIGN_BIT: u64 = 1u64 << 63;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f64::from_bits(magnitude | sign)
}

/// Returns the smallest positive normal `f64` (2^-1022).
///
/// The standard library already provides this value as `f64::MIN_POSITIVE`
/// (bit pattern `1 << 52`), so the hand-built bit construction is replaced
/// with the named constant.
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    f64::MIN_POSITIVE
}

/// Builds a `u32` mask with the `len` lowest bits set.
#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    if len >= 32 {
        u32::MAX // All ones if length is 32 or more (comment said 64 — this is u32)
    } else {
        // `1 << len` would overflow at len == 32; the branch above avoids it.
        (1u32 << len).wrapping_sub(1)
    }
}

/// IEEE-754 binary32 exponent-field mask (bits 23..=30), i.e. `0x7f80_0000`.
pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;

369#[inline]
370pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
371    let encoded_mask = new_exp.wrapping_shl(23) & EXP_MASK_F32;
372    x ^ ((x ^ encoded_mask) & EXP_MASK_F32)
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Integer / odd-integer classification for both f32 and f64 helpers.
    /// (`assert_eq!(x, true)` replaced with idiomatic `assert!` —
    /// clippy::bool_assert_comparison.)
    #[test]
    fn test_is_integer() {
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
}
393}