Skip to main content

pxfm/
common.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#[inline(always)]
31pub(crate) fn is_integerf(x: f32) -> bool {
32    #[cfg(any(
33        all(
34            any(target_arch = "x86", target_arch = "x86_64"),
35            target_feature = "sse4.1"
36        ),
37        target_arch = "aarch64"
38    ))]
39    {
40        x.round_ties_even() == x
41    }
42    #[cfg(not(any(
43        all(
44            any(target_arch = "x86", target_arch = "x86_64"),
45            target_feature = "sse4.1"
46        ),
47        target_arch = "aarch64"
48    )))]
49    {
50        let x_u = x.to_bits();
51        let x_e = (x_u & EXP_MASK_F32) >> 23;
52        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
53        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
54        const UNIT_EXPONENT: u32 = E_BIAS + 23;
55        x_e + lsb >= UNIT_EXPONENT
56    }
57}
58
59#[inline(always)]
60pub(crate) fn is_odd_integerf(x: f32) -> bool {
61    #[cfg(target_arch = "aarch64")]
62    {
63        (x as i32 & 1) != 0
64    }
65    #[cfg(not(target_arch = "aarch64"))]
66    {
67        let x_u = x.to_bits();
68        let x_e = (x_u & EXP_MASK_F32) >> 23;
69        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
70        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
71
72        const UNIT_EXPONENT: u32 = E_BIAS + 23;
73        x_e + lsb == UNIT_EXPONENT
74    }
75}
76
77#[inline(always)]
78pub(crate) fn is_integer(n: f64) -> bool {
79    #[cfg(any(
80        all(
81            any(target_arch = "x86", target_arch = "x86_64"),
82            target_feature = "sse4.1"
83        ),
84        target_arch = "aarch64"
85    ))]
86    {
87        n == n.round_ties_even()
88    }
89    #[cfg(not(any(
90        all(
91            any(target_arch = "x86", target_arch = "x86_64"),
92            target_feature = "sse4.1"
93        ),
94        target_arch = "aarch64"
95    )))]
96    {
97        use crate::bits::EXP_MASK;
98        let x_u = n.to_bits();
99        let x_e = (x_u & EXP_MASK) >> 52;
100        let lsb = (x_u | EXP_MASK).trailing_zeros();
101        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
102
103        const UNIT_EXPONENT: u64 = E_BIAS + 52;
104        x_e + lsb as u64 >= UNIT_EXPONENT
105    }
106}
107
108#[inline(always)]
109pub(crate) fn is_odd_integer(x: f64) -> bool {
110    #[cfg(any(
111        all(
112            any(target_arch = "x86", target_arch = "x86_64"),
113            target_feature = "sse4.1"
114        ),
115        target_arch = "aarch64"
116    ))]
117    {
118        (x as i64 & 1) != 0
119    }
120    #[cfg(not(any(
121        all(
122            any(target_arch = "x86", target_arch = "x86_64"),
123            target_feature = "sse4.1"
124        ),
125        target_arch = "aarch64"
126    )))]
127    {
128        use crate::bits::EXP_MASK;
129        let x_u = x.to_bits();
130        let x_e = (x_u & EXP_MASK) >> 52;
131        let lsb = (x_u | EXP_MASK).trailing_zeros();
132        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
133
134        const UNIT_EXPONENT: u64 = E_BIAS + 52;
135        x_e + lsb as u64 == UNIT_EXPONENT
136    }
137}
138
139#[inline]
140pub(crate) const fn rintfk(x: f32) -> f32 {
141    (if x < 0. { x - 0.5 } else { x + 0.5 }) as i32 as f32
142}
143
144#[inline(always)]
145pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
146    c + a * b
147}
148
149#[inline(always)]
150pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
151    #[cfg(any(
152        all(
153            any(target_arch = "x86", target_arch = "x86_64"),
154            target_feature = "fma"
155        ),
156        target_arch = "aarch64"
157    ))]
158    {
159        f32::mul_add(a, b, c)
160    }
161    #[cfg(not(any(
162        all(
163            any(target_arch = "x86", target_arch = "x86_64"),
164            target_feature = "fma"
165        ),
166        target_arch = "aarch64"
167    )))]
168    {
169        a * b + c
170    }
171}
172
173/// Optional FMA, if it is available hardware FMA will use, if not then just scalar `c + a * b`
174#[inline(always)]
175pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
176    #[cfg(any(
177        all(
178            any(target_arch = "x86", target_arch = "x86_64"),
179            target_feature = "fma"
180        ),
181        target_arch = "aarch64"
182    ))]
183    {
184        f64::mul_add(a, b, c)
185    }
186    #[cfg(not(any(
187        all(
188            any(target_arch = "x86", target_arch = "x86_64"),
189            target_feature = "fma"
190        ),
191        target_arch = "aarch64"
192    )))]
193    {
194        a * b + c
195    }
196}
197
198#[inline(always)]
199pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
200    c + a * b
201}
202
203/// Executes mandatory FMA
204/// if not available will be simulated through Dekker and Veltkamp
205#[inline(always)]
206pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
207    #[cfg(any(
208        all(
209            any(target_arch = "x86", target_arch = "x86_64"),
210            target_feature = "fma"
211        ),
212        target_arch = "aarch64"
213    ))]
214    {
215        f_fmla(a, b, c)
216    }
217    #[cfg(not(any(
218        all(
219            any(target_arch = "x86", target_arch = "x86_64"),
220            target_feature = "fma"
221        ),
222        target_arch = "aarch64"
223    )))]
224    {
225        use crate::double_double::DoubleDouble;
226        DoubleDouble::dd_f64_mul_add(a, b, c)
227    }
228}
229
230// Executes mandatory FMA
231// if not available will be simulated through dyadic float 128
232#[inline(always)]
233pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
234    #[cfg(any(
235        all(
236            any(target_arch = "x86", target_arch = "x86_64"),
237            target_feature = "fma"
238        ),
239        target_arch = "aarch64"
240    ))]
241    {
242        f_fmla(a, b, c)
243    }
244    #[cfg(not(any(
245        all(
246            any(target_arch = "x86", target_arch = "x86_64"),
247            target_feature = "fma"
248        ),
249        target_arch = "aarch64"
250    )))]
251    {
252        use crate::dyadic_float::DyadicFloat128;
253        let z = DyadicFloat128::new_from_f64(a);
254        let k = DyadicFloat128::new_from_f64(b);
255        let p = z * k + DyadicFloat128::new_from_f64(c);
256        p.fast_as_f64()
257    }
258}
259
260// Executes mandatory FMA
261// if not available will be simulated through Dekker and Veltkamp
262#[inline(always)]
263#[allow(unused)]
264pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
265    #[cfg(any(
266        all(
267            any(target_arch = "x86", target_arch = "x86_64"),
268            target_feature = "fma"
269        ),
270        target_arch = "aarch64"
271    ))]
272    {
273        f_fmlaf(a, b, c)
274    }
275    #[cfg(not(any(
276        all(
277            any(target_arch = "x86", target_arch = "x86_64"),
278            target_feature = "fma"
279        ),
280        target_arch = "aarch64"
281    )))]
282    {
283        (a as f64 * b as f64 + c as f64) as f32
284    }
285}
286
287/// Copies sign from `y` to `x`
288#[inline]
289pub const fn copysignfk(x: f32, y: f32) -> f32 {
290    f32::from_bits((x.to_bits() & !(1 << 31)) ^ (y.to_bits() & (1 << 31)))
291}
292
293// #[inline]
294// // Founds n in ln(𝑥)=ln(𝑎)+𝑛ln(2)
295// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
296//     (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
297// }
298//
299// #[inline]
300// // Founds a in x=a+𝑛ln(2)
301// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
302//     f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
303// }
304
305#[inline]
306pub(crate) const fn pow2if(q: i32) -> f32 {
307    f32::from_bits((q.wrapping_add(0x7f) as u32) << 23)
308}
309
310/// Round towards whole integral number
311#[inline]
312pub(crate) const fn rintk(x: f64) -> f64 {
313    (if x < 0. { x - 0.5 } else { x + 0.5 }) as i64 as f64
314}
315
316/// Computes 2^n
317#[inline(always)]
318pub(crate) const fn pow2i(q: i32) -> f64 {
319    f64::from_bits((q.wrapping_add(0x3ff) as u64) << 52)
320}
321
322// #[inline]
323// pub(crate) const fn ilogb2k(d: f64) -> i32 {
324//     (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
325// }
326//
327// #[inline]
328// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
329//     f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
330// }
331
332/// Copies sign from `y` to `x`
333#[inline]
334pub const fn copysignk(x: f64, y: f64) -> f64 {
335    f64::from_bits((x.to_bits() & !(1 << 63)) ^ (y.to_bits() & (1 << 63)))
336}
337
338#[inline]
339pub(crate) const fn min_normal_f64() -> f64 {
340    let exponent_bits = 1u64 << 52;
341    let bits = exponent_bits;
342
343    f64::from_bits(bits)
344}
345
346#[inline]
347const fn mask_trailing_ones_u32(len: u32) -> u32 {
348    if len >= 32 {
349        u32::MAX // All ones if length is 64 or more
350    } else {
351        (1u32 << len).wrapping_sub(1)
352    }
353}
354
355pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;
356
357#[inline]
358pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
359    let encoded_mask = new_exp.wrapping_shl(23) & EXP_MASK_F32;
360    x ^ ((x ^ encoded_mask) & EXP_MASK_F32)
361}
362
363#[cfg(test)]
364mod tests {
365    use super::*;
366    #[test]
367    fn test_is_integer() {
368        assert_eq!(is_integer(5.), true);
369        assert_eq!(is_integer(6.), true);
370        assert_eq!(is_integer(6.01), false);
371        assert_eq!(is_odd_integer(5.), true);
372        assert_eq!(is_odd_integer(6.), false);
373        assert_eq!(is_odd_integer(6.01), false);
374        assert_eq!(is_integerf(5.), true);
375        assert_eq!(is_integerf(6.), true);
376        assert_eq!(is_integerf(6.01), false);
377        assert_eq!(is_odd_integerf(5.), true);
378        assert_eq!(is_odd_integerf(6.), false);
379        assert_eq!(is_odd_integerf(6.01), false);
380    }
381}