fearless_simd/core_arch/x86/fma.rs

// Copyright 2024 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Access to FMA intrinsics.

use crate::impl_macros::delegate;
#[cfg(target_arch = "x86")]
use core::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as arch;

use arch::*;

/// A token for FMA intrinsics on `x86` and `x86_64`.
#[derive(Clone, Copy, Debug)]
pub struct Fma {
    _private: (),
}

impl Fma {
    /// Create a SIMD token.
    ///
    /// # Safety
    ///
    /// The required CPU features (the `fma` target feature) must be available.
    #[inline]
    pub unsafe fn new_unchecked() -> Self {
        Self { _private: () }
    }

    delegate! { arch:
        fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128;
        fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
        fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128;
    }
}
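
// An illustrative sketch, not part of the upstream file: one way a caller
// might obtain the token safely before using one of the delegated intrinsics.
// It assumes `std` is available for runtime feature detection (gated here by a
// hypothetical `std` cargo feature) and that `delegate!` exposes each
// intrinsic as a safe method on the token, as the signatures above suggest.
// The function name is illustrative only.
#[cfg(feature = "std")] // hypothetical feature; the crate may be `no_std` by default
pub fn fmadd_ps_example(a: __m256, b: __m256, c: __m256) -> Option<__m256> {
    // Checking both `avx` and `fma` covers the 256-bit forms of the intrinsics.
    if std::arch::is_x86_feature_detected!("avx") && std::arch::is_x86_feature_detected!("fma") {
        // SAFETY: the required CPU features were detected at runtime just above.
        let fma = unsafe { Fma::new_unchecked() };
        // Lane-wise fused `a * b + c` over eight `f32` lanes.
        Some(fma._mm256_fmadd_ps(a, b, c))
    } else {
        None
    }
}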