Skip to main content

fearless_simd/
macros.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Macros publicly exported
5
/// Access the applicable [`Simd`] for a given `level`, and perform an operation using it.
///
/// This macro is the root of how any explicitly written SIMD functions in this crate are
/// called from a non-SIMD context.
///
/// The first parameter to the macro is the [`Level`].
/// You should prefer to construct a [`Level`] once and pass it around, rather than
/// frequently calling [`Level::new()`].
/// This is because `Level::new` has to detect which target features are available, which can be slow.
///
/// The second parameter has the form `pattern => operation`: `pattern` is bound to an opaque
/// `impl Simd` value for the selected level, and `operation` is the expression to evaluate with it.
///
/// The code of the operation will be repeated literally several times in the output, so you should prefer
/// to keep this code small (as it will be type-checked, etc. for each supported SIMD level on your target).
/// In most cases, it should be a single call to a function which is generic over `Simd` implementations,
/// as seen in [the examples](#examples).
/// For clarity, it will only be executed once per execution of `dispatch`.
///
/// To guarantee target-feature-specific code generation, any functions called within the operation should
/// be `#[inline(always)]`.
///
/// Note that as an implementation detail of this macro, the operation will be executed inside a closure.
/// This is what enables the target features to be enabled for the code inside the operation.
/// A consequence of this is that early `return` and `?` will not work as expected.
/// Note that in cases where you use `dispatch` to call a single function (which we expect to be the
/// majority of cases), you can use `?` on the return value of dispatch instead.
/// To emulate early return, you can use [`ControlFlow`](core::ops::ControlFlow) instead.
///
/// # Examples
///
/// ```rust
/// use fearless_simd::{Level, Simd, dispatch};
///
/// #[inline(always)]
/// fn sigmoid<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
///
/// let level = Level::new();
///
/// dispatch!(level, simd => sigmoid(simd, &[/*...*/], &mut [/*...*/]));
/// ```
///
/// [`Level`]: crate::Level
/// [`Level::new()`]: crate::Level::new
/// [`Simd`]: crate::Simd
#[macro_export]
macro_rules! dispatch {
    // Public entry point.
    // This falls through to the next branch, but with `forced_fallback_arm` turned into a boolean literal
    // indicating whether or not the `force_support_fallback` crate feature is enabled.
    ($level:expr, $simd:pat => $op:expr) => {{ $crate::internal_unstable_dispatch_inner!($level, $simd => $op) }};
    // Internal arm: `$forced_fallback_arm` is spliced into the `cfg` of the `Fallback` match arm below.
    (@impl $level:expr, $simd:pat => $op:expr; $forced_fallback_arm:literal) => {{
        /// Convert the `Simd` value into an `impl Simd`, which enforces that
        /// it is correctly handled.
        // TODO: Just make into a `pub` function in fearless_simd itself?
        #[inline(always)]
        fn launder<S: $crate::Simd>(x: S) -> impl $crate::Simd {
            x
        }

        // Every arm has the same shape: bind the caller's pattern to the laundered (opaque) token,
        // then run the operation as a closure passed to `vectorize` on the concrete token.
        match $level {
            #[cfg(target_arch = "aarch64")]
            $crate::Level::Neon(neon) => {
                let $simd = launder(neon);
                $crate::Simd::vectorize(
                    neon,
                    #[inline(always)]
                    || $op,
                )
            }
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            $crate::Level::WasmSimd128(wasm) => {
                let $simd = launder(wasm);
                $crate::Simd::vectorize(
                    wasm,
                    #[inline(always)]
                    || $op,
                )
            }
            // This fallthrough logic is documented at the definition site of `Level`.
            // The SSE4.2 arm is only emitted when the listed AVX2-level features are not all
            // statically enabled for the target.
            #[cfg(all(
                any(target_arch = "x86", target_arch = "x86_64"),
                not(all(
                    target_feature = "avx2",
                    target_feature = "bmi1",
                    target_feature = "bmi2",
                    target_feature = "cmpxchg16b",
                    target_feature = "f16c",
                    target_feature = "fma",
                    target_feature = "lzcnt",
                    target_feature = "movbe",
                    target_feature = "popcnt",
                    target_feature = "xsave"
                ))
            ))]
            $crate::Level::Sse4_2(sse4_2) => {
                let $simd = launder(sse4_2);
                $crate::Simd::vectorize(
                    sse4_2,
                    #[inline(always)]
                    || $op,
                )
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Avx2(avx2) => {
                let $simd = launder(avx2);
                $crate::Simd::vectorize(
                    avx2,
                    #[inline(always)]
                    || $op,
                )
            }
            // The fallback arm is emitted when the target does not statically enable the baseline
            // SIMD features of its architecture, on architectures without a dedicated arm above,
            // or when `$forced_fallback_arm` is `true`.
            #[cfg(any(
                all(target_arch = "aarch64", not(target_feature = "neon")),
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    not(all(
                        target_feature = "sse4.2",
                        target_feature = "cmpxchg16b",
                        target_feature = "popcnt"
                    ))
                ),
                all(target_arch = "wasm32", not(target_feature = "simd128")),
                not(any(
                    target_arch = "x86",
                    target_arch = "x86_64",
                    target_arch = "aarch64",
                    target_arch = "wasm32"
                )),
                $forced_fallback_arm
            ))]
            $crate::Level::Fallback(fb) => {
                let $simd = launder(fb);
                // This vectorize call does nothing, but it is reasonable to be consistent here.
                $crate::Simd::vectorize(
                    fb,
                    #[inline(always)]
                    || $op,
                )
            }
            // Catch-all for any `Level` variant whose arm is not compiled in for this target.
            _ => unreachable!(),
        }
    }};
}
146
// This macro turns whether the `force_support_fallback` crate feature is enabled into a boolean literal
// in `dispatch`, which allows it to be used correctly cross-crate.
// This trickery is required because macros are expanded in the context of the calling crate, including for
// evaluating `cfg`s.
151
/// Implementation detail of [`crate::dispatch`]; this is not public API.
///
/// This definition is compiled when the `force_support_fallback` crate feature is enabled.
/// It forwards its arguments unchanged to the `@impl` arm of `dispatch!`, appending the
/// literal `true` as the `forced_fallback_arm` argument.
#[macro_export]
#[doc(hidden)]
#[cfg(feature = "force_support_fallback")]
macro_rules! internal_unstable_dispatch_inner {
    ($level:expr, $simd:pat => $op:expr) => {
        // `true`: the `force_support_fallback` feature is enabled in this crate.
        $crate::dispatch!(@impl $level, $simd => $op; true)
    };
}
163
/// Implementation detail of [`crate::dispatch`]; this is not public API.
///
/// This definition is compiled when the `force_support_fallback` crate feature is disabled.
/// It forwards its arguments unchanged to the `@impl` arm of `dispatch!`, appending the
/// literal `false` as the `forced_fallback_arm` argument.
#[macro_export]
#[doc(hidden)]
#[cfg(not(feature = "force_support_fallback"))]
macro_rules! internal_unstable_dispatch_inner {
    ($level:expr, $simd:pat => $op:expr) => {
        // `false`: the `force_support_fallback` feature is not enabled in this crate.
        $crate::dispatch!(@impl $level, $simd => $op; false)
    };
}
173
#[cfg(test)]
// This expect also validates that we haven't missed any levels!
// If `dispatch` had an arm for every `Level` variant compiled for this target, its wildcard arm
// is unreachable and the lint fires as expected; a missing arm would leave the expectation unfulfilled.
#[expect(
    unreachable_patterns,
    reason = "Level is non_exhaustive, but you must be exhaustive within the same crate."
)]
mod tests {
    use crate::{Level, Simd};

    /// Compile-only check: the operation may call a function with explicit generic arguments,
    /// leaving the `Simd` type parameter to be inferred from the bound pattern.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_generic() {
        fn generic<S: Simd, T>(_: S, x: T) -> T {
            x
        }
        dispatch!(Level::new(), simd => generic::<_, ()>(simd, ()));
    }

    /// Compile-only check: the operation may call a value (here a returned closure) whose
    /// `Simd` type parameter is inferred from how it is applied to the bound pattern.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_value() {
        fn make_fn<S: Simd>() -> impl FnOnce(S) {
            |_| ()
        }
        dispatch!(Level::new(), simd => (make_fn())(simd));
    }

    /// `dispatch!` evaluates to the value of the operation.
    #[test]
    fn dispatch_output() {
        assert_eq!(42, dispatch!(Level::new(), _simd => 42));
    }

    mod no_import_simd {
        /// We should be able to use [`dispatch`] in a scope which doesn't import anything.
        #[test]
        fn dispatch_with_no_imports() {
            let res = dispatch!(crate::Level::new(), _ => 1 + 2);
            assert_eq!(res, 3);
        }
    }
}