fearless_simd/
macros.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Macros publicly exported
5
/// Defines a new function which dispatches to a SIMD-generic function, enabling the correct
/// target features.
///
/// The `fn` token in the definition can be prefixed with a visibility (e.g. `pub`),
/// to set the visibility of the outer function.
/// We recommend that the implementation function remains private, and
/// should only be called through the dispatch function.
/// (The exact patterns for SIMD functions using Fearless SIMD have not
/// yet been designed/enumerated).
///
/// The implementation function (which is outside of this macro) *should* have the
/// `#[inline(always)]` attribute.
/// There are likely to be severe performance consequences if this is not the case, as
/// Rust will be unable to inline SIMD intrinsics in that case.
///
/// The `fn` token in the definition can be prefixed with `unsafe`, to allow an unsafe inner function.
/// The generated function is then `unsafe` as well, and any attributes written in the
/// `simd_dispatch` invocation (including doc comments) are placed on it.
/// The safety documentation you write there must state the preconditions required to call
/// the inner function.
///
/// The first parameter must be spelled `level`; the remaining parameters are written as
/// `name: Type` pairs and are forwarded to the implementation function in the same order.
///
/// # Examples
///
/// ```rust
/// use fearless_simd::{Simd, simd_dispatch};
///
/// #[inline(always)]
/// fn sigmoid_impl<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
///
/// simd_dispatch!(fn sigmoid(level, x: &[f32], out: &mut [f32]) = sigmoid_impl);
/// ```
///
/// The signature of the generated function will be:
///
/// ```rust
/// use fearless_simd::Level;
/// fn sigmoid(level: Level, x: &[f32], out: &mut [f32]) { /* ... */ }
/// ```
#[macro_export]
#[deprecated = "use dispatch!(level, simd => operation) instead"]
macro_rules! simd_dispatch {
    // Public form for `unsafe fn`: record the qualifier as `(unsafe)` and hand over to `@impl`.
    // The argument list is re-emitted as `level, a: A, b: B` (no doubled or trailing commas),
    // which is the shape the `@impl` matcher accepts.
    (
        $( #[$meta:meta] )* $vis:vis
        unsafe fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        // `$crate::` keeps the recursive call resolvable even when the caller invoked the macro
        // by path instead of importing it by name.
        $crate::simd_dispatch!{@impl => $(#[$meta])* $vis (unsafe) fn $func (level $(,$arg:$ty)*) $(->$ret)? = $inner}
    };
    // Public form for safe functions: same as above with an empty qualifier `()`.
    (
        $( #[$meta:meta] )* $vis:vis
        fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        $crate::simd_dispatch!{@impl => $(#[$meta])* $vis () fn $func (level $(,$arg:$ty)*) $(->$ret)? = $inner}
    };
    // Internal form: `($($unsafe)?)` is either `()` or `(unsafe)` and is spliced in front of
    // every generated `fn` and around every call to `$inner`.
    (
        @impl => $( #[$meta:meta] )* $vis:vis
        ($($unsafe: ident)?) fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        $( #[$meta] )* $vis
        $($unsafe)? fn $func(level: $crate::Level $(, $arg: $ty )*) $( -> $ret )? {
            // One wrapper per SIMD level that exists on the compilation target. Each wrapper
            // enables its target features and then calls `$inner` with the matching token.
            #[cfg(target_arch = "aarch64")]
            #[target_feature(enable = "neon")]
            #[inline]
            $($unsafe)? fn inner_neon(neon: $crate::aarch64::Neon $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( neon $( , $arg )* )
                }
            }
            // No `#[target_feature]` here: this wrapper only exists when `simd128` is already
            // enabled for the whole compilation (see the `cfg`).
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            #[inline]
            $($unsafe)? fn inner_wasm_simd128(simd128: $crate::wasm32::WasmSimd128 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( simd128 $( , $arg )* )
                }
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            #[target_feature(enable = "sse4.2")]
            #[inline]
            $($unsafe)? fn inner_sse4_2(sse4_2: $crate::x86::Sse4_2 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( sse4_2 $( , $arg )* )
                }
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            #[target_feature(enable = "avx2,fma")]
            #[inline]
            $($unsafe)? fn inner_avx2(avx2: $crate::x86::Avx2 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( avx2 $( , $arg )* )
                }
            }
            match level {
                // The fallback level needs no target features, so `$inner` is called directly.
                $crate::Level::Fallback(fb) => {
                    $($unsafe)? {
                        $inner(fb $( , $arg )* )
                    }
                },
                // SAFETY (all arms below): the wrapper's target features are required by the
                // token carried in `level`. NOTE(review): this relies on such a token only
                // existing when its features are available on the running CPU -- confirm
                // against `Level`. For `unsafe` dispatch functions, the caller additionally
                // upholds the preconditions of `$inner`.
                #[cfg(target_arch = "aarch64")]
                $crate::Level::Neon(neon) => unsafe { inner_neon(neon $( , $arg )* ) },
                #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
                $crate::Level::WasmSimd128(wasm) => unsafe { inner_wasm_simd128(wasm $( , $arg )* ) },
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                $crate::Level::Sse4_2(sse4_2) => unsafe { inner_sse4_2(sse4_2 $( , $arg )* ) },
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                $crate::Level::Avx2(avx2) => unsafe { inner_avx2(avx2 $( , $arg )* ) },
                // Catch-all for variants not covered above.
                _ => unreachable!(),
            }
        }
    };
}
116
/// Run an operation with the [`Simd`] implementation that corresponds to a given `level`.
///
/// This macro is the entry point through which explicitly written SIMD functions in this
/// crate are called from a non-SIMD context.
///
/// # Arguments
///
/// The first argument is the [`Level`].
/// Prefer constructing a [`Level`] once and passing it around over calling
/// [`Level::new()`] frequently: `Level::new` has to detect which target features are
/// available, which can be slow.
///
/// The second argument has the form `pattern => operation`. The pattern is bound to the
/// `Simd` value for the selected level, and the operation is an expression which may use it.
///
/// # Code size
///
/// The operation is repeated literally in the output, once for each SIMD level supported on
/// your target, and is type-checked (etc.) for each of them. Keep it small: in most cases it
/// should be a single call to a function which is generic over `Simd` implementations, as
/// seen in [the examples](#examples).
/// At runtime the operation is still executed only once per execution of `dispatch`.
///
/// To guarantee target-feature-specific code generation, any functions called within the
/// operation should be `#[inline(always)]`.
///
/// # Control flow
///
/// As an implementation detail, the operation is executed inside a closure; this is what
/// allows the target features to be enabled for the code inside the operation.
/// As a consequence, early `return` and `?` will not work as expected.
/// When `dispatch` is used to call a single function (which we expect to be the majority of
/// cases), you can apply `?` to the value returned by `dispatch` instead.
/// To emulate early return, you can use [`ControlFlow`](core::ops::ControlFlow).
///
/// # Examples
///
/// ```
/// use fearless_simd::{Level, Simd, dispatch};
///
/// #[inline(always)]
/// fn sigmoid<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
///
/// let level = Level::new();
///
/// dispatch!(level, simd => sigmoid(simd, &[/*...*/], &mut [/*...*/]));
/// ```
///
/// [`Level`]: crate::Level
/// [`Level::new()`]: crate::Level::new
/// [`Simd`]: crate::Simd
#[macro_export]
macro_rules! dispatch {
    ($level:expr, $simd:pat => $op:expr) => {{
        /// Hands the `Simd` value back as an opaque `impl Simd`, which enforces
        /// that it is correctly handled.
        #[inline(always)]
        fn launder<S: $crate::Simd>(token: S) -> impl $crate::Simd {
            token
        }

        // Every arm has the same shape: bind the caller's pattern to the laundered token, then
        // run the operation as a closure through `Simd::vectorize` on the concrete token.
        match $level {
            $crate::Level::Fallback(token) => {
                let $simd = launder(token);
                // `vectorize` does nothing for the fallback level; it is called anyway so
                // that this arm stays consistent with the others.
                $crate::Simd::vectorize(token, #[inline(always)] || $op)
            }
            #[cfg(target_arch = "aarch64")]
            $crate::Level::Neon(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(token, #[inline(always)] || $op)
            }
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            $crate::Level::WasmSimd128(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(token, #[inline(always)] || $op)
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Sse4_2(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(token, #[inline(always)] || $op)
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Avx2(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(token, #[inline(always)] || $op)
            }
            // Catch-all for variants not covered above.
            _ => unreachable!(),
        }
    }};
}
219
#[cfg(test)]
// This expect also validates that we haven't missed any levels!
#[expect(
    unreachable_patterns,
    reason = "Level is non_exhaustive, but you must be exhaustive within the same crate."
)]
mod tests {
    use crate::{Level, Simd};

    /// Compile-only check: the value bound by `dispatch!` can be passed to a function that is
    /// generic over `Simd`, with the `Simd` type parameter left for inference.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_generic() {
        fn passthrough<S: Simd, T>(_token: S, value: T) -> T {
            value
        }
        dispatch!(Level::new(), simd => passthrough::<_, ()>(simd, ()));
    }

    /// Compile-only check: the value bound by `dispatch!` can be passed to a callable whose
    /// `Simd` type parameter is determined only by that argument.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_value() {
        fn consumer<S: Simd>() -> impl FnOnce(S) {
            |_token| ()
        }
        dispatch!(Level::new(), simd => (consumer())(simd));
    }

    /// The value of the operation is the value of the `dispatch!` expression.
    #[test]
    fn dispatch_output() {
        let answer = dispatch!(Level::new(), _simd => 42);
        assert_eq!(42, answer);
    }

    mod no_import_simd {
        /// [`dispatch`] must be usable from a scope which doesn't import anything.
        #[test]
        fn dispatch_with_no_imports() {
            assert_eq!(dispatch!(crate::Level::new(), _ => 1 + 2), 3);
        }
    }
}
258}