fearless_simd/macros.rs
1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Macros publicly exported
5
/// Defines a new function which dispatches to a SIMD-generic function, enabling the correct
/// target features.
///
/// The `fn` token in the definition can be prefixed with a visibility (e.g. `pub`),
/// to set the visibility of the outer function.
/// We recommend that the implementation function remains private, and
/// should only be called through the dispatch function.
/// (The exact patterns for SIMD functions using Fearless SIMD have not
/// yet been designed/enumerated).
///
/// The implementation function (which is outside of this macro) *should* have the
/// `#[inline(always)]` attribute.
/// There are likely to be severe performance consequences if this is not the case, as
/// Rust will be unable to inline SIMD intrinsics in that case.
///
/// The `fn` token in the definition can be prefixed with `unsafe`, to allow an unsafe inner function.
/// The generated function is then `unsafe` as well; the safety comment you write when calling
/// it must cover the preconditions required to call the inner function.
///
/// # Examples
///
/// ```rust
/// use fearless_simd::{Simd, simd_dispatch};
///
/// #[inline(always)]
/// fn sigmoid_impl<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
///
/// simd_dispatch!(fn sigmoid(level, x: &[f32], out: &mut [f32]) = sigmoid_impl);
/// ```
///
/// The signature of the generated function will be:
///
/// ```rust
/// use fearless_simd::Level;
/// fn sigmoid(level: Level, x: &[f32], out: &mut [f32]) { /* ... */ }
/// ```
#[macro_export]
#[deprecated = "use dispatch!(level, simd => operation) instead"]
macro_rules! simd_dispatch {
    // Public form for an `unsafe fn`: forwards to `@impl` with the `unsafe` marker filled in.
    // The argument list is re-emitted in exactly the shape the `@impl` rule parses
    // (`level` followed by `, name: Type` pairs), so that the forwarded tokens match it.
    (
        $( #[$meta:meta] )* $vis:vis
        unsafe fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        $crate::simd_dispatch!{@impl => $(#[$meta])* $vis (unsafe) fn $func (level $(,$arg:$ty)*) $(->$ret)? = $inner}
    };
    // Public form for a safe `fn`: forwards to `@impl` with an empty `unsafe` marker.
    (
        $( #[$meta:meta] )* $vis:vis
        fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        $crate::simd_dispatch!{@impl => $(#[$meta])* $vis () fn $func (level $(,$arg:$ty)*) $(->$ret)? = $inner}
    };
    // Internal rule which generates the dispatch function.
    // `($($unsafe)?)` is either `(unsafe)` or `()`, and is spliced in front of every
    // generated `fn` and around every call to `$inner`.
    (
        @impl => $( #[$meta:meta] )* $vis:vis
        ($($unsafe: ident)?) fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )?
        = $inner:ident
    ) => {
        $( #[$meta] )* $vis
        $($unsafe)? fn $func(level: $crate::Level $(, $arg: $ty )*) $( -> $ret )? {
            // One wrapper per SIMD level. Each wrapper turns on that level's target
            // features (where an attribute is needed) and then calls `$inner`.
            #[cfg(target_arch = "aarch64")]
            #[target_feature(enable = "neon")]
            #[inline]
            $($unsafe)? fn inner_neon(neon: $crate::aarch64::Neon $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( neon $( , $arg )* )
                }
            }
            // Only compiled when `simd128` is enabled for the whole build, so no
            // `#[target_feature]` attribute is applied here.
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            #[inline]
            $($unsafe)? fn inner_wasm_simd128(simd128: $crate::wasm32::WasmSimd128 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( simd128 $( , $arg )* )
                }
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            #[target_feature(enable = "sse4.2")]
            #[inline]
            $($unsafe)? fn inner_sse4_2(sse4_2: $crate::x86::Sse4_2 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( sse4_2 $( , $arg )* )
                }
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            #[target_feature(enable = "avx2,fma")]
            #[inline]
            $($unsafe)? fn inner_avx2(avx2: $crate::x86::Avx2 $( , $arg: $ty )* ) $( -> $ret )? {
                $($unsafe)? {
                    $inner( avx2 $( , $arg )* )
                }
            }
            // NOTE(review): the `unsafe` blocks below presumably rely on a `Level` variant
            // only existing when the matching CPU features are available — confirm against
            // how `Level` values are constructed.
            match level {
                // The fallback level needs no target features, so `$inner` is called directly.
                $crate::Level::Fallback(fb) => {
                    $($unsafe)? {
                        $inner(fb $( , $arg )* )
                    }
                },
                #[cfg(target_arch = "aarch64")]
                $crate::Level::Neon(neon) => unsafe { inner_neon (neon $( , $arg )* ) }
                #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
                $crate::Level::WasmSimd128(wasm) => unsafe { inner_wasm_simd128 (wasm $( , $arg )* ) }
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                $crate::Level::Sse4_2(sse4_2) => unsafe { inner_sse4_2(sse4_2 $( , $arg)* ) }
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                $crate::Level::Avx2(avx2) => unsafe { inner_avx2(avx2 $( , $arg)* ) }
                // Catch-all for levels which are not handled by the arms above.
                _ => unreachable!()
            }
        }
    };
}
116
/// Look up the [`Simd`] implementation matching a given `level`, and run an operation with it.
///
/// Every explicitly written SIMD function in this crate is ultimately reached from
/// non-SIMD code through this macro.
///
/// The macro takes the [`Level`] as its first parameter.
/// Prefer creating a [`Level`] once and handing it around over calling [`Level::new()`]
/// repeatedly: `Level::new` has to detect which target features are available, which can be slow.
///
/// The operation's code is pasted literally into the output several times, so keep it small
/// (it is type-checked, etc. once for each SIMD level supported on your target).
/// Usually it should be nothing more than one call to a function generic over `Simd`
/// implementations, as shown in [the example](#example).
/// To be clear, a single execution of `dispatch` runs the operation only once.
///
/// Any function called inside the operation should be `#[inline(always)]`, to guarantee
/// target-feature-specific code generation.
///
/// As an implementation detail, the operation is executed inside a closure.
/// This is what lets the target features be enabled for the operation's code.
/// As a result, early `return` and `?` will not behave as expected.
/// When `dispatch` is used to call a single function (which we expect to be the common case),
/// you can apply `?` to the value returned by `dispatch` instead.
/// Early return can be emulated with [`ControlFlow`](core::ops::ControlFlow).
///
/// # Example
///
/// ```
/// use fearless_simd::{Level, Simd, dispatch};
///
/// #[inline(always)]
/// fn sigmoid<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
///
/// let level = Level::new();
///
/// dispatch!(level, simd => sigmoid(simd, &[/*...*/], &mut [/*...*/]));
/// ```
///
/// [`Level`]: crate::Level
/// [`Level::new()`]: crate::Level::new
/// [`Simd`]: crate::Simd
#[macro_export]
macro_rules! dispatch {
    ($level:expr, $simd:pat => $op:expr) => {{
        /// Hide the concrete `Simd` type behind `impl Simd`, so that the operation
        /// has to treat the value generically.
        #[inline(always)]
        fn launder<Token: $crate::Simd>(token: Token) -> impl $crate::Simd {
            token
        }

        // Every arm has the same shape: bind the user's pattern to the laundered token,
        // then run the operation as a closure through that token's `vectorize`.
        match $level {
            $crate::Level::Fallback(fallback) => {
                let $simd = launder(fallback);
                // `vectorize` has nothing to do for the fallback level; it is called
                // anyway so that this arm stays consistent with the others.
                $crate::Simd::vectorize(fallback, #[inline(always)] || $op)
            }
            #[cfg(target_arch = "aarch64")]
            $crate::Level::Neon(neon_token) => {
                let $simd = launder(neon_token);
                $crate::Simd::vectorize(neon_token, #[inline(always)] || $op)
            }
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            $crate::Level::WasmSimd128(simd128_token) => {
                let $simd = launder(simd128_token);
                $crate::Simd::vectorize(simd128_token, #[inline(always)] || $op)
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Sse4_2(sse_token) => {
                let $simd = launder(sse_token);
                $crate::Simd::vectorize(sse_token, #[inline(always)] || $op)
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Avx2(avx_token) => {
                let $simd = launder(avx_token);
                $crate::Simd::vectorize(avx_token, #[inline(always)] || $op)
            }
            // Catch-all for levels which are not handled by the arms above.
            _ => unreachable!(),
        }
    }};
}
219
#[cfg(test)]
// `dispatch!` always emits a trailing wildcard arm. If every level is already matched,
// that arm is unreachable, so this expectation doubles as a check that no level was missed.
#[expect(
    unreachable_patterns,
    reason = "Level is non_exhaustive, but you must be exhaustive within the same crate."
)]
mod tests {
    use crate::Level;
    use crate::Simd;

    /// Compile-only check: the operation may call a generic function using a turbofish
    /// in which the `Simd` type parameter is left to inference.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_generic() {
        fn passthrough<S: Simd, T>(_: S, value: T) -> T {
            value
        }
        dispatch!(Level::new(), simd => passthrough::<_, ()>(simd, ()));
    }

    /// Compile-only check: the operation may call a value whose type depends on the
    /// `Simd` type, rather than a named function.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_value() {
        fn make_sink<S: Simd>() -> impl FnOnce(S) {
            |_| ()
        }
        dispatch!(Level::new(), simd => (make_sink())(simd));
    }

    /// The value of the operation is the value of the whole `dispatch!` expression.
    #[test]
    fn dispatch_output() {
        let answer = dispatch!(Level::new(), _simd => 42);
        assert_eq!(42, answer);
    }

    mod no_import_simd {
        /// We should be able to use [`dispatch`] in a scope which doesn't import anything.
        #[test]
        fn dispatch_with_no_imports() {
            let sum = dispatch!(crate::Level::new(), _ => 1 + 2);
            assert_eq!(sum, 3);
        }
    }
}
258}