fearless_simd/macros.rs
1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Macros publicly exported
5
6/// Access the applicable [`Simd`] for a given `level`, and perform an operation using it.
7///
8/// This macro is the root of how any explicitly written SIMD functions in this crate are
9/// called from a non-SIMD context.
10///
11/// The first parameter to the macro is the [`Level`].
12/// You should prefer to construct a [`Level`] once and pass it around, rather than
13/// frequently calling [`Level::new()`].
14/// This is because `Level::new` has to detect which target features are available, which can be slow.
15///
16/// The code of the operation will be repeated literally several times in the output, so you should prefer
17/// to keep this code small (as it will be type-checked, etc. for each supported SIMD level on your target).
18/// In most cases, it should be a single call to a function which is generic over `Simd` implementations,
19/// as seen in [the examples](#examples).
20/// For clarity, it will only be executed once per execution of `dispatch`.
21///
22/// To guarantee target-feature-specific code generation, any functions called within the operation should
23/// be `#[inline(always)]`.
24///
/// Note that as an implementation detail of this macro, the operation will be executed inside a closure.
/// This is what allows the target features to be enabled for the code inside the operation.
/// A consequence of this is that early `return` and `?` will not work as expected.
/// In cases where you use `dispatch` to call a single function (which we expect to be the
/// majority of cases), you can use `?` on the return value of `dispatch` instead.
/// To emulate early return, you can use [`ControlFlow`](core::ops::ControlFlow) instead.
31///
/// # Examples
33///
34/// ```rust
35/// use fearless_simd::{Level, Simd, dispatch};
36///
37/// #[inline(always)]
38/// fn sigmoid<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
39///
40/// let level = Level::new();
41///
42/// dispatch!(level, simd => sigmoid(simd, &[/*...*/], &mut [/*...*/]));
43/// ```
44///
45/// [`Level`]: crate::Level
46/// [`Level::new()`]: crate::Level::new
47/// [`Simd`]: crate::Simd
#[macro_export]
macro_rules! dispatch {
    // Public entry point. The real work happens in the `@impl` rule below; this rule reaches it via
    // `internal_unstable_dispatch_inner!`, which appends a boolean literal recording whether the
    // `force_support_fallback` crate feature is enabled.
    ($level:expr, $simd:pat => $op:expr) => {{
        $crate::internal_unstable_dispatch_inner!($level, $simd => $op)
    }};
    (@impl $level:expr, $simd:pat => $op:expr; $fallback_forced:literal) => {{
        /// Hands back its argument typed only as `impl Simd`, which enforces that the
        /// operation handles the `Simd` value correctly.
        // TODO: Just make into a `pub` function in fearless_simd itself?
        #[inline(always)]
        fn launder<S>(token: S) -> impl $crate::Simd
        where
            S: $crate::Simd,
        {
            token
        }

        // Every arm has the same shape: bind the caller's pattern to the laundered token, then
        // run the operation as an `#[inline(always)]` closure through `Simd::vectorize` on the
        // concrete token. The `cfg`s are evaluated in the crate which invokes the macro.
        match $level {
            #[cfg(target_arch = "aarch64")]
            $crate::Level::Neon(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(
                    token,
                    #[inline(always)]
                    || $op,
                )
            }
            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
            $crate::Level::WasmSimd128(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(
                    token,
                    #[inline(always)]
                    || $op,
                )
            }
            // This fallthrough logic is documented at the definition site of `Level`.
            // The SSE4.2 arm is only emitted on x86 targets where the full AVX2-level feature
            // set is not statically enabled.
            #[cfg(all(
                any(target_arch = "x86", target_arch = "x86_64"),
                not(all(
                    target_feature = "avx2",
                    target_feature = "bmi1",
                    target_feature = "bmi2",
                    target_feature = "cmpxchg16b",
                    target_feature = "f16c",
                    target_feature = "fma",
                    target_feature = "lzcnt",
                    target_feature = "movbe",
                    target_feature = "popcnt",
                    target_feature = "xsave"
                ))
            ))]
            $crate::Level::Sse4_2(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(
                    token,
                    #[inline(always)]
                    || $op,
                )
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            $crate::Level::Avx2(token) => {
                let $simd = launder(token);
                $crate::Simd::vectorize(
                    token,
                    #[inline(always)]
                    || $op,
                )
            }
            // The fallback arm is emitted when the target's baseline SIMD level is not
            // statically enabled, on architectures without a dedicated level, or when the
            // boolean literal passed to the `@impl` rule is `true`.
            #[cfg(any(
                all(target_arch = "aarch64", not(target_feature = "neon")),
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    not(all(
                        target_feature = "sse4.2",
                        target_feature = "cmpxchg16b",
                        target_feature = "popcnt"
                    ))
                ),
                all(target_arch = "wasm32", not(target_feature = "simd128")),
                not(any(
                    target_arch = "x86",
                    target_arch = "x86_64",
                    target_arch = "aarch64",
                    target_arch = "wasm32"
                )),
                $fallback_forced
            ))]
            $crate::Level::Fallback(token) => {
                let $simd = launder(token);
                // This vectorize call does nothing, but it is reasonable to be consistent here.
                $crate::Simd::vectorize(
                    token,
                    #[inline(always)]
                    || $op,
                )
            }
            // Catch-all for any `Level` variant which has no arm under the current `cfg`s.
            _ => unreachable!(),
        }
    }};
}
146
// This macro turns whether the `force_support_fallback` crate feature is enabled into a boolean literal
// passed to `dispatch`, which allows it to be used correctly cross-crate.
// This trickery is required because macros are expanded in the context of the calling crate, including for
// evaluating `cfg`s.
151
/// Implementation detail of [`crate::dispatch`]; this is not public API.
///
/// This definition is the one compiled when the `force_support_fallback` feature is enabled.
/// It re-enters `dispatch!` through its `@impl` rule, appending the literal `true`.
#[cfg(feature = "force_support_fallback")]
#[doc(hidden)]
#[macro_export]
macro_rules! internal_unstable_dispatch_inner {
    ($lvl:expr, $binding:pat => $body:expr) => {
        $crate::dispatch!(@impl $lvl, $binding => $body; true)
    };
}
163
/// Implementation detail of [`crate::dispatch`]; this is not public API.
///
/// This definition is the one compiled when the `force_support_fallback` feature is disabled.
/// It re-enters `dispatch!` through its `@impl` rule, appending the literal `false`.
#[cfg(not(feature = "force_support_fallback"))]
#[doc(hidden)]
#[macro_export]
macro_rules! internal_unstable_dispatch_inner {
    ($lvl:expr, $binding:pat => $body:expr) => {
        $crate::dispatch!(
            @impl $lvl, $binding => $body; false
        )
    };
}
173
#[cfg(test)]
// This expect also validates that we haven't missed any levels!
// NOTE(review): presumably this works because the trailing `_` arm of `dispatch!` is only
// reported as unreachable when every `Level` variant available on the target has its own arm;
// a missing arm would leave the expectation unfulfilled. Confirm against the definition of `Level`.
#[expect(
    unreachable_patterns,
    reason = "Level is non_exhaustive, but you must be exhaustive within the same crate."
)]
mod tests {
    use crate::{Level, Simd};

    /// Compile test: the operation may call a generic function using a turbofish in which
    /// the `Simd` type parameter is left to be inferred (`_`).
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_generic() {
        fn generic<S: Simd, T>(_: S, x: T) -> T {
            x
        }
        dispatch!(Level::new(), simd => generic::<_, ()>(simd, ()));
    }

    /// Compile test: the operation may call a closure value returned by a function which is
    /// generic over `Simd`, rather than calling a named function directly.
    #[allow(dead_code, reason = "Compile test")]
    fn dispatch_value() {
        fn make_fn<S: Simd>() -> impl FnOnce(S) {
            |_| ()
        }
        dispatch!(Level::new(), simd => (make_fn())(simd));
    }

    /// The value of the operation is the value of the whole `dispatch!` expression.
    #[test]
    fn dispatch_output() {
        assert_eq!(42, dispatch!(Level::new(), _simd => 42));
    }

    // Deliberately contains no `use` items, so that every name `dispatch!` needs has to be
    // reachable through the paths the macro itself spells out.
    mod no_import_simd {
        /// We should be able to use [`dispatch`] in a scope which doesn't import anything.
        #[test]
        fn dispatch_with_no_imports() {
            // The `_` pattern also checks that the `Simd` binding may be ignored entirely.
            let res = dispatch!(crate::Level::new(), _ => 1 + 2);
            assert_eq!(res, 3);
        }
    }
}