fearless_simd/lib.rs
1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A helper library to make SIMD more friendly.
5//!
6//! Fearless SIMD exposes safe SIMD with ergonomic multi-versioning in Rust.
7//!
8//! Fearless SIMD uses "marker values" which serve as proofs of which target features are available on the current CPU.
9//! These each implement the [`Simd`] trait, which exposes a core set of SIMD operations which are implemented as
10//! efficiently as possible on each target platform.
11//!
12//! Additionally, there are types for packed vectors of a specific width and element type (such as [`f32x4`]).
13//! Fearless SIMD does not currently support vectors of less than 128 bits.
14//! These vector types implement some standard arithmetic traits (i.e. they can be added together using
15//! `+`, multiplied by a scalar using `*`, among others), which are implemented as efficiently
16//! as possible using SIMD instructions.
17//! These can be created in a SIMD context using the [`SimdFrom`] trait, or the
18//! [`from_slice`][SimdBase::from_slice] associated function.
19//!
//! To create a function which uses SIMD and can be multiversioned, give it a signature like:
21//!
22//! ```rust
23//! use fearless_simd::{Simd, simd_dispatch};
24//!
25//! #[inline(always)]
26//! fn sigmoid_impl<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
27//!
28//! simd_dispatch!(sigmoid(level, x: &[f32], out: &mut [f32]) = sigmoid_impl);
29//! ```
30//!
31//! A few things to note:
32//!
33//! 1) This is generic over any `Simd` type.
34//! 2) The [`simd_dispatch`] macro is used to create a multi-versioned version of the given function.
35//! 3) The `_impl` suffix is used by convention to indicate the version of a function which will be dispatched to.
//! 4) The `_impl` function *must* be `#[inline(always)]`.
//! The performance of the SIMD implementation will be poor if that isn't the case. See [the section on inlining](#inlining) for details.
38//!
39//! The signature of the generated function will be:
40//!
41//! ```rust
42//! use fearless_simd::Level;
43//! fn sigmoid(level: Level, x: &[f32], out: &mut [f32]) { /* ... */ }
44//! ```
45//!
46//! The first parameter to this function is the [`Level`].
47//! If you are writing an application, you should create this once (using [`Level::new`]), and pass it to any function which wants to use SIMD.
48//! This type stores which instruction sets are available for the current process, which is used
49//! in the (generated) `sigmoid` function to dispatch to the most optimal variant of the function for this process.
50//!
51//! # Inlining
52//!
53//! Fearless SIMD relies heavily on Rust's inlining support to create functions which have the
54//! given target features enabled.
55//! As such, most functions which you write when using Fearless SIMD should have the `#[inline(always)]` attribute.
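//! This is required because in LLVM, target features are enabled per function: code is only compiled with the
//! given target features if it is inlined into a function which has those features enabled.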
57//!
58//! <!--
59//! # Kernels vs not kernels
60//!
61//! TODO: Talk about writing versions of functions which can be called in other `S: Simd` functions.
62//! I think this pattern can also have a macro.
63//! -->
64//!
65//! # Webassembly
66//!
67//! WASM SIMD doesn't have feature detection, and so you need to compile two versions of your bundle for WASM, one with SIMD and one without,
68//! then select the appropriate one for your user's browser.
69//! TODO: Expand on this.
70//!
71//! # Feature Flags
72//!
73//! The following crate [feature flags](https://doc.rust-lang.org/cargo/reference/features.html#dependency-features) are available:
74//!
75//! - `std` (enabled by default): Get floating point functions from the standard library (likely using your target's libc).
76//! - `libm`: Use floating point implementations from [libm].
77//! - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics,
78//! beyond the basic SIMD operations abstracted on all platforms.
79//!
80//! At least one of `std` and `libm` is required; `std` overrides `libm`.
81// LINEBENDER LINT SET - lib.rs - v3
82// See https://linebender.org/wiki/canonical-lints/
83// These lints shouldn't apply to examples or tests.
84#![cfg_attr(not(test), warn(unused_crate_dependencies))]
85// These lints shouldn't apply to examples.
86#![warn(clippy::print_stdout, clippy::print_stderr)]
87// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit.
88#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))]
89// END LINEBENDER LINT SET
90#![cfg_attr(docsrs, feature(doc_auto_cfg))]
91#![allow(non_camel_case_types, reason = "TODO")]
92#![expect(clippy::unused_unit, reason = "easier for code generation")]
93#![expect(
94 clippy::new_without_default,
95 clippy::use_self,
96 reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
97)]
98#![no_std]
99
100#[cfg(feature = "std")]
101extern crate std;
102
103#[cfg(all(not(feature = "libm"), not(feature = "std")))]
104compile_error!("fearless_simd requires either the `std` or `libm` feature");
105
106// Suppress the unused_crate_dependencies lint when both std and libm are specified.
107#[cfg(all(feature = "std", feature = "libm"))]
108use libm as _;
109
110pub mod core_arch;
111mod impl_macros;
112
113mod generated;
114mod macros;
115mod traits;
116
117pub use generated::*;
118pub use traits::*;
119
/// [`Simd`] implementations which are available on 64 bit ARM targets.
///
/// Only compiled when the `std` feature is enabled and the target architecture is `aarch64`.
#[cfg(all(feature = "std", target_arch = "aarch64"))]
pub mod aarch64 {
    pub use crate::generated::Neon;
}
125
/// [`Simd`] implementations which are available on WebAssembly targets.
///
/// Only compiled for `wasm32` when the `simd128` target feature is enabled at compile time.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
pub mod wasm32 {
    pub use crate::generated::WasmSimd128;
}
131
/// [`Simd`] implementations which are available on x86 targets (32 bit and 64 bit alike).
///
/// Only compiled when the `std` feature is enabled and the target architecture is
/// `x86` or `x86_64`.
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
pub mod x86 {
    pub use crate::generated::{Avx2, Sse4_2};
}
138
139/// The level enum with the specific SIMD capabilities available.
140///
141/// The contained values serve as a proof that the associated target
142/// feature is available.
143#[derive(Clone, Copy, Debug)]
144#[non_exhaustive]
145pub enum Level {
146 /// Scalar fallback level, i.e. no supported SIMD features are to be used.
147 ///
148 /// This can be created with [`Level::fallback`].
149 Fallback(Fallback),
150 /// The Neon instruction set on 64 bit ARM.
151 #[cfg(all(feature = "std", target_arch = "aarch64"))]
152 Neon(Neon),
153 /// The SIMD 128 instructions on 32-bit WebAssembly.
154 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
155 WasmSimd128(WasmSimd128),
156 /// The SSE4.2 instruction set on (32 and 64 bit) x86.
157 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
158 Sse4_2(Sse4_2),
159 /// The AVX2 and FMA instruction set on (32 and 64 bit) x86.
160 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
161 Avx2(Avx2),
162 // If new variants are added, make sure to handle them in `Level::dispatch`
163 // and `simd_dispatch`
164}
165
166impl Level {
167 /// Detect the available features on the current CPU, and returns the best level.
168 ///
169 /// If no SIMD instruction set is available, a scalar fallback will be used instead.
170 ///
171 /// This value will be passed to functions generated using [`simd_dispatch`].
172 pub fn new() -> Self {
173 #[cfg(all(feature = "std", target_arch = "aarch64"))]
174 if std::arch::is_aarch64_feature_detected!("neon") {
175 return unsafe { Level::Neon(Neon::new_unchecked()) };
176 }
177 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
178 return Level::WasmSimd128(WasmSimd128::new_unchecked());
179 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
180 {
181 if std::arch::is_x86_feature_detected!("avx2")
182 && std::arch::is_x86_feature_detected!("fma")
183 {
184 return unsafe { Level::Avx2(Avx2::new_unchecked()) };
185 } else if std::arch::is_x86_feature_detected!("sse4.2") {
186 return unsafe { Level::Sse4_2(Sse4_2::new_unchecked()) };
187 }
188 }
189 #[cfg(not(all(target_arch = "wasm32", target_feature = "simd128")))]
190 Self::fallback()
191 }
192
193 /// If this is a proof that Neon (or better) is available, access that instruction set.
194 ///
195 /// This method should be preferred over matching against the `Neon` variant of self,
196 /// because if Fearless SIMD gets support for an instruction set which is a superset of Neon,
197 /// this method will return a value even if that "better" instruction set is available.
198 ///
199 /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
200 /// the level-specific SIMD capabilities.
201 #[cfg(all(feature = "std", target_arch = "aarch64"))]
202 #[inline]
203 pub fn as_neon(self) -> Option<Neon> {
204 match self {
205 Level::Neon(neon) => Some(neon),
206 _ => None,
207 }
208 }
209
210 /// If this is a proof that SIMD 128 (or better) is available, access that instruction set.
211 ///
212 /// This method should be preferred over matching against the `WasmSimd128` variant of self,
213 /// because if Fearless SIMD gets support for an instruction set which is a superset of SIMD 128,
214 /// this method will return a value even if that "better" instruction set is available.
215 ///
216 /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
217 /// the level-specific SIMD capabilities.
218 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
219 #[inline]
220 pub fn as_wasm_simd128(self) -> Option<WasmSimd128> {
221 match self {
222 Level::WasmSimd128(simd128) => Some(simd128),
223 _ => None,
224 }
225 }
226
227 /// If this is a proof that SSE4.2 (or better) is available, access that instruction set.
228 ///
229 /// This method should be preferred over matching against the `Sse4_2` variant of self,
230 /// because if Fearless SIMD gets support for an instruction set which is a superset of SSE4.2,
231 /// this method will return a value even if that "better" instruction set is available.
232 ///
233 /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
234 /// the level-specific SIMD capabilities.
235 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
236 #[inline]
237 pub fn as_sse4_2(self) -> Option<Sse4_2> {
238 match self {
239 Level::Sse4_2(sse42) => Some(sse42),
240 _ => None,
241 }
242 }
243
244 /// If this is a proof that AVX2 and FMA (or better) is available, access that instruction set.
245 ///
246 /// This method should be preferred over matching against the `AVX2` variant of self,
247 /// because if Fearless SIMD gets support for an instruction set which is a superset of AVX2,
248 /// this method will return a value even if that "better" instruction set is available.
249 ///
250 /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
251 /// the level-specific SIMD capabilities.
252 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
253 #[inline]
254 pub fn as_avx2(self) -> Option<Avx2> {
255 match self {
256 Level::Avx2(avx2) => Some(avx2),
257 _ => None,
258 }
259 }
260
261 /// Create a scalar fallback level, which uses no SIMD instructions.
262 ///
263 /// This is primarily intended for tests; most users should prefer [`Level::new`].
264 #[inline]
265 pub fn fallback() -> Self {
266 Self::Fallback(Fallback::new())
267 }
268
269 /// Dispatch `f` to a context where the target features which this `Level` proves are available are [enabled].
270 ///
271 /// Most users of Fearless SIMD should prefer to use [`simd_dispatch`] to
272 /// explicitly vectorize a function. That has a better developer experience
273 /// than an implementation of `WithSimd`, and is less likely to miss a vectorization
274 /// opportunity.
275 ///
276 /// This has two use cases:
277 /// 1) To call a manually written implementation of [`WithSimd`].
278 /// 2) To ask the compiler to auto-vectorize scalar code.
279 ///
280 /// For the second case to work, the provided function *must* be attributed with `#[inline(always)]`.
281 /// Note also that any calls that function makes to other functions will likely not be auto-vectorized,
282 /// unless they are also `#[inline(always)]`.
283 ///
284 /// [enabled]: https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
285 #[inline]
286 pub fn dispatch<W: WithSimd>(self, f: W) -> W::Output {
287 #[cfg(all(feature = "std", target_arch = "aarch64"))]
288 #[target_feature(enable = "neon")]
289 #[inline]
290 // unsafe not needed here with tf11, but can be justified
291 unsafe fn dispatch_neon<W: WithSimd>(f: W, neon: Neon) -> W::Output {
292 f.with_simd(neon)
293 }
294
295 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
296 #[inline]
297 fn dispatch_simd128<W: WithSimd>(f: W, simd128: WasmSimd128) -> W::Output {
298 f.with_simd(simd128)
299 }
300
301 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
302 #[target_feature(enable = "sse4.2")]
303 #[inline]
304 unsafe fn dispatch_sse4_2<W: WithSimd>(f: W, sse4_2: Sse4_2) -> W::Output {
305 f.with_simd(sse4_2)
306 }
307
308 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
309 #[target_feature(enable = "avx2,fma")]
310 #[inline]
311 unsafe fn dispatch_avx2<W: WithSimd>(f: W, avx2: Avx2) -> W::Output {
312 f.with_simd(avx2)
313 }
314
315 #[inline]
316 fn dispatch_fallback<W: WithSimd>(f: W, fallback: Fallback) -> W::Output {
317 f.with_simd(fallback)
318 }
319
320 match self {
321 #[cfg(all(feature = "std", target_arch = "aarch64"))]
322 Level::Neon(neon) => unsafe { dispatch_neon(f, neon) },
323 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
324 Level::WasmSimd128(simd128) => dispatch_simd128(f, simd128),
325 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
326 Level::Sse4_2(sse4_2) => unsafe { dispatch_sse4_2(f, sse4_2) },
327 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
328 Level::Avx2(avx2) => unsafe { dispatch_avx2(f, avx2) },
329 Level::Fallback(fallback) => dispatch_fallback(f, fallback),
330 }
331 }
332}
333
#[cfg(test)]
mod tests {
    use crate::Level;

    /// Compile-time check: can only be instantiated for types which are `Send` and `Sync`.
    const fn assert_is_send_sync<T>()
    where
        T: Send + Sync,
    {
    }

    /// There is nothing to check at runtime here: if this test compiles,
    /// then [`Level`] is both `Send` and `Sync`.
    #[test]
    fn level_is_send_sync() {
        assert_is_send_sync::<Level>();
    }
}