fearless_simd/
lib.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A helper library to make SIMD more friendly.
5//!
6//! Fearless SIMD exposes safe SIMD with ergonomic multi-versioning in Rust.
7//!
8//! Fearless SIMD uses "marker values" which serve as proofs of which target features are available on the current CPU.
9//! These each implement the [`Simd`] trait, which exposes a core set of SIMD operations which are implemented as
10//! efficiently as possible on each target platform.
11//!
12//! Additionally, there are types for packed vectors of a specific width and element type (such as [`f32x4`]).
13//! Fearless SIMD does not currently support vectors of less than 128 bits.
//! These vector types implement some standard arithmetic traits (e.g. they can be added together using
//! `+` or multiplied by a scalar using `*`, among others), which are implemented as efficiently
16//! as possible using SIMD instructions.
17//! These can be created in a SIMD context using the [`SimdFrom`] trait, or the
18//! [`from_slice`][SimdBase::from_slice] associated function.
19//!
20//! To call a function with the best available target features and get the associated `Simd`
21//! implementation, use the [`dispatch!()`] macro:
22//!
23//! ```rust
24//! use fearless_simd::{Level, Simd, dispatch};
25//!
26//! #[inline(always)]
27//! fn sigmoid<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
28//!
29//! // The stored level, which you should only construct once in your application.
30//! let level = Level::new();
31//!
32//! dispatch!(level, simd => sigmoid(simd, &[/*...*/], &mut [/*...*/]));
33//! ```
34//!
35//! A few things to note:
36//!
37//! 1) `sigmoid` is generic over any `Simd` type.
38//! 2) The [`dispatch`] macro is used to invoke the given function with the target features associated with the supplied [`Level`].
39//! 3) The function or closure passed to [`dispatch!()`] should be `#[inline(always)]`.
//!    The performance of the SIMD implementation may be poor if that isn't the case. See [the section on inlining for details](#inlining).
41//!
42//! The first parameter to [`dispatch!()`] is the [`Level`].
43//! If you are writing an application, you should create this once (using [`Level::new`]), and pass it to any function which wants to use SIMD.
44//! This type stores which instruction sets are available for the current process, which is used
45//! in the macro to dispatch to the most optimal variant of the supplied function for this process.
46//!
47//! # Inlining
48//!
49//! Fearless SIMD relies heavily on Rust's inlining support to create functions which have the
50//! given target features enabled.
51//! As such, most functions which you write when using Fearless SIMD should have the `#[inline(always)]` attribute.
52//!
53//! <!--
54//! # Kernels vs not kernels
55//!
56//! TODO: Talk about writing versions of functions which can be called in other `S: Simd` functions.
57//! I think this pattern can also have a macro.
58//! -->
59//!
//! # WebAssembly
61//!
62//! WASM SIMD doesn't have feature detection, and so you need to compile two versions of your bundle for WASM, one with SIMD and one without,
63//! then select the appropriate one for your user's browser.
64//! TODO: Expand on this.
65//!
//! # Credits
67//!
//! This crate was inspired by [`pulp`] and [`std::simd`], among others in the Rust ecosystem, though it makes many decisions differently.
69//! It benefited from conversations with Luca Versari, though he is not responsible for any of the mistakes or bad decisions.
70//!
71//! # Feature Flags
72//!
73//! The following crate [feature flags](https://doc.rust-lang.org/cargo/reference/features.html#dependency-features) are available:
74//!
75//! - `std` (enabled by default): Get floating point functions from the standard library (likely using your target's libc).
76//!   Also allows using [`Level::new`] on all platforms, to detect which target features are enabled.
77//! - `libm`: Use floating point implementations from [libm].
78//! - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics,
79//!   beyond the basic SIMD operations abstracted on all platforms.
80//!
81//! At least one of `std` and `libm` is required; `std` overrides `libm`.
82//!
//! [`pulp`]: https://crates.io/crates/pulp
//! [libm]: https://crates.io/crates/libm
84// LINEBENDER LINT SET - lib.rs - v3
85// See https://linebender.org/wiki/canonical-lints/
86// These lints shouldn't apply to examples or tests.
87#![cfg_attr(not(test), warn(unused_crate_dependencies))]
88// These lints shouldn't apply to examples.
89#![warn(clippy::print_stdout, clippy::print_stderr)]
90// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit.
91#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))]
92// END LINEBENDER LINT SET
93#![cfg_attr(docsrs, feature(doc_cfg))]
94#![allow(non_camel_case_types, reason = "TODO")]
95#![expect(clippy::unused_unit, reason = "easier for code generation")]
96#![expect(
97    clippy::new_without_default,
98    clippy::use_self,
99    reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
100)]
101#![no_std]
102
103#[cfg(feature = "std")]
104extern crate std;
105
106#[cfg(all(not(feature = "libm"), not(feature = "std")))]
107compile_error!("fearless_simd requires either the `std` or `libm` feature");
108
109// Suppress the unused_crate_dependencies lint when both std and libm are specified.
110#[cfg(all(feature = "std", feature = "libm"))]
111use libm as _;
112
113pub mod core_arch;
114mod impl_macros;
115
116mod generated;
117mod macros;
118mod traits;
119
120pub use generated::*;
121pub use traits::*;
122
/// [`Simd`] implementations available on 64-bit ARM (AArch64).
#[cfg(target_arch = "aarch64")]
pub mod aarch64 {
    pub use super::generated::Neon;
}
128
/// [`Simd`] implementations available on WebAssembly.
///
/// Only present when the `simd128` target feature is enabled at compile time.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
pub mod wasm32 {
    pub use super::generated::WasmSimd128;
}
134
135/// Implementations of [`Simd`] on x86 architectures (both 32 and 64 bit).
136#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
137pub mod x86 {
138    pub use crate::generated::Avx2;
139    pub use crate::generated::Sse4_2;
140}
141
142/// The level enum with the specific SIMD capabilities available.
143///
144/// The contained values serve as a proof that the associated target
145/// feature is available.
146#[derive(Clone, Copy, Debug)]
147#[non_exhaustive]
148pub enum Level {
149    /// Scalar fallback level, i.e. no supported SIMD features are to be used.
150    ///
151    /// This can be created with [`Level::fallback`].
152    // TODO: Allow not compiling this in (probably only on web, but maybe elsewhere?)
153    Fallback(Fallback),
154    /// The Neon instruction set on 64 bit ARM.
155    #[cfg(target_arch = "aarch64")]
156    Neon(Neon),
157    /// The SIMD 128 instructions on 32-bit WebAssembly.
158    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
159    WasmSimd128(WasmSimd128),
160    /// The SSE4.2 instruction set on (32 and 64 bit) x86.
161    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
162    Sse4_2(Sse4_2),
163    /// The AVX2 and FMA instruction set on (32 and 64 bit) x86.
164    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165    Avx2(Avx2),
166    // If new variants are added, make sure to handle them in `Level::dispatch`
167    // and `dispatch!()`
168}
169
170impl Level {
171    /// Detect the available features on the current CPU, and returns the best level.
172    ///
173    /// If no SIMD instruction set is available, a scalar fallback will be used instead.
174    ///
175    /// This function requires the standard library, to use the
176    /// [`is_x86_feature_detected`](std::arch::is_x86_feature_detected)
177    /// or [`is_aarch64_feature_detected`](std::arch::is_aarch64_feature_detected).
178    /// On wasm32, this requirement does not apply, so the standard library isn't required.
179    ///
180    /// Note that in most cases, this function should only be called by end-user applications.
181    /// Libraries should instead accept a `Level` argument, probably as they are
182    /// creating their data structures, then storing the level for any computations.
183    /// Libraries which wish to abstract away SIMD usage for their common-case clients,
184    /// should make their non-`Level` entrypoint match this function's `cfg`; to instead
185    /// handle this at runtime, they can use [`try_detect`](Self::try_detect),
186    /// handling the `None` case as they deem fit (probably panicking).
187    /// This strategy avoids users of the library inadvertently using the fallback level,
188    /// even if the requisite target features are available.
189    ///
190    /// If you are on an embedded device where these macros are not supported,
191    /// you should construct the relevant variants yourself, using whatever
192    /// way your specific chip supports accessing the current level.
193    ///
194    /// This value should be passed to [`dispatch!()`].
195    #[cfg(any(feature = "std", target_arch = "wasm32"))]
196    #[must_use]
197    pub fn new() -> Self {
198        #[cfg(target_arch = "aarch64")]
199        if std::arch::is_aarch64_feature_detected!("neon") {
200            return unsafe { Level::Neon(Neon::new_unchecked()) };
201        }
202        #[cfg(target_arch = "wasm32")]
203        {
204            // WASM always either has the SIMD feature compiled in or not.
205            #[cfg(target_feature = "simd128")]
206            return Level::WasmSimd128(WasmSimd128::new_unchecked());
207            #[cfg(not(target_feature = "simd128"))]
208            return Level::fallback();
209        }
210        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
211        {
212            if std::arch::is_x86_feature_detected!("avx2")
213                && std::arch::is_x86_feature_detected!("fma")
214            {
215                return unsafe { Level::Avx2(Avx2::new_unchecked()) };
216            } else if std::arch::is_x86_feature_detected!("sse4.2") {
217                return unsafe { Level::Sse4_2(Sse4_2::new_unchecked()) };
218            }
219        }
220        #[cfg(not(target_arch = "wasm32"))]
221        Self::fallback()
222    }
223
224    /// Get the target feature level suitable for this run.
225    ///
226    /// Should be used in libraries if they wish to handle the case where
227    /// target features cannot be detected at runtime.
228    /// Most users should prefer [`new`](Self::new).
229    /// This is discussed in more detail in `new`'s documentation.
230    #[allow(clippy::allow_attributes, reason = "Only needed in some cfgs.")]
231    #[allow(unreachable_code, reason = "Fallback unreachable in some cfgs.")]
232    pub fn try_detect() -> Option<Self> {
233        #[cfg(any(feature = "std", target_arch = "wasm32"))]
234        return Some(Self::new());
235        None
236    }
237
238    /// If this is a proof that Neon (or better) is available, access that instruction set.
239    ///
240    /// This method should be preferred over matching against the `Neon` variant of self,
241    /// because if Fearless SIMD gets support for an instruction set which is a superset of Neon,
242    /// this method will return a value even if that "better" instruction set is available.
243    ///
244    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
245    /// the level-specific SIMD capabilities.
246    #[cfg(target_arch = "aarch64")]
247    #[inline]
248    pub fn as_neon(self) -> Option<Neon> {
249        match self {
250            Level::Neon(neon) => Some(neon),
251            _ => None,
252        }
253    }
254
255    /// If this is a proof that SIMD 128 (or better) is available, access that instruction set.
256    ///
257    /// This method should be preferred over matching against the `WasmSimd128` variant of self,
258    /// because if Fearless SIMD gets support for an instruction set which is a superset of SIMD 128,
259    /// this method will return a value even if that "better" instruction set is available.
260    ///
261    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
262    /// the level-specific SIMD capabilities.
263    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
264    #[inline]
265    pub fn as_wasm_simd128(self) -> Option<WasmSimd128> {
266        match self {
267            Level::WasmSimd128(simd128) => Some(simd128),
268            _ => None,
269        }
270    }
271
272    /// If this is a proof that SSE4.2 (or better) is available, access that instruction set.
273    ///
274    /// This method should be preferred over matching against the `Sse4_2` variant of self,
275    /// because if Fearless SIMD gets support for an instruction set which is a superset of SSE4.2,
276    /// this method will return a value even if that "better" instruction set is available.
277    ///
278    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
279    /// the level-specific SIMD capabilities.
280    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
281    #[inline]
282    pub fn as_sse4_2(self) -> Option<Sse4_2> {
283        match self {
284            Level::Sse4_2(sse42) => Some(sse42),
285            _ => None,
286        }
287    }
288
289    /// If this is a proof that AVX2 and FMA (or better) is available, access that instruction set.
290    ///
291    /// This method should be preferred over matching against the `AVX2` variant of self,
292    /// because if Fearless SIMD gets support for an instruction set which is a superset of AVX2,
293    /// this method will return a value even if that "better" instruction set is available.
294    ///
295    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
296    /// the level-specific SIMD capabilities.
297    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
298    #[inline]
299    pub fn as_avx2(self) -> Option<Avx2> {
300        match self {
301            Level::Avx2(avx2) => Some(avx2),
302            _ => None,
303        }
304    }
305
306    /// Create a scalar fallback level, which uses no SIMD instructions.
307    ///
308    /// This is primarily intended for tests; most users should prefer [`Level::new`].
309    #[inline]
310    pub const fn fallback() -> Self {
311        Self::Fallback(Fallback::new())
312    }
313
314    /// Dispatch `f` to a context where the target features which this `Level` proves are available are [enabled].
315    ///
316    /// Most users of Fearless SIMD should prefer to use [`dispatch!()`] to
317    /// explicitly vectorize a function. That has a better developer experience
318    /// than an implementation of `WithSimd`, and is less likely to miss a vectorization
319    /// opportunity.
320    ///
321    /// This has two use cases:
322    /// 1) To call a manually written implementation of [`WithSimd`].
323    /// 2) To ask the compiler to auto-vectorize scalar code.
324    ///
325    /// For the second case to work, the provided function *must* be attributed with `#[inline(always)]`.
326    /// Note also that any calls that function makes to other functions will likely not be auto-vectorized,
327    /// unless they are also `#[inline(always)]`.
328    ///
329    /// [enabled]: https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
330    #[inline]
331    pub fn dispatch<W: WithSimd>(self, f: W) -> W::Output {
332        #[cfg(target_arch = "aarch64")]
333        #[target_feature(enable = "neon")]
334        #[inline]
335        fn dispatch_neon<W: WithSimd>(f: W, neon: Neon) -> W::Output {
336            f.with_simd(neon)
337        }
338
339        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
340        #[inline]
341        fn dispatch_simd128<W: WithSimd>(f: W, simd128: WasmSimd128) -> W::Output {
342            f.with_simd(simd128)
343        }
344
345        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
346        #[target_feature(enable = "sse4.2")]
347        #[inline]
348        fn dispatch_sse4_2<W: WithSimd>(f: W, sse4_2: Sse4_2) -> W::Output {
349            f.with_simd(sse4_2)
350        }
351
352        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
353        #[target_feature(enable = "avx2,fma")]
354        #[inline]
355        fn dispatch_avx2<W: WithSimd>(f: W, avx2: Avx2) -> W::Output {
356            f.with_simd(avx2)
357        }
358
359        #[inline]
360        fn dispatch_fallback<W: WithSimd>(f: W, fallback: Fallback) -> W::Output {
361            f.with_simd(fallback)
362        }
363
364        match self {
365            #[cfg(target_arch = "aarch64")]
366            Level::Neon(neon) => unsafe { dispatch_neon(f, neon) },
367            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
368            Level::WasmSimd128(simd128) => dispatch_simd128(f, simd128),
369            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
370            Level::Sse4_2(sse4_2) => unsafe { dispatch_sse4_2(f, sse4_2) },
371            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
372            Level::Avx2(avx2) => unsafe { dispatch_avx2(f, avx2) },
373            Level::Fallback(fallback) => dispatch_fallback(f, fallback),
374        }
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::Level;

    /// Only instantiable for types which are both `Send` and `Sync`.
    const fn require_send_sync<T>()
    where
        T: Send + Sync,
    {
    }

    /// A compile-time check: if this test builds, [`Level`] is `Send` and `Sync`.
    #[test]
    fn level_is_send_sync() {
        require_send_sync::<Level>();
    }
}