fearless_simd/
lib.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A helper library to make SIMD more friendly.
5//!
6//! Fearless SIMD exposes safe SIMD with ergonomic multi-versioning in Rust.
7//!
8//! Fearless SIMD uses "marker values" which serve as proofs of which target features are available on the current CPU.
9//! These each implement the [`Simd`] trait, which exposes a core set of SIMD operations which are implemented as
10//! efficiently as possible on each target platform.
11//!
12//! Additionally, there are types for packed vectors of a specific width and element type (such as [`f32x4`]).
13//! Fearless SIMD does not currently support vectors of less than 128 bits.
14//! These vector types implement some standard arithmetic traits (i.e. they can be added together using
15//! `+`, multiplied by a scalar using `*`, among others), which are implemented as efficiently
16//! as possible using SIMD instructions.
17//! These can be created in a SIMD context using the [`SimdFrom`] trait, or the
18//! [`from_slice`][SimdBase::from_slice] associated function.
19//!
//! To create a function which uses SIMD and can be multiversioned, it will have a signature like:
21//!
22//! ```rust
23//! use fearless_simd::{Simd, simd_dispatch};
24//!
25//! #[inline(always)]
26//! fn sigmoid_impl<S: Simd>(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ }
27//!
28//! simd_dispatch!(sigmoid(level, x: &[f32], out: &mut [f32]) = sigmoid_impl);
29//! ```
30//!
31//! A few things to note:
32//!
33//! 1) This is generic over any `Simd` type.
34//! 2) The [`simd_dispatch`] macro is used to create a multi-versioned version of the given function.
35//! 3) The `_impl` suffix is used by convention to indicate the version of a function which will be dispatched to.
36//! 4) The `impl` function *must* be `#[inline(always)]`.
//!    The performance of the SIMD implementation will be poor if that isn't the case. See [the section on inlining](#inlining) for details.
38//!
39//! The signature of the generated function will be:
40//!
41//! ```rust
42//! use fearless_simd::Level;
43//! fn sigmoid(level: Level, x: &[f32], out: &mut [f32]) { /* ... */ }
44//! ```
45//!
46//! The first parameter to this function is the [`Level`].
47//! If you are writing an application, you should create this once (using [`Level::new`]), and pass it to any function which wants to use SIMD.
48//! This type stores which instruction sets are available for the current process, which is used
49//! in the (generated) `sigmoid` function to dispatch to the most optimal variant of the function for this process.
50//!
51//! # Inlining
52//!
53//! Fearless SIMD relies heavily on Rust's inlining support to create functions which have the
54//! given target features enabled.
55//! As such, most functions which you write when using Fearless SIMD should have the `#[inline(always)]` attribute.
//! This is required because in LLVM, functions with different target features cannot, in general, be inlined into each other.
57//!
58//! <!--
59//! # Kernels vs not kernels
60//!
61//! TODO: Talk about writing versions of functions which can be called in other `S: Simd` functions.
62//! I think this pattern can also have a macro.
63//! -->
64//!
//! # WebAssembly
66//!
67//! WASM SIMD doesn't have feature detection, and so you need to compile two versions of your bundle for WASM, one with SIMD and one without,
68//! then select the appropriate one for your user's browser.
69//! TODO: Expand on this.
70//!
71//! # Feature Flags
72//!
73//! The following crate [feature flags](https://doc.rust-lang.org/cargo/reference/features.html#dependency-features) are available:
74//!
75//! - `std` (enabled by default): Get floating point functions from the standard library (likely using your target's libc).
76//! - `libm`: Use floating point implementations from [libm].
77//! - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics,
78//!   beyond the basic SIMD operations abstracted on all platforms.
79//!
80//! At least one of `std` and `libm` is required; `std` overrides `libm`.
81// LINEBENDER LINT SET - lib.rs - v3
82// See https://linebender.org/wiki/canonical-lints/
83// These lints shouldn't apply to examples or tests.
84#![cfg_attr(not(test), warn(unused_crate_dependencies))]
85// These lints shouldn't apply to examples.
86#![warn(clippy::print_stdout, clippy::print_stderr)]
87// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit.
88#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))]
89// END LINEBENDER LINT SET
90#![cfg_attr(docsrs, feature(doc_auto_cfg))]
91#![allow(non_camel_case_types, reason = "TODO")]
92#![expect(clippy::unused_unit, reason = "easier for code generation")]
93#![expect(
94    clippy::new_without_default,
95    clippy::use_self,
96    reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
97)]
98#![no_std]
99
100#[cfg(feature = "std")]
101extern crate std;
102
103#[cfg(all(not(feature = "libm"), not(feature = "std")))]
104compile_error!("fearless_simd requires either the `std` or `libm` feature");
105
106// Suppress the unused_crate_dependencies lint when both std and libm are specified.
107#[cfg(all(feature = "std", feature = "libm"))]
108use libm as _;
109
110pub mod core_arch;
111mod impl_macros;
112
113mod generated;
114mod macros;
115mod traits;
116
117pub use generated::*;
118pub use traits::*;
119
/// [`Simd`] implementations available on 64 bit ARM.
#[cfg(all(feature = "std", target_arch = "aarch64"))]
pub mod aarch64 {
    // Re-exported from the (private) generated module.
    pub use super::generated::Neon;
}
125
/// [`Simd`] implementations available on WebAssembly.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
pub mod wasm32 {
    // Re-exported from the (private) generated module.
    pub use super::generated::WasmSimd128;
}
131
/// [`Simd`] implementations available on x86 architectures (both 32 and 64 bit).
#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
pub mod x86 {
    // Re-exported from the (private) generated module.
    pub use crate::generated::{Avx2, Sse4_2};
}
138
139/// The level enum with the specific SIMD capabilities available.
140///
141/// The contained values serve as a proof that the associated target
142/// feature is available.
143#[derive(Clone, Copy, Debug)]
144#[non_exhaustive]
145pub enum Level {
146    /// Scalar fallback level, i.e. no supported SIMD features are to be used.
147    ///
148    /// This can be created with [`Level::fallback`].
149    Fallback(Fallback),
150    /// The Neon instruction set on 64 bit ARM.
151    #[cfg(all(feature = "std", target_arch = "aarch64"))]
152    Neon(Neon),
153    /// The SIMD 128 instructions on 32-bit WebAssembly.
154    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
155    WasmSimd128(WasmSimd128),
156    /// The SSE4.2 instruction set on (32 and 64 bit) x86.
157    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
158    Sse4_2(Sse4_2),
159    /// The AVX2 and FMA instruction set on (32 and 64 bit) x86.
160    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
161    Avx2(Avx2),
162    // If new variants are added, make sure to handle them in `Level::dispatch`
163    // and `simd_dispatch`
164}
165
166impl Level {
167    /// Detect the available features on the current CPU, and returns the best level.
168    ///
169    /// If no SIMD instruction set is available, a scalar fallback will be used instead.
170    ///
171    /// This value will be passed to functions generated using [`simd_dispatch`].
172    pub fn new() -> Self {
173        #[cfg(all(feature = "std", target_arch = "aarch64"))]
174        if std::arch::is_aarch64_feature_detected!("neon") {
175            return unsafe { Level::Neon(Neon::new_unchecked()) };
176        }
177        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
178        return Level::WasmSimd128(WasmSimd128::new_unchecked());
179        #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
180        {
181            if std::arch::is_x86_feature_detected!("avx2")
182                && std::arch::is_x86_feature_detected!("fma")
183            {
184                return unsafe { Level::Avx2(Avx2::new_unchecked()) };
185            } else if std::arch::is_x86_feature_detected!("sse4.2") {
186                return unsafe { Level::Sse4_2(Sse4_2::new_unchecked()) };
187            }
188        }
189        #[cfg(not(all(target_arch = "wasm32", target_feature = "simd128")))]
190        Self::fallback()
191    }
192
193    /// If this is a proof that Neon (or better) is available, access that instruction set.
194    ///
195    /// This method should be preferred over matching against the `Neon` variant of self,
196    /// because if Fearless SIMD gets support for an instruction set which is a superset of Neon,
197    /// this method will return a value even if that "better" instruction set is available.
198    ///
199    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
200    /// the level-specific SIMD capabilities.
201    #[cfg(all(feature = "std", target_arch = "aarch64"))]
202    #[inline]
203    pub fn as_neon(self) -> Option<Neon> {
204        match self {
205            Level::Neon(neon) => Some(neon),
206            _ => None,
207        }
208    }
209
210    /// If this is a proof that SIMD 128 (or better) is available, access that instruction set.
211    ///
212    /// This method should be preferred over matching against the `WasmSimd128` variant of self,
213    /// because if Fearless SIMD gets support for an instruction set which is a superset of SIMD 128,
214    /// this method will return a value even if that "better" instruction set is available.
215    ///
216    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
217    /// the level-specific SIMD capabilities.
218    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
219    #[inline]
220    pub fn as_wasm_simd128(self) -> Option<WasmSimd128> {
221        match self {
222            Level::WasmSimd128(simd128) => Some(simd128),
223            _ => None,
224        }
225    }
226
227    /// If this is a proof that SSE4.2 (or better) is available, access that instruction set.
228    ///
229    /// This method should be preferred over matching against the `Sse4_2` variant of self,
230    /// because if Fearless SIMD gets support for an instruction set which is a superset of SSE4.2,
231    /// this method will return a value even if that "better" instruction set is available.
232    ///
233    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
234    /// the level-specific SIMD capabilities.
235    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
236    #[inline]
237    pub fn as_sse4_2(self) -> Option<Sse4_2> {
238        match self {
239            Level::Sse4_2(sse42) => Some(sse42),
240            _ => None,
241        }
242    }
243
244    /// If this is a proof that AVX2 and FMA (or better) is available, access that instruction set.
245    ///
246    /// This method should be preferred over matching against the `AVX2` variant of self,
247    /// because if Fearless SIMD gets support for an instruction set which is a superset of AVX2,
248    /// this method will return a value even if that "better" instruction set is available.
249    ///
250    /// This can be used in combination with the `safe_wrappers` feature to gain checked access to
251    /// the level-specific SIMD capabilities.
252    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
253    #[inline]
254    pub fn as_avx2(self) -> Option<Avx2> {
255        match self {
256            Level::Avx2(avx2) => Some(avx2),
257            _ => None,
258        }
259    }
260
261    /// Create a scalar fallback level, which uses no SIMD instructions.
262    ///
263    /// This is primarily intended for tests; most users should prefer [`Level::new`].
264    #[inline]
265    pub fn fallback() -> Self {
266        Self::Fallback(Fallback::new())
267    }
268
269    /// Dispatch `f` to a context where the target features which this `Level` proves are available are [enabled].
270    ///
271    /// Most users of Fearless SIMD should prefer to use [`simd_dispatch`] to
272    /// explicitly vectorize a function. That has a better developer experience
273    /// than an implementation of `WithSimd`, and is less likely to miss a vectorization
274    /// opportunity.
275    ///
276    /// This has two use cases:
277    /// 1) To call a manually written implementation of [`WithSimd`].
278    /// 2) To ask the compiler to auto-vectorize scalar code.
279    ///
280    /// For the second case to work, the provided function *must* be attributed with `#[inline(always)]`.
281    /// Note also that any calls that function makes to other functions will likely not be auto-vectorized,
282    /// unless they are also `#[inline(always)]`.
283    ///
284    /// [enabled]: https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
285    #[inline]
286    pub fn dispatch<W: WithSimd>(self, f: W) -> W::Output {
287        #[cfg(all(feature = "std", target_arch = "aarch64"))]
288        #[target_feature(enable = "neon")]
289        #[inline]
290        // unsafe not needed here with tf11, but can be justified
291        unsafe fn dispatch_neon<W: WithSimd>(f: W, neon: Neon) -> W::Output {
292            f.with_simd(neon)
293        }
294
295        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
296        #[inline]
297        fn dispatch_simd128<W: WithSimd>(f: W, simd128: WasmSimd128) -> W::Output {
298            f.with_simd(simd128)
299        }
300
301        #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
302        #[target_feature(enable = "sse4.2")]
303        #[inline]
304        unsafe fn dispatch_sse4_2<W: WithSimd>(f: W, sse4_2: Sse4_2) -> W::Output {
305            f.with_simd(sse4_2)
306        }
307
308        #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
309        #[target_feature(enable = "avx2,fma")]
310        #[inline]
311        unsafe fn dispatch_avx2<W: WithSimd>(f: W, avx2: Avx2) -> W::Output {
312            f.with_simd(avx2)
313        }
314
315        #[inline]
316        fn dispatch_fallback<W: WithSimd>(f: W, fallback: Fallback) -> W::Output {
317            f.with_simd(fallback)
318        }
319
320        match self {
321            #[cfg(all(feature = "std", target_arch = "aarch64"))]
322            Level::Neon(neon) => unsafe { dispatch_neon(f, neon) },
323            #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
324            Level::WasmSimd128(simd128) => dispatch_simd128(f, simd128),
325            #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
326            Level::Sse4_2(sse4_2) => unsafe { dispatch_sse4_2(f, sse4_2) },
327            #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
328            Level::Avx2(avx2) => unsafe { dispatch_avx2(f, avx2) },
329            Level::Fallback(fallback) => dispatch_fallback(f, fallback),
330        }
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::Level;

    /// Compile-time check: this can only be instantiated for types which are `Send` and `Sync`.
    const fn assert_is_send_sync<T>()
    where
        T: Send + Sync,
    {
    }

    /// Passes by virtue of compiling, which shows that [`Level`] is both `Send` and `Sync`.
    #[test]
    fn level_is_send_sync() {
        assert_is_send_sync::<Level>();
    }
}
344}