// fearless_simd/core_arch/x86/sse.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Access to SSE intrinsics.
5
6use crate::impl_macros::delegate;
7#[cfg(target_arch = "x86")]
8use core::arch::x86 as arch;
9#[cfg(target_arch = "x86_64")]
10use core::arch::x86_64 as arch;
11
12use arch::*;
13
/// A token for SSE intrinsics on `x86` and `x86_64`.
///
/// A value of this type can only be obtained through [`Sse::new_unchecked`],
/// whose safety contract requires the necessary CPU features to be available.
/// Holding the token is therefore evidence that calling the delegated SSE
/// intrinsics is sound.
#[derive(Clone, Copy, Debug)]
pub struct Sse {
    // Zero-sized private field: prevents construction outside this module,
    // forcing all creation through `new_unchecked`.
    _private: (),
}
19
#[expect(
    clippy::missing_safety_doc,
    reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
)]
impl Sse {
    /// Create a SIMD token.
    ///
    /// # Safety
    ///
    /// The required CPU features must be available.
    #[inline]
    pub const unsafe fn new_unchecked() -> Self {
        Self { _private: () }
    }

    // Each entry below generates an inherent method on the token that
    // forwards to the same-named intrinsic in `core::arch::x86`/`x86_64`
    // (imported as `arch`). Suffix convention: `_ss` operates on the lowest
    // f32 lane only; `_ps` operates on all four packed f32 lanes.
    delegate! { arch:
        // Arithmetic.
        fn _mm_add_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_add_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_sub_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_sub_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_mul_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_mul_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_div_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_div_ps(a: __m128, b: __m128) -> __m128;
        // Square root, and the approximate reciprocal / reciprocal-sqrt forms.
        fn _mm_sqrt_ss(a: __m128) -> __m128;
        fn _mm_sqrt_ps(a: __m128) -> __m128;
        fn _mm_rcp_ss(a: __m128) -> __m128;
        fn _mm_rcp_ps(a: __m128) -> __m128;
        fn _mm_rsqrt_ss(a: __m128) -> __m128;
        fn _mm_rsqrt_ps(a: __m128) -> __m128;
        // Lane-wise minimum / maximum.
        fn _mm_min_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_min_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_max_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_max_ps(a: __m128, b: __m128) -> __m128;
        // Bitwise logic on the raw 128-bit values.
        fn _mm_and_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_or_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_xor_ps(a: __m128, b: __m128) -> __m128;
        // Scalar comparisons producing a mask in the low lane.
        fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128;
        // Packed comparisons producing a per-lane mask.
        fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128;
        // Low-lane comparisons returning 0/1 as `i32`
        // (`comi*` vs. `ucomi*` differ in how they signal on QNaN inputs —
        // see the Intel intrinsics guide).
        fn _mm_comieq_ss(a: __m128, b: __m128) -> i32;
        fn _mm_comilt_ss(a: __m128, b: __m128) -> i32;
        fn _mm_comile_ss(a: __m128, b: __m128) -> i32;
        fn _mm_comigt_ss(a: __m128, b: __m128) -> i32;
        fn _mm_comige_ss(a: __m128, b: __m128) -> i32;
        fn _mm_comineq_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32;
        fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32;
        // Conversions between the low f32 lane and scalar integers/floats
        // (`cvtt*` truncate; the `_ss2si`/`_si2ss` names are legacy aliases).
        fn _mm_cvtss_si32(a: __m128) -> i32;
        fn _mm_cvt_ss2si(a: __m128) -> i32;
        fn _mm_cvttss_si32(a: __m128) -> i32;
        fn _mm_cvtt_ss2si(a: __m128) -> i32;
        fn _mm_cvtss_f32(a: __m128) -> f32;
        fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128;
        fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128;
        // Constructors: single-lane, broadcast, per-lane, and zero.
        fn _mm_set_ss(a: f32) -> __m128;
        fn _mm_set1_ps(a: f32) -> __m128;
        fn _mm_set_ps1(a: f32) -> __m128;
        fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128;
        fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128;
        fn _mm_setzero_ps() -> __m128;
        // Lane rearrangement and mask extraction.
        fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128;
        fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128;
        fn _mm_movemask_ps(a: __m128) -> i32;
        // Loads from raw pointers; `unsafe` because the caller must uphold
        // the pointer validity (and, for the aligned variants, alignment)
        // requirements of the underlying intrinsic.
        unsafe fn _mm_load_ss(p: *const f32) -> __m128;
        unsafe fn _mm_load1_ps(p: *const f32) -> __m128;
        unsafe fn _mm_load_ps1(p: *const f32) -> __m128;
        unsafe fn _mm_load_ps(p: *const f32) -> __m128;
        unsafe fn _mm_loadu_ps(p: *const f32) -> __m128;
        unsafe fn _mm_loadr_ps(p: *const f32) -> __m128;
        unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i;
        // Stores to raw pointers; same caller obligations as the loads above.
        unsafe fn _mm_store_ss(p: *mut f32, a: __m128);
        unsafe fn _mm_store1_ps(p: *mut f32, a: __m128);
        unsafe fn _mm_store_ps1(p: *mut f32, a: __m128);
        unsafe fn _mm_store_ps(p: *mut f32, a: __m128);
        unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128);
        unsafe fn _mm_storer_ps(p: *mut f32, a: __m128);
        // Miscellaneous: low-lane move, store fence, prefetch hint,
        // uninitialized vector, 4x4 transpose helper, non-temporal store.
        fn _mm_move_ss(a: __m128, b: __m128) -> __m128;
        fn _mm_sfence();
        #[expect(clippy::not_unsafe_ptr_arg_deref, reason="Prefetch has no preconditions, so is valid to accept a pointer.")]
        fn _mm_prefetch<const STRATEGY: i32>(p: *const i8);
        fn _mm_undefined_ps() -> __m128;
        // Upper-case name mirrors the intrinsic's macro-style naming in C.
        #[allow(non_snake_case)]
        fn _MM_TRANSPOSE4_PS(
            row0: &mut __m128,
            row1: &mut __m128,
            row2: &mut __m128,
            row3: &mut __m128,
        );
        unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128);
    }
}
140}