fearless_simd/core_arch/x86/
sse4_1.rs

// Copyright 2024 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Access to SSE4.1 intrinsics.

6use crate::impl_macros::delegate;
7#[cfg(target_arch = "x86")]
8use core::arch::x86 as arch;
9#[cfg(target_arch = "x86_64")]
10use core::arch::x86_64 as arch;
11
12use arch::*;
13
/// A token for SSE4.1 intrinsics on `x86` and `x86_64`.
///
/// The token is a zero-sized marker: its only field is a private `()`, so
/// code outside this module cannot build one with a struct literal and has
/// to go through [`Sse4_1::new_unchecked`].
#[derive(Clone, Copy, Debug)]
pub struct Sse4_1 {
    // Private unit field: keeps the struct literal unavailable to other modules.
    _private: (),
}
19
20impl Sse4_1 {
21    /// Create a SIMD token.
22    ///
23    /// # Safety
24    ///
25    /// The required CPU features must be available.
26    #[inline]
27    pub unsafe fn new_unchecked() -> Self {
28        Self { _private: () }
29    }
30
31    delegate! { arch:
32        fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i;
33        fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i;
34        fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d;
35        fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128;
36        fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d;
37        fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128;
38        fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32;
39        fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32;
40        fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32;
41        fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128;
42        fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i;
43        fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i;
44        fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i;
45        fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i;
46        fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i;
47        fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i;
48        fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i;
49        fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i;
50        fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i;
51        fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i;
52        fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i;
53        fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i;
54        fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i;
55        fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i;
56        fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i;
57        fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i;
58        fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i;
59        fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i;
60        fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i;
61        fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i;
62        fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i;
63        fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i;
64        fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i;
65        fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i;
66        fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d;
67        fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128;
68        fn _mm_floor_pd(a: __m128d) -> __m128d;
69        fn _mm_floor_ps(a: __m128) -> __m128;
70        fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d;
71        fn _mm_floor_ss(a: __m128, b: __m128) -> __m128;
72        fn _mm_ceil_pd(a: __m128d) -> __m128d;
73        fn _mm_ceil_ps(a: __m128) -> __m128;
74        fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d;
75        fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128;
76        fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d;
77        fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128;
78        fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d;
79        fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128;
80        fn _mm_minpos_epu16(a: __m128i) -> __m128i;
81        fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i;
82        fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i;
83        fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i;
84        fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32;
85        fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32;
86        fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32;
87        fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32;
88        fn _mm_test_all_ones(a: __m128i) -> i32;
89        fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32;
90    }
91}