// tiny-skia — src/wide/i32x4_t.rs
//
// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

use super::f32x4;

// A 4-lane `i32` vector, backed by the best SIMD representation available
// for the compilation target; falls back to a plain array otherwise.
// All backends are 16 bytes and `align(16)`, so the layouts are
// interchangeable for byte-level casts.
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        // x86/x86_64 SSE2 backend: four packed i32 lanes in an `__m128i`.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub __m128i);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        // WebAssembly simd128 backend.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        // AArch64 NEON backend.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub int32x4_t);
    } else {
        // Portable scalar fallback: plain array, one element per lane.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4([i32; 4]);
    }
}

// SAFETY: every backing representation (`__m128i`, `v128`, `int32x4_t`,
// `[i32; 4]`) is 16 bytes of plain integer data with no padding and no
// invalid bit patterns, so zero-initialization and arbitrary byte-level
// casts are sound.
unsafe impl bytemuck::Zeroable for i32x4 {}
unsafe impl bytemuck::Pod for i32x4 {}

44impl Default for i32x4 {
45    fn default() -> Self {
46        Self::splat(0)
47    }
48}
49
impl i32x4 {
    /// Returns a vector with all four lanes set to `n`.
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n])
    }

    /// Lane-wise bit select: for each mask bit of `self` that is 1 the
    /// bit is taken from `t`, otherwise from `f`.
    ///
    /// `self` is intended to be a mask produced by the `cmp_*` methods
    /// (each lane all-ones or all-zeros); the SSE4.1 path in particular
    /// selects per *byte* from the mask byte's high bit, so mixed bit
    /// patterns within a lane are only honored by the bitwise backends.
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                // Relaxed laneselect must be checked before plain simd128,
                // since relaxed-simd implies simd128.
                Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                // vbslq_s32 takes a uint32x4_t mask, hence the cast.
                Self(unsafe { vbslq_s32(cast(self.0), t.0, f.0) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

    /// Lane-wise `==`: each lane becomes all-ones (-1) when equal,
    /// all-zeros otherwise.
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                // vceqq_s32 yields uint32x4_t; cast back to the signed repr.
                Self(unsafe { cast(vceqq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { -1 } else { 0 },
                    if self.0[1] == rhs.0[1] { -1 } else { 0 },
                    if self.0[2] == rhs.0[2] { -1 } else { 0 },
                    if self.0[3] == rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

    /// Lane-wise signed `>`: all-ones (-1) where `self > rhs`, zero otherwise.
    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cmpgt_epi32(self.0, rhs.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcgtq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { -1 } else { 0 },
                    if self.0[1] > rhs.0[1] { -1 } else { 0 },
                    if self.0[2] > rhs.0[2] { -1 } else { 0 },
                    if self.0[3] > rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

    /// Lane-wise signed `<`: all-ones (-1) where `self < rhs`, zero otherwise.
    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cmplt_epi32(self.0, rhs.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcltq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { -1 } else { 0 },
                    if self.0[1] < rhs.0[1] { -1 } else { 0 },
                    if self.0[2] < rhs.0[2] { -1 } else { 0 },
                    if self.0[3] < rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

    /// Numeric conversion: each `i32` lane converted to `f32`.
    pub fn to_f32x4(self) -> f32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cvtepi32_ps(self.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                cast(Self(f32x4_convert_i32x4(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                cast(Self(unsafe { cast(vcvtq_f32_s32(self.0)) }))
            } else {
                let arr: [i32; 4] = cast(self);
                cast([
                    arr[0] as f32,
                    arr[1] as f32,
                    arr[2] as f32,
                    arr[3] as f32,
                ])
            }
        }
    }

    /// Bit-level reinterpretation as `f32x4` (no numeric conversion).
    pub fn to_f32x4_bitcast(self) -> f32x4 {
        bytemuck::cast(self)
    }
}

153impl From<[i32; 4]> for i32x4 {
154    fn from(v: [i32; 4]) -> Self {
155        cast(v)
156    }
157}
158
159impl From<i32x4> for [i32; 4] {
160    fn from(v: i32x4) -> Self {
161        cast(v)
162    }
163}
164
impl core::ops::Add for i32x4 {
    type Output = Self;

    /// Lane-wise wrapping (two's-complement) addition; overflow wraps
    /// on every backend, matching the scalar fallback's `wrapping_add`.
    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].wrapping_add(rhs.0[0]),
                    self.0[1].wrapping_add(rhs.0[1]),
                    self.0[2].wrapping_add(rhs.0[2]),
                    self.0[3].wrapping_add(rhs.0[3]),
                ])
            }
        }
    }
}

impl core::ops::BitAnd for i32x4 {
    type Output = Self;

    /// Lane-wise bitwise AND.
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vandq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] & rhs.0[0],
                    self.0[1] & rhs.0[1],
                    self.0[2] & rhs.0[2],
                    self.0[3] & rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::Mul for i32x4 {
    type Output = Self;

    /// Lane-wise wrapping multiplication.
    ///
    /// Gated on SSE4.1 (not SSE2) because `_mm_mullo_epi32` does not
    /// exist in SSE2; plain-SSE2 x86 targets take the scalar fallback.
    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_mullo_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_s32(self.0, rhs.0) })
            } else {
                // Cast is required, since we have to use scalar multiplication on SSE2.
                let a: [i32; 4] = cast(self);
                let b: [i32; 4] = cast(rhs);
                Self(cast([
                    a[0].wrapping_mul(b[0]),
                    a[1].wrapping_mul(b[1]),
                    a[2].wrapping_mul(b[2]),
                    a[3].wrapping_mul(b[3]),
                ]))
            }
        }
    }
}

impl core::ops::BitOr for i32x4 {
    type Output = Self;

    /// Lane-wise bitwise OR.
    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vorrq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] | rhs.0[0],
                    self.0[1] | rhs.0[1],
                    self.0[2] | rhs.0[2],
                    self.0[3] | rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitXor for i32x4 {
    type Output = Self;

    /// Lane-wise bitwise XOR.
    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { veorq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] ^ rhs.0[0],
                    self.0[1] ^ rhs.0[1],
                    self.0[2] ^ rhs.0[2],
                    self.0[3] ^ rhs.0[3],
                ])
            }
        }
    }
}