use bytemuck::cast;

use super::f32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub __m128i);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub int32x4_t);
    } else {
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4([i32; 4]);
    }
}

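// Every backing representation above is exactly 16 bytes with 16-byte
// alignment and has no padding or invalid bit patterns, which is what makes
// the `Zeroable` and `Pod` impls below sound.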
unsafe impl bytemuck::Zeroable for i32x4 {}
unsafe impl bytemuck::Pod for i32x4 {}

impl Default for i32x4 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl i32x4 {
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n])
    }

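    /// Lanewise select: returns the lanes of `t` where the bits of `self` are
    /// set and the lanes of `f` elsewhere. The mask is expected to come from
    /// the `cmp_*` methods, i.e. each lane is all ones or all zeros; the
    /// SSE4.1 `blendv` path only inspects the high bit of each byte, so
    /// partial masks are not portable across the backends.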
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vbslq_s32(cast(self.0), t.0, f.0) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

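    /// Lanewise `self == rhs`. Like `cmp_gt` and `cmp_lt` below, this returns
    /// all ones (`-1`) in lanes where the predicate holds and `0` elsewhere,
    /// matching the mask layout produced by the SIMD compare intrinsics.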
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vceqq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { -1 } else { 0 },
                    if self.0[1] == rhs.0[1] { -1 } else { 0 },
                    if self.0[2] == rhs.0[2] { -1 } else { 0 },
                    if self.0[3] == rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcgtq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { -1 } else { 0 },
                    if self.0[1] > rhs.0[1] { -1 } else { 0 },
                    if self.0[2] > rhs.0[2] { -1 } else { 0 },
                    if self.0[3] > rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcltq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { -1 } else { 0 },
                    if self.0[1] < rhs.0[1] { -1 } else { 0 },
                    if self.0[2] < rhs.0[2] { -1 } else { 0 },
                    if self.0[3] < rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

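    /// Lanewise numeric conversion to `f32`, the vector equivalent of
    /// `as f32` on each lane.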
    pub fn to_f32x4(self) -> f32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cvtepi32_ps(self.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                cast(Self(f32x4_convert_i32x4(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                cast(Self(unsafe { cast(vcvtq_f32_s32(self.0)) }))
            } else {
                let arr: [i32; 4] = cast(self);
                cast([
                    arr[0] as f32,
                    arr[1] as f32,
                    arr[2] as f32,
                    arr[3] as f32,
                ])
            }
        }
    }

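    /// Reinterprets the raw bits as `f32x4` without any numeric conversion.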
    pub fn to_f32x4_bitcast(self) -> f32x4 {
        bytemuck::cast(self)
    }
}

impl From<[i32; 4]> for i32x4 {
    fn from(v: [i32; 4]) -> Self {
        cast(v)
    }
}

impl From<i32x4> for [i32; 4] {
    fn from(v: i32x4) -> Self {
        cast(v)
    }
}

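// The operator impls below are all lanewise. The scalar fallbacks use
// `wrapping_*` arithmetic because the SIMD instructions wrap on overflow,
// so every backend overflows the same way.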
impl core::ops::Add for i32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].wrapping_add(rhs.0[0]),
                    self.0[1].wrapping_add(rhs.0[1]),
                    self.0[2].wrapping_add(rhs.0[2]),
                    self.0[3].wrapping_add(rhs.0[3]),
                ])
            }
        }
    }
}

impl core::ops::BitAnd for i32x4 {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vandq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] & rhs.0[0],
                    self.0[1] & rhs.0[1],
                    self.0[2] & rhs.0[2],
                    self.0[3] & rhs.0[3],
                ])
            }
        }
    }
}

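// `_mm_mullo_epi32` is an SSE4.1 instruction, so unlike the other operators
// multiplication cannot use a plain SSE2 path and falls back to scalar code
// there; the fallback goes through `cast` so it works for any backing repr.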
impl core::ops::Mul for i32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_mullo_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_s32(self.0, rhs.0) })
            } else {
                let a: [i32; 4] = cast(self);
                let b: [i32; 4] = cast(rhs);
                Self(cast([
                    a[0].wrapping_mul(b[0]),
                    a[1].wrapping_mul(b[1]),
                    a[2].wrapping_mul(b[2]),
                    a[3].wrapping_mul(b[3]),
                ]))
            }
        }
    }
}

impl core::ops::BitOr for i32x4 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vorrq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] | rhs.0[0],
                    self.0[1] | rhs.0[1],
                    self.0[2] | rhs.0[2],
                    self.0[3] | rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitXor for i32x4 {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { veorq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] ^ rhs.0[0],
                    self.0[1] ^ rhs.0[1],
                    self.0[2] ^ rhs.0[2],
                    self.0[3] ^ rhs.0[3],
                ])
            }
        }
    }
}
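
// A minimal sanity-check sketch for the mask/blend semantics documented
// above; it assumes nothing beyond the public API defined in this file.
#[cfg(test)]
mod tests {
    use super::i32x4;

    #[test]
    fn cmp_and_blend_select_lanes() {
        let a = i32x4::from([1, 5, 3, 7]);
        let b = i32x4::from([4, 2, 3, 6]);

        // Comparisons yield -1 (all ones) for true lanes and 0 for false ones.
        let mask = a.cmp_gt(b);
        assert_eq!(<[i32; 4]>::from(mask), [0, -1, 0, -1]);

        // `blend` takes lanes from its first argument where the mask is set,
        // so this computes the lanewise maximum of `a` and `b`.
        let max = mask.blend(a, b);
        assert_eq!(<[i32; 4]>::from(max), [4, 5, 3, 7]);
    }
}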