Skip to main content

tiny_skia/wide/
f32x4_t.rs

1// Copyright 2020 Yevhenii Reizner
2//
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5
6// Based on https://github.com/Lokathor/wide (Zlib)
7
8use bytemuck::cast;
9
10#[cfg(all(not(feature = "std"), feature = "no-std-float"))]
11use tiny_skia_path::NoStdFloat;
12
13use super::i32x4;
14
// Select the backing storage for `f32x4` based on the enabled SIMD backend;
// the scalar `[f32; 4]` fallback is used when no SIMD target feature is active.
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        /// Four packed `f32` lanes backed by an SSE `__m128` register.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(__m128);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        // repr(transparent) allows for directly passing the v128 on the WASM stack.
        /// Four packed `f32` lanes backed by a WASM `v128` value.
        #[derive(Clone, Copy, Debug)]
        #[repr(transparent)]
        pub struct f32x4(v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        /// Four packed `f32` lanes backed by a NEON `float32x4_t` register.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(float32x4_t);
    } else {
        use super::FasterMinMax;

        /// Four `f32` lanes stored as a plain array; scalar fallback backend.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4([f32; 4]);
    }
}
46
// SAFETY: in every cfg variant, f32x4 is 16 bytes of f32 lane data with no
// padding; the all-zeroes bit pattern is a valid value (four `0.0` lanes).
unsafe impl bytemuck::Zeroable for f32x4 {}
// SAFETY: all backing types are plain 128-bit lane data — every bit pattern
// is valid and the type has no padding, so it satisfies the `Pod` contract.
unsafe impl bytemuck::Pod for f32x4 {}
49
50impl Default for f32x4 {
51    fn default() -> Self {
52        Self::splat(0.0)
53    }
54}
55
impl f32x4 {
    /// Broadcasts `n` into all four lanes.
    pub fn splat(n: f32) -> Self {
        Self::from([n, n, n, n])
    }

    /// Lane-wise `floor`: rounds each lane toward negative infinity.
    pub fn floor(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_floor(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndmq_f32(self.0) })
            } else {
                // Truncate toward zero, then subtract 1 from lanes where the
                // truncated value overshot the input (negative non-integers).
                let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4());
                roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(1.0), f32x4::default())
            }
        }
    }

    /// Lane-wise absolute value.
    pub fn abs(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_abs(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vabsq_f32(self.0) })
            } else {
                // Clearing the sign bit yields abs() for IEEE-754 floats.
                let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32));
                self & non_sign_bits
            }
        }
    }

    /// Lane-wise maximum of `self` and `rhs`.
    pub fn max(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_max_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(f32x4_relaxed_max(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmax(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmaxq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_max(rhs.0[0]),
                    self.0[1].faster_max(rhs.0[1]),
                    self.0[2].faster_max(rhs.0[2]),
                    self.0[3].faster_max(rhs.0[3]),
                ])
            }
        }
    }

    /// Lane-wise minimum of `self` and `rhs`.
    pub fn min(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_min_ps(self.0, rhs.0) })
            }  else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(f32x4_relaxed_min(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmin(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vminq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_min(rhs.0[0]),
                    self.0[1].faster_min(rhs.0[1]),
                    self.0[2].faster_min(rhs.0[2]),
                    self.0[3].faster_min(rhs.0[3]),
                ])
            }
        }
    }

    /// Lane-wise `==`. Each lane of the result has all bits set on true,
    /// and is `0.0` (all bits clear) on false — a blend/select mask.
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vceqq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] == rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] == rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] == rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Lane-wise `!=`, producing an all-ones/all-zeros mask per lane.
    pub fn cmp_ne(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ne(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                // NEON has no direct "not equal"; invert the equality mask.
                Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) }))
            } else {
                Self([
                    if self.0[0] != rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] != rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] != rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] != rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Lane-wise `>=`, producing an all-ones/all-zeros mask per lane.
    pub fn cmp_ge(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ge(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] >= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] >= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] >= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] >= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Lane-wise `>`, producing an all-ones/all-zeros mask per lane.
    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] > rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] > rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] > rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Lane-wise `<=`, producing an all-ones/all-zeros mask per lane.
    pub fn cmp_le(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmple_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_le(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcleq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] <= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] <= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] <= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] <= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Lane-wise `<`, producing an all-ones/all-zeros mask per lane.
    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcltq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] < rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] < rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] < rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    /// Per-lane select: where `self` (a mask from the `cmp_*` methods) has
    /// bits set, take the bits of `t`; elsewhere take the bits of `f`.
    /// Intended for all-ones/all-zeros lane masks; `_mm_blendv_ps` only
    /// consults each lane's sign bit, which agrees for such masks.
    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vbslq_u32( cast(self.0), cast(t.0), cast(f.0))) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

    /// Rounds each lane to the nearest integer (ties to even, matching the
    /// SSE/WASM/NEON round-to-nearest instructions).
    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(
                    unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) },
                )
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_nearest(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndnq_f32(self.0) })
            } else {
                use super::u32x4;

                // 1.0 / f32::EPSILON == 2^23; adding then subtracting it forces
                // the FPU to round to the nearest integer for |x| < 2^23.
                let to_int = f32x4::splat(1.0 / f32::EPSILON);
                let u: u32x4 = cast(self);
                // Biased IEEE-754 exponent of each lane.
                let e: i32x4 = cast(u.shr::<23>() & u32x4::splat(0xff));
                let mut y: f32x4;

                // Exponent >= 23: no fractional bits, the value is already integral.
                let no_op_magic = i32x4::splat(0x7f + 23);
                let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) | e.cmp_eq(no_op_magic));
                let no_op_val: f32x4 = self;

                // Exponent < -1: |value| < 0.5, rounds to (signed) zero.
                let zero_magic = i32x4::splat(0x7f - 1);
                let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic));
                let zero_val: f32x4 = self * f32x4::splat(0.0);

                // Work on |x|; the sign is reapplied at the end.
                let neg_bit: f32x4 = cast(cast::<u32x4, i32x4>(u).cmp_lt(i32x4::default()));
                let x: f32x4 = neg_bit.blend(-self, self);
                // y is the rounding error round(x) - x, in [-0.5, 0.5] under
                // round-to-nearest.
                y = x + to_int - to_int - x;
                // NOTE(review): the true-branch is `y + x - (-1.0)` == `y + x + 1.0`;
                // since y never exceeds 0.5 under round-to-nearest this branch
                // appears unreachable — confirm against upstream `wide` before changing.
                y = y.cmp_gt(f32x4::splat(0.5)).blend(
                    y + x - f32x4::splat(-1.0),
                    y.cmp_lt(f32x4::splat(-0.5)).blend(y + x + f32x4::splat(1.0), y + x),
                );
                y = neg_bit.blend(-y, y);

                no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y))
            }
        }
    }

    /// Rounds each lane to the nearest integer and converts to `i32x4`.
    pub fn round_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvtps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                i32x4(i32x4_relaxed_trunc_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtnq_s32_f32(self.0) } )
            } else {
                let rounded: [f32; 4] = cast(self.round());
                cast([
                    rounded[0] as i32,
                    rounded[1] as i32,
                    rounded[2] as i32,
                    rounded[3] as i32,
                ])
            }
        }
    }

    /// Truncates each lane toward zero and converts to `i32x4`.
    pub fn trunc_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvttps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                i32x4(i32x4_relaxed_trunc_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtq_s32_f32(self.0) })
            } else {
                cast([
                    self.0[0] as i32,
                    self.0[1] as i32,
                    self.0[2] as i32,
                    self.0[3] as i32,
                ])
            }
        }
    }

    /// Approximate lane-wise reciprocal (1/x). Accuracy differs per backend:
    /// SSE `_mm_rcp_ps` is a ~12-bit estimate, NEON adds one Newton-Raphson
    /// refinement step, WASM and the scalar path compute an exact division.
    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rcp_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
                    // Estimate, then refine once with vrecpsq (Newton-Raphson).
                    let a = vrecpeq_f32(self.0);
                    let a = vmulq_f32(vrecpsq_f32(self.0, a), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0],
                    1.0 / self.0[1],
                    1.0 / self.0[2],
                    1.0 / self.0[3],
                ])
            }
        }
    }

    /// Approximate lane-wise reciprocal square root (1/sqrt(x)). Like
    /// [`recip_fast`], precision varies per backend: SSE is an estimate,
    /// NEON refines once, WASM/scalar compute it exactly.
    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rsqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), f32x4_sqrt(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
                    // Estimate, then refine once with vrsqrtsq (Newton-Raphson).
                    let a = vrsqrteq_f32(self.0);
                    let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0].sqrt(),
                    1.0 / self.0[1].sqrt(),
                    1.0 / self.0[2].sqrt(),
                    1.0 / self.0[3].sqrt(),
                ])
            }
        }
    }

    /// Lane-wise square root.
    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sqrt(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsqrtq_f32(self.0) })
            } else {
                Self::from([
                    self.0[0].sqrt(),
                    self.0[1].sqrt(),
                    self.0[2].sqrt(),
                    self.0[3].sqrt(),
                ])
            }
        }
    }
}
418
impl From<[f32; 4]> for f32x4 {
    /// Builds a vector from four lanes via a bit-level cast (all backing
    /// representations are layout-compatible with `[f32; 4]`).
    fn from(v: [f32; 4]) -> Self {
        cast(v)
    }
}
424
impl From<f32x4> for [f32; 4] {
    /// Extracts the four lanes via a bit-level cast.
    fn from(v: f32x4) -> Self {
        cast(v)
    }
}
430
impl core::ops::Add for f32x4 {
    type Output = Self;

    /// Lane-wise addition.
    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] + rhs.0[0],
                    self.0[1] + rhs.0[1],
                    self.0[2] + rhs.0[2],
                    self.0[3] + rhs.0[3],
                ])
            }
        }
    }
}
453
454impl core::ops::AddAssign for f32x4 {
455    fn add_assign(&mut self, rhs: f32x4) {
456        *self = *self + rhs;
457    }
458}
459
impl core::ops::Sub for f32x4 {
    type Output = Self;

    /// Lane-wise subtraction.
    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sub(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsubq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] - rhs.0[0],
                    self.0[1] - rhs.0[1],
                    self.0[2] - rhs.0[2],
                    self.0[3] - rhs.0[3],
                ])
            }
        }
    }
}
482
impl core::ops::Mul for f32x4 {
    type Output = Self;

    /// Lane-wise multiplication.
    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] * rhs.0[0],
                    self.0[1] * rhs.0[1],
                    self.0[2] * rhs.0[2],
                    self.0[3] * rhs.0[3],
                ])
            }
        }
    }
}
505
506impl core::ops::MulAssign for f32x4 {
507    fn mul_assign(&mut self, rhs: f32x4) {
508        *self = *self * rhs;
509    }
510}
511
impl core::ops::Div for f32x4 {
    type Output = Self;

    /// Lane-wise division.
    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_div_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vdivq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] / rhs.0[0],
                    self.0[1] / rhs.0[1],
                    self.0[2] / rhs.0[2],
                    self.0[3] / rhs.0[3],
                ])
            }
        }
    }
}
534
impl core::ops::BitAnd for f32x4 {
    type Output = Self;

    /// Bitwise AND of the raw lane bits (used for sign/mask manipulation).
    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() & rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() & rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() & rhs.0[3].to_bits()),
                ])
            }
        }
    }
}
558
impl core::ops::BitOr for f32x4 {
    type Output = Self;

    /// Bitwise OR of the raw lane bits.
    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() | rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() | rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() | rhs.0[3].to_bits()),
                ])
            }
        }
    }
}
582
impl core::ops::BitXor for f32x4 {
    type Output = Self;

    /// Bitwise XOR of the raw lane bits.
    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() ^ rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() ^ rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() ^ rhs.0[3].to_bits()),
                ])
            }
        }
    }
}
606
607impl core::ops::Neg for f32x4 {
608    type Output = Self;
609
610    fn neg(self) -> Self {
611        Self::default() - self
612    }
613}
614
impl core::ops::Not for f32x4 {
    type Output = Self;

    /// Bitwise NOT of the raw lane bits (flips every bit; used on masks).
    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe {
                    // SSE has no NOT; XOR against an all-ones register instead.
                    let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX));
                    Self(_mm_xor_ps(self.0, all_bits))
                }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_not(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(cast(self.0)) }))
            } else {
                // `cast(u32::MAX)` bit-casts the all-ones pattern into an f32.
                self ^ Self::splat(cast(u32::MAX))
            }
        }
    }
}
635
impl core::cmp::PartialEq for f32x4 {
    /// True only when all four lanes compare equal (float `==` semantics).
    fn eq(&self, rhs: &Self) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                // movemask gathers each lane's mask sign bit; 0b1111 == all lanes equal.
                unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == 0b1111 }
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                // Horizontal min of the per-lane masks is nonzero only if every
                // lane's mask is all-ones, i.e. all lanes compared equal.
                unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != 0 }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                u32x4_all_true(f32x4_eq(self.0, rhs.0))
            } else {
                self.0 == rhs.0
            }
        }
    }
}