use bytemuck::cast;

#[cfg(all(not(feature = "std"), feature = "no-std-float"))]
use tiny_skia_path::NoStdFloat;

use super::i32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(__m128);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(transparent)]
        pub struct f32x4(v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(float32x4_t);
    } else {
        use super::FasterMinMax;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4([f32; 4]);
    }
}

unsafe impl bytemuck::Zeroable for f32x4 {}
unsafe impl bytemuck::Pod for f32x4 {}

impl Default for f32x4 {
    fn default() -> Self {
        Self::splat(0.0)
    }
}

impl f32x4 {
    pub fn splat(n: f32) -> Self {
        Self::from([n, n, n, n])
    }

    pub fn floor(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_floor(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndmq_f32(self.0) })
            } else {
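                // Truncate toward zero, then subtract 1 from lanes where the
                // truncated value overshoots (negative, non-integral inputs).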
                let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4());
                roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(1.0), f32x4::default())
            }
        }
    }

    pub fn abs(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_abs(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vabsq_f32(self.0) })
            } else {
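                // Mask off the sign bit: i32::MAX as u32 is 0x7fff_ffff.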
                let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32));
                self & non_sign_bits
            }
        }
    }

    pub fn max(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_max_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(f32x4_relaxed_max(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmax(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmaxq_f32(self.0, rhs.0) })
            } else {
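                // `faster_max` skips the strict NaN handling of `f32::max`,
                // roughly matching the SIMD instructions above.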
                Self([
                    self.0[0].faster_max(rhs.0[0]),
                    self.0[1].faster_max(rhs.0[1]),
                    self.0[2].faster_max(rhs.0[2]),
                    self.0[3].faster_max(rhs.0[3]),
                ])
            }
        }
    }

    pub fn min(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_min_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(f32x4_relaxed_min(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmin(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vminq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_min(rhs.0[0]),
                    self.0[1].faster_min(rhs.0[1]),
                    self.0[2].faster_min(rhs.0[2]),
                    self.0[3].faster_min(rhs.0[3]),
                ])
            }
        }
    }

    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vceqq_f32(self.0, rhs.0) }))
            } else {
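                // Comparisons produce a lane mask: all bits set where the
                // predicate holds, all bits clear otherwise.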
142 Self([
143 if self.0[0] == rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
144 if self.0[1] == rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
145 if self.0[2] == rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
146 if self.0[3] == rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
147 ])
148 }
149 }
150 }
151
152 pub fn cmp_ne(self, rhs: Self) -> Self {
153 cfg_if::cfg_if! {
154 if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
155 Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) })
156 } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
157 Self(f32x4_ne(self.0, rhs.0))
158 } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
159 Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) }))
160 } else {
161 Self([
162 if self.0[0] != rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
163 if self.0[1] != rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
164 if self.0[2] != rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
165 if self.0[3] != rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
166 ])
167 }
168 }
169 }
170
171 pub fn cmp_ge(self, rhs: Self) -> Self {
172 cfg_if::cfg_if! {
173 if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
174 Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
175 } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
176 Self(f32x4_ge(self.0, rhs.0))
177 } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
178 Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) }))
179 } else {
180 Self([
181 if self.0[0] >= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
182 if self.0[1] >= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
183 if self.0[2] >= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
184 if self.0[3] >= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
185 ])
186 }
187 }
188 }
189
190 pub fn cmp_gt(self, rhs: Self) -> Self {
191 cfg_if::cfg_if! {
192 if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
193 Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
194 } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
195 Self(f32x4_gt(self.0, rhs.0))
196 } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
197 Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) }))
198 } else {
199 Self([
200 if self.0[0] > rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
201 if self.0[1] > rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
202 if self.0[2] > rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
203 if self.0[3] > rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
204 ])
205 }
206 }
207 }
208
209 pub fn cmp_le(self, rhs: Self) -> Self {
210 cfg_if::cfg_if! {
211 if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
212 Self(unsafe { _mm_cmple_ps(self.0, rhs.0) })
213 } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
214 Self(f32x4_le(self.0, rhs.0))
215 } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
216 Self(cast(unsafe { vcleq_f32(self.0, rhs.0) }))
217 } else {
218 Self([
219 if self.0[0] <= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
220 if self.0[1] <= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
221 if self.0[2] <= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
222 if self.0[3] <= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
223 ])
224 }
225 }
226 }
227
228 pub fn cmp_lt(self, rhs: Self) -> Self {
229 cfg_if::cfg_if! {
230 if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
231 Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
232 } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
233 Self(f32x4_lt(self.0, rhs.0))
234 } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
235 Self(cast(unsafe { vcltq_f32(self.0, rhs.0) }))
236 } else {
237 Self([
238 if self.0[0] < rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
239 if self.0[1] < rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
240 if self.0[2] < rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
241 if self.0[3] < rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
242 ])
243 }
244 }
245 }
246
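    /// Selects between `t` and `f` using `self` as a mask: lanes where `self`
    /// is all ones take `t`, lanes that are all zeros take `f`. Mixed-bit
    /// masks behave differently across backends, so pass comparison results.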
    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vbslq_u32(cast(self.0), cast(t.0), cast(f.0))) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

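    /// Rounds each lane to the nearest integer, with ties rounding to even.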
    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(
                    unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) },
                )
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_nearest(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndnq_f32(self.0) })
            } else {
                use super::u32x4;

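                // Adding 2^23 (1.0 / f32::EPSILON) pushes the fractional bits
                // out of the mantissa, forcing the FPU to round; subtracting it
                // back leaves the rounded value.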
                let to_int = f32x4::splat(1.0 / f32::EPSILON);
                let u: u32x4 = cast(self);
                let e: i32x4 = cast(u.shr::<23>() & u32x4::splat(0xff));
                let mut y: f32x4;

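                // A biased exponent of 0x7f + 23 or more means the lane has no
                // fractional bits, so rounding is a no-op.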
                let no_op_magic = i32x4::splat(0x7f + 23);
                let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) | e.cmp_eq(no_op_magic));
                let no_op_val: f32x4 = self;

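                // Lanes with |x| < 0.5 round to (signed) zero.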
                let zero_magic = i32x4::splat(0x7f - 1);
                let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic));
                let zero_val: f32x4 = self * f32x4::splat(0.0);

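                // Round the absolute value, then restore the sign at the end.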
                let neg_bit: f32x4 = cast(cast::<u32x4, i32x4>(u).cmp_lt(i32x4::default()));
                let x: f32x4 = neg_bit.blend(-self, self);
                y = x + to_int - to_int - x;
                y = y.cmp_gt(f32x4::splat(0.5)).blend(
                    y + x - f32x4::splat(1.0),
                    y.cmp_lt(f32x4::splat(-0.5)).blend(y + x + f32x4::splat(1.0), y + x),
                );
                y = neg_bit.blend(-y, y);

                no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y))
            }
        }
    }

    pub fn round_int(self) -> i32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvtps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                i32x4(i32x4_relaxed_trunc_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtnq_s32_f32(self.0) })
            } else {
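                // Rust `as` casts saturate on overflow and map NaN to 0.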
                let rounded: [f32; 4] = cast(self.round());
                cast([
                    rounded[0] as i32,
                    rounded[1] as i32,
                    rounded[2] as i32,
                    rounded[3] as i32,
                ])
            }
        }
    }

    pub fn trunc_int(self) -> i32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvttps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
                i32x4(i32x4_relaxed_trunc_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtq_s32_f32(self.0) })
            } else {
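                // `as` truncates toward zero, saturating on overflow.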
                cast([
                    self.0[0] as i32,
                    self.0[1] as i32,
                    self.0[2] as i32,
                    self.0[3] as i32,
                ])
            }
        }
    }

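    /// Approximate reciprocal. Precision differs by backend: the SSE path uses
    /// a hardware estimate, NEON refines an estimate, and the remaining paths
    /// divide exactly.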
    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rcp_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
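                    // One Newton-Raphson step refines the initial estimate.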
                    let a = vrecpeq_f32(self.0);
                    let a = vmulq_f32(vrecpsq_f32(self.0, a), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0],
                    1.0 / self.0[1],
                    1.0 / self.0[2],
                    1.0 / self.0[3],
                ])
            }
        }
    }

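    /// Approximate reciprocal square root; see `recip_fast` for a note on
    /// per-backend precision.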
    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rsqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), f32x4_sqrt(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
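                    // One Newton-Raphson step refines the initial estimate.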
                    let a = vrsqrteq_f32(self.0);
                    let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0].sqrt(),
                    1.0 / self.0[1].sqrt(),
                    1.0 / self.0[2].sqrt(),
                    1.0 / self.0[3].sqrt(),
                ])
            }
        }
    }

    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sqrt(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsqrtq_f32(self.0) })
            } else {
                Self::from([
                    self.0[0].sqrt(),
                    self.0[1].sqrt(),
                    self.0[2].sqrt(),
                    self.0[3].sqrt(),
                ])
            }
        }
    }
}

impl From<[f32; 4]> for f32x4 {
    fn from(v: [f32; 4]) -> Self {
        cast(v)
    }
}

impl From<f32x4> for [f32; 4] {
    fn from(v: f32x4) -> Self {
        cast(v)
    }
}

impl core::ops::Add for f32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] + rhs.0[0],
                    self.0[1] + rhs.0[1],
                    self.0[2] + rhs.0[2],
                    self.0[3] + rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::AddAssign for f32x4 {
    fn add_assign(&mut self, rhs: f32x4) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for f32x4 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sub(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsubq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] - rhs.0[0],
                    self.0[1] - rhs.0[1],
                    self.0[2] - rhs.0[2],
                    self.0[3] - rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::Mul for f32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] * rhs.0[0],
                    self.0[1] * rhs.0[1],
                    self.0[2] * rhs.0[2],
                    self.0[3] * rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::MulAssign for f32x4 {
    fn mul_assign(&mut self, rhs: f32x4) {
        *self = *self * rhs;
    }
}

impl core::ops::Div for f32x4 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_div_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vdivq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] / rhs.0[0],
                    self.0[1] / rhs.0[1],
                    self.0[2] / rhs.0[2],
                    self.0[3] / rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitAnd for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() & rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() & rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() & rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitOr for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() | rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() | rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() | rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitXor for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() ^ rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() ^ rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() ^ rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::Neg for f32x4 {
    type Output = Self;

    fn neg(self) -> Self {
        Self::default() - self
    }
}

impl core::ops::Not for f32x4 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe {
                    let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX));
                    Self(_mm_xor_ps(self.0, all_bits))
                }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_not(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(cast(self.0)) }))
            } else {
                self ^ Self::splat(cast(u32::MAX))
            }
        }
    }
}

impl core::cmp::PartialEq for f32x4 {
    fn eq(&self, rhs: &Self) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
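                // Four set sign bits in the movemask means all lanes compared equal.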
                unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == 0b1111 }
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != 0 }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                u32x4_all_true(f32x4_eq(self.0, rhs.0))
            } else {
                self.0 == rhs.0
            }
        }
    }
}