1use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
7use crate::{
8 f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
9 i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
10 mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
11 u32x4, u32x8, u32x16,
12};
13use core::ops::*;
/// Scalar float helpers used by this module when it is built with the `libm`
/// feature enabled and the `std` feature disabled.
#[cfg(all(feature = "libm", not(feature = "std")))]
trait FloatExt {
    /// Square root of `self`.
    fn sqrt(self) -> Self;
    /// Largest integer value less than or equal to `self`.
    fn floor(self) -> Self;
    /// Smallest integer value greater than or equal to `self`.
    fn ceil(self) -> Self;
    /// Integer part of `self`, rounding toward zero.
    fn trunc(self) -> Self;
    /// Fractional part of `self`.
    fn fract(self) -> Self;
    /// Nearest integer to `self`, with ties going to the even neighbour.
    fn round_ties_even(self) -> Self;
}
/// `f32` implementation of [`FloatExt`], forwarding to the single-precision `libm` routines.
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f32 {
    #[inline(always)]
    fn sqrt(self) -> f32 {
        libm::sqrtf(self)
    }
    #[inline(always)]
    fn floor(self) -> f32 {
        libm::floorf(self)
    }
    #[inline(always)]
    fn ceil(self) -> f32 {
        libm::ceilf(self)
    }
    #[inline(always)]
    fn trunc(self) -> f32 {
        libm::truncf(self)
    }
    #[inline(always)]
    fn fract(self) -> f32 {
        // The fractional part is what remains after removing the truncated integer part.
        let whole = self.trunc();
        self - whole
    }
    #[inline(always)]
    fn round_ties_even(self) -> f32 {
        libm::rintf(self)
    }
}
/// `f64` implementation of [`FloatExt`], forwarding to the double-precision `libm` routines.
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f64 {
    #[inline(always)]
    fn sqrt(self) -> f64 {
        libm::sqrt(self)
    }
    #[inline(always)]
    fn floor(self) -> f64 {
        libm::floor(self)
    }
    #[inline(always)]
    fn ceil(self) -> f64 {
        libm::ceil(self)
    }
    #[inline(always)]
    fn trunc(self) -> f64 {
        libm::trunc(self)
    }
    #[inline(always)]
    fn fract(self) -> f64 {
        // The fractional part is what remains after removing the truncated integer part.
        let whole = self.trunc();
        self - whole
    }
    #[inline(always)]
    fn round_ties_even(self) -> f64 {
        libm::rint(self)
    }
}
77#[doc = "The SIMD token for the \"fallback\" level."]
78#[derive(Clone, Copy, Debug)]
79pub struct Fallback {
80 pub fallback: crate::core_arch::fallback::Fallback,
81}
82impl Fallback {
83 #[inline]
84 pub const fn new() -> Self {
85 Self {
86 fallback: crate::core_arch::fallback::Fallback::new(),
87 }
88 }
89}
// Marker impl: `Seal` has no items to provide for `Fallback`.
impl Seal for Fallback {}
91impl ArchTypes for Fallback {
92 type f32x4 = crate::support::Aligned128<[f32; 4usize]>;
93 type i8x16 = crate::support::Aligned128<[i8; 16usize]>;
94 type u8x16 = crate::support::Aligned128<[u8; 16usize]>;
95 type mask8x16 = crate::support::Aligned128<[i8; 16usize]>;
96 type i16x8 = crate::support::Aligned128<[i16; 8usize]>;
97 type u16x8 = crate::support::Aligned128<[u16; 8usize]>;
98 type mask16x8 = crate::support::Aligned128<[i16; 8usize]>;
99 type i32x4 = crate::support::Aligned128<[i32; 4usize]>;
100 type u32x4 = crate::support::Aligned128<[u32; 4usize]>;
101 type mask32x4 = crate::support::Aligned128<[i32; 4usize]>;
102 type f64x2 = crate::support::Aligned128<[f64; 2usize]>;
103 type mask64x2 = crate::support::Aligned128<[i64; 2usize]>;
104 type f32x8 = crate::support::Aligned256<[f32; 8usize]>;
105 type i8x32 = crate::support::Aligned256<[i8; 32usize]>;
106 type u8x32 = crate::support::Aligned256<[u8; 32usize]>;
107 type mask8x32 = crate::support::Aligned256<[i8; 32usize]>;
108 type i16x16 = crate::support::Aligned256<[i16; 16usize]>;
109 type u16x16 = crate::support::Aligned256<[u16; 16usize]>;
110 type mask16x16 = crate::support::Aligned256<[i16; 16usize]>;
111 type i32x8 = crate::support::Aligned256<[i32; 8usize]>;
112 type u32x8 = crate::support::Aligned256<[u32; 8usize]>;
113 type mask32x8 = crate::support::Aligned256<[i32; 8usize]>;
114 type f64x4 = crate::support::Aligned256<[f64; 4usize]>;
115 type mask64x4 = crate::support::Aligned256<[i64; 4usize]>;
116 type f32x16 = crate::support::Aligned512<[f32; 16usize]>;
117 type i8x64 = crate::support::Aligned512<[i8; 64usize]>;
118 type u8x64 = crate::support::Aligned512<[u8; 64usize]>;
119 type mask8x64 = crate::support::Aligned512<[i8; 64usize]>;
120 type i16x32 = crate::support::Aligned512<[i16; 32usize]>;
121 type u16x32 = crate::support::Aligned512<[u16; 32usize]>;
122 type mask16x32 = crate::support::Aligned512<[i16; 32usize]>;
123 type i32x16 = crate::support::Aligned512<[i32; 16usize]>;
124 type u32x16 = crate::support::Aligned512<[u32; 16usize]>;
125 type mask32x16 = crate::support::Aligned512<[i32; 16usize]>;
126 type f64x8 = crate::support::Aligned512<[f64; 8usize]>;
127 type mask64x8 = crate::support::Aligned512<[i64; 8usize]>;
128}
129impl Simd for Fallback {
    // The unsuffixed `*s` associated types all resolve to the 128-bit vector types.
    type f32s = f32x4<Self>;
    type f64s = f64x2<Self>;
    type u8s = u8x16<Self>;
    type i8s = i8x16<Self>;
    type u16s = u16x8<Self>;
    type i16s = i16x8<Self>;
    type u32s = u32x4<Self>;
    type i32s = i32x4<Self>;
    // Mask counterparts, one per lane width.
    type mask8s = mask8x16<Self>;
    type mask16s = mask16x8<Self>;
    type mask32s = mask32x4<Self>;
    type mask64s = mask64x2<Self>;
142 #[inline(always)]
143 fn level(self) -> Level {
144 #[cfg(feature = "force_support_fallback")]
145 return Level::Fallback(self);
146 #[cfg(not(feature = "force_support_fallback"))]
147 Level::baseline()
148 }
149 #[inline]
150 fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
151 fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R {
152 f()
153 }
154 vectorize_inner(f)
155 }
    /// Builds a vector whose four lanes all hold `val`.
    #[inline(always)]
    fn splat_f32x4(self, val: f32) -> f32x4<Self> {
        [val; 4usize].simd_into(self)
    }
160 #[inline(always)]
161 fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4<Self> {
162 f32x4 {
163 val: crate::support::Aligned128(val),
164 simd: self,
165 }
166 }
167 #[inline(always)]
168 fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4<Self> {
169 f32x4 {
170 val: crate::support::Aligned128(*val),
171 simd: self,
172 }
173 }
174 #[inline(always)]
175 fn as_array_f32x4(self, a: f32x4<Self>) -> [f32; 4usize] {
176 a.val.0
177 }
178 #[inline(always)]
179 fn as_array_ref_f32x4(self, a: &f32x4<Self>) -> &[f32; 4usize] {
180 &a.val.0
181 }
182 #[inline(always)]
183 fn as_array_mut_f32x4(self, a: &mut f32x4<Self>) -> &mut [f32; 4usize] {
184 &mut a.val.0
185 }
186 #[inline(always)]
187 fn store_array_f32x4(self, a: f32x4<Self>, dest: &mut [f32; 4usize]) -> () {
188 *dest = a.val.0;
189 }
190 #[inline(always)]
191 fn cvt_from_bytes_f32x4(self, a: u8x16<Self>) -> f32x4<Self> {
192 unsafe {
193 f32x4 {
194 val: core::mem::transmute(a.val),
195 simd: self,
196 }
197 }
198 }
199 #[inline(always)]
200 fn cvt_to_bytes_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
201 unsafe {
202 u8x16 {
203 val: core::mem::transmute(a.val),
204 simd: self,
205 }
206 }
207 }
208 #[inline(always)]
209 fn slide_f32x4<const SHIFT: usize>(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
210 let mut dest = [Default::default(); 4usize];
211 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
212 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
213 dest.simd_into(self)
214 }
    /// For this vector type the operation simply forwards to `slide_f32x4`.
    #[inline(always)]
    fn slide_within_blocks_f32x4<const SHIFT: usize>(
        self,
        a: f32x4<Self>,
        b: f32x4<Self>,
    ) -> f32x4<Self> {
        self.slide_f32x4::<SHIFT>(a, b)
    }
223 #[inline(always)]
224 fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
225 [
226 f32::abs(a[0usize]),
227 f32::abs(a[1usize]),
228 f32::abs(a[2usize]),
229 f32::abs(a[3usize]),
230 ]
231 .simd_into(self)
232 }
233 #[inline(always)]
234 fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
235 [
236 f32::neg(a[0usize]),
237 f32::neg(a[1usize]),
238 f32::neg(a[2usize]),
239 f32::neg(a[3usize]),
240 ]
241 .simd_into(self)
242 }
243 #[inline(always)]
244 fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
245 [
246 f32::sqrt(a[0usize]),
247 f32::sqrt(a[1usize]),
248 f32::sqrt(a[2usize]),
249 f32::sqrt(a[3usize]),
250 ]
251 .simd_into(self)
252 }
253 #[inline(always)]
254 fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
255 [
256 f32::add(a[0usize], &b[0usize]),
257 f32::add(a[1usize], &b[1usize]),
258 f32::add(a[2usize], &b[2usize]),
259 f32::add(a[3usize], &b[3usize]),
260 ]
261 .simd_into(self)
262 }
263 #[inline(always)]
264 fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
265 [
266 f32::sub(a[0usize], &b[0usize]),
267 f32::sub(a[1usize], &b[1usize]),
268 f32::sub(a[2usize], &b[2usize]),
269 f32::sub(a[3usize], &b[3usize]),
270 ]
271 .simd_into(self)
272 }
273 #[inline(always)]
274 fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
275 [
276 f32::mul(a[0usize], &b[0usize]),
277 f32::mul(a[1usize], &b[1usize]),
278 f32::mul(a[2usize], &b[2usize]),
279 f32::mul(a[3usize], &b[3usize]),
280 ]
281 .simd_into(self)
282 }
283 #[inline(always)]
284 fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
285 [
286 f32::div(a[0usize], &b[0usize]),
287 f32::div(a[1usize], &b[1usize]),
288 f32::div(a[2usize], &b[2usize]),
289 f32::div(a[3usize], &b[3usize]),
290 ]
291 .simd_into(self)
292 }
293 #[inline(always)]
294 fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
295 [
296 f32::copysign(a[0usize], b[0usize]),
297 f32::copysign(a[1usize], b[1usize]),
298 f32::copysign(a[2usize], b[2usize]),
299 f32::copysign(a[3usize], b[3usize]),
300 ]
301 .simd_into(self)
302 }
303 #[inline(always)]
304 fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
305 [
306 -(f32::eq(&a[0usize], &b[0usize]) as i32),
307 -(f32::eq(&a[1usize], &b[1usize]) as i32),
308 -(f32::eq(&a[2usize], &b[2usize]) as i32),
309 -(f32::eq(&a[3usize], &b[3usize]) as i32),
310 ]
311 .simd_into(self)
312 }
313 #[inline(always)]
314 fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
315 [
316 -(f32::lt(&a[0usize], &b[0usize]) as i32),
317 -(f32::lt(&a[1usize], &b[1usize]) as i32),
318 -(f32::lt(&a[2usize], &b[2usize]) as i32),
319 -(f32::lt(&a[3usize], &b[3usize]) as i32),
320 ]
321 .simd_into(self)
322 }
323 #[inline(always)]
324 fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
325 [
326 -(f32::le(&a[0usize], &b[0usize]) as i32),
327 -(f32::le(&a[1usize], &b[1usize]) as i32),
328 -(f32::le(&a[2usize], &b[2usize]) as i32),
329 -(f32::le(&a[3usize], &b[3usize]) as i32),
330 ]
331 .simd_into(self)
332 }
333 #[inline(always)]
334 fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
335 [
336 -(f32::ge(&a[0usize], &b[0usize]) as i32),
337 -(f32::ge(&a[1usize], &b[1usize]) as i32),
338 -(f32::ge(&a[2usize], &b[2usize]) as i32),
339 -(f32::ge(&a[3usize], &b[3usize]) as i32),
340 ]
341 .simd_into(self)
342 }
343 #[inline(always)]
344 fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
345 [
346 -(f32::gt(&a[0usize], &b[0usize]) as i32),
347 -(f32::gt(&a[1usize], &b[1usize]) as i32),
348 -(f32::gt(&a[2usize], &b[2usize]) as i32),
349 -(f32::gt(&a[3usize], &b[3usize]) as i32),
350 ]
351 .simd_into(self)
352 }
353 #[inline(always)]
354 fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
355 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
356 }
357 #[inline(always)]
358 fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
359 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
360 }
361 #[inline(always)]
362 fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
363 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
364 }
365 #[inline(always)]
366 fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
367 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
368 }
369 #[inline(always)]
370 fn interleave_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> (f32x4<Self>, f32x4<Self>) {
371 (self.zip_low_f32x4(a, b), self.zip_high_f32x4(a, b))
372 }
373 #[inline(always)]
374 fn deinterleave_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> (f32x4<Self>, f32x4<Self>) {
375 (self.unzip_low_f32x4(a, b), self.unzip_high_f32x4(a, b))
376 }
377 #[inline(always)]
378 fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
379 [
380 f32::max(a[0usize], b[0usize]),
381 f32::max(a[1usize], b[1usize]),
382 f32::max(a[2usize], b[2usize]),
383 f32::max(a[3usize], b[3usize]),
384 ]
385 .simd_into(self)
386 }
387 #[inline(always)]
388 fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
389 [
390 f32::min(a[0usize], b[0usize]),
391 f32::min(a[1usize], b[1usize]),
392 f32::min(a[2usize], b[2usize]),
393 f32::min(a[3usize], b[3usize]),
394 ]
395 .simd_into(self)
396 }
397 #[inline(always)]
398 fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
399 [
400 f32::max(a[0usize], b[0usize]),
401 f32::max(a[1usize], b[1usize]),
402 f32::max(a[2usize], b[2usize]),
403 f32::max(a[3usize], b[3usize]),
404 ]
405 .simd_into(self)
406 }
407 #[inline(always)]
408 fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
409 [
410 f32::min(a[0usize], b[0usize]),
411 f32::min(a[1usize], b[1usize]),
412 f32::min(a[2usize], b[2usize]),
413 f32::min(a[3usize], b[3usize]),
414 ]
415 .simd_into(self)
416 }
417 #[inline(always)]
418 fn mul_add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
419 a.mul(b).add(c)
420 }
421 #[inline(always)]
422 fn mul_sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
423 a.mul(b).sub(c)
424 }
425 #[inline(always)]
426 fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
427 [
428 f32::floor(a[0usize]),
429 f32::floor(a[1usize]),
430 f32::floor(a[2usize]),
431 f32::floor(a[3usize]),
432 ]
433 .simd_into(self)
434 }
435 #[inline(always)]
436 fn ceil_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
437 [
438 f32::ceil(a[0usize]),
439 f32::ceil(a[1usize]),
440 f32::ceil(a[2usize]),
441 f32::ceil(a[3usize]),
442 ]
443 .simd_into(self)
444 }
445 #[inline(always)]
446 fn round_ties_even_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
447 [
448 f32::round_ties_even(a[0usize]),
449 f32::round_ties_even(a[1usize]),
450 f32::round_ties_even(a[2usize]),
451 f32::round_ties_even(a[3usize]),
452 ]
453 .simd_into(self)
454 }
455 #[inline(always)]
456 fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
457 [
458 f32::fract(a[0usize]),
459 f32::fract(a[1usize]),
460 f32::fract(a[2usize]),
461 f32::fract(a[3usize]),
462 ]
463 .simd_into(self)
464 }
465 #[inline(always)]
466 fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
467 [
468 f32::trunc(a[0usize]),
469 f32::trunc(a[1usize]),
470 f32::trunc(a[2usize]),
471 f32::trunc(a[3usize]),
472 ]
473 .simd_into(self)
474 }
475 #[inline(always)]
476 fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
477 [
478 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
479 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
480 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
481 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
482 ]
483 .simd_into(self)
484 }
485 #[inline(always)]
486 fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
487 let mut result = [0.0; 8usize];
488 result[0..4usize].copy_from_slice(&a.val.0);
489 result[4usize..8usize].copy_from_slice(&b.val.0);
490 result.simd_into(self)
491 }
    /// Converts `a` to an `f64x2` via its `bitcast` method.
    #[inline(always)]
    fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
        a.bitcast()
    }
    /// Converts `a` to an `i32x4` via its `bitcast` method.
    #[inline(always)]
    fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        a.bitcast()
    }
    /// Converts `a` to a `u8x16` via its `bitcast` method.
    #[inline(always)]
    fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
        a.bitcast()
    }
    /// Converts `a` to a `u32x4` via its `bitcast` method.
    #[inline(always)]
    fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        a.bitcast()
    }
508 #[inline(always)]
509 fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
510 [
511 a[0usize] as u32,
512 a[1usize] as u32,
513 a[2usize] as u32,
514 a[3usize] as u32,
515 ]
516 .simd_into(self)
517 }
    /// Forwards to `cvt_u32_f32x4`; the fallback uses the same conversion for both variants.
    #[inline(always)]
    fn cvt_u32_precise_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        self.cvt_u32_f32x4(a)
    }
522 #[inline(always)]
523 fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
524 [
525 a[0usize] as i32,
526 a[1usize] as i32,
527 a[2usize] as i32,
528 a[3usize] as i32,
529 ]
530 .simd_into(self)
531 }
    /// Forwards to `cvt_i32_f32x4`; the fallback uses the same conversion for both variants.
    #[inline(always)]
    fn cvt_i32_precise_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        self.cvt_i32_f32x4(a)
    }
    /// Builds a vector whose sixteen lanes all hold `val`.
    #[inline(always)]
    fn splat_i8x16(self, val: i8) -> i8x16<Self> {
        [val; 16usize].simd_into(self)
    }
540 #[inline(always)]
541 fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16<Self> {
542 i8x16 {
543 val: crate::support::Aligned128(val),
544 simd: self,
545 }
546 }
547 #[inline(always)]
548 fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16<Self> {
549 i8x16 {
550 val: crate::support::Aligned128(*val),
551 simd: self,
552 }
553 }
554 #[inline(always)]
555 fn as_array_i8x16(self, a: i8x16<Self>) -> [i8; 16usize] {
556 a.val.0
557 }
558 #[inline(always)]
559 fn as_array_ref_i8x16(self, a: &i8x16<Self>) -> &[i8; 16usize] {
560 &a.val.0
561 }
562 #[inline(always)]
563 fn as_array_mut_i8x16(self, a: &mut i8x16<Self>) -> &mut [i8; 16usize] {
564 &mut a.val.0
565 }
566 #[inline(always)]
567 fn store_array_i8x16(self, a: i8x16<Self>, dest: &mut [i8; 16usize]) -> () {
568 *dest = a.val.0;
569 }
570 #[inline(always)]
571 fn cvt_from_bytes_i8x16(self, a: u8x16<Self>) -> i8x16<Self> {
572 unsafe {
573 i8x16 {
574 val: core::mem::transmute(a.val),
575 simd: self,
576 }
577 }
578 }
579 #[inline(always)]
580 fn cvt_to_bytes_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
581 unsafe {
582 u8x16 {
583 val: core::mem::transmute(a.val),
584 simd: self,
585 }
586 }
587 }
588 #[inline(always)]
589 fn slide_i8x16<const SHIFT: usize>(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
590 let mut dest = [Default::default(); 16usize];
591 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
592 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
593 dest.simd_into(self)
594 }
    /// For this vector type the operation simply forwards to `slide_i8x16`.
    #[inline(always)]
    fn slide_within_blocks_i8x16<const SHIFT: usize>(
        self,
        a: i8x16<Self>,
        b: i8x16<Self>,
    ) -> i8x16<Self> {
        self.slide_i8x16::<SHIFT>(a, b)
    }
603 #[inline(always)]
604 fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
605 [
606 i8::wrapping_add(a[0usize], b[0usize]),
607 i8::wrapping_add(a[1usize], b[1usize]),
608 i8::wrapping_add(a[2usize], b[2usize]),
609 i8::wrapping_add(a[3usize], b[3usize]),
610 i8::wrapping_add(a[4usize], b[4usize]),
611 i8::wrapping_add(a[5usize], b[5usize]),
612 i8::wrapping_add(a[6usize], b[6usize]),
613 i8::wrapping_add(a[7usize], b[7usize]),
614 i8::wrapping_add(a[8usize], b[8usize]),
615 i8::wrapping_add(a[9usize], b[9usize]),
616 i8::wrapping_add(a[10usize], b[10usize]),
617 i8::wrapping_add(a[11usize], b[11usize]),
618 i8::wrapping_add(a[12usize], b[12usize]),
619 i8::wrapping_add(a[13usize], b[13usize]),
620 i8::wrapping_add(a[14usize], b[14usize]),
621 i8::wrapping_add(a[15usize], b[15usize]),
622 ]
623 .simd_into(self)
624 }
625 #[inline(always)]
626 fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
627 [
628 i8::wrapping_sub(a[0usize], b[0usize]),
629 i8::wrapping_sub(a[1usize], b[1usize]),
630 i8::wrapping_sub(a[2usize], b[2usize]),
631 i8::wrapping_sub(a[3usize], b[3usize]),
632 i8::wrapping_sub(a[4usize], b[4usize]),
633 i8::wrapping_sub(a[5usize], b[5usize]),
634 i8::wrapping_sub(a[6usize], b[6usize]),
635 i8::wrapping_sub(a[7usize], b[7usize]),
636 i8::wrapping_sub(a[8usize], b[8usize]),
637 i8::wrapping_sub(a[9usize], b[9usize]),
638 i8::wrapping_sub(a[10usize], b[10usize]),
639 i8::wrapping_sub(a[11usize], b[11usize]),
640 i8::wrapping_sub(a[12usize], b[12usize]),
641 i8::wrapping_sub(a[13usize], b[13usize]),
642 i8::wrapping_sub(a[14usize], b[14usize]),
643 i8::wrapping_sub(a[15usize], b[15usize]),
644 ]
645 .simd_into(self)
646 }
647 #[inline(always)]
648 fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
649 [
650 i8::wrapping_mul(a[0usize], b[0usize]),
651 i8::wrapping_mul(a[1usize], b[1usize]),
652 i8::wrapping_mul(a[2usize], b[2usize]),
653 i8::wrapping_mul(a[3usize], b[3usize]),
654 i8::wrapping_mul(a[4usize], b[4usize]),
655 i8::wrapping_mul(a[5usize], b[5usize]),
656 i8::wrapping_mul(a[6usize], b[6usize]),
657 i8::wrapping_mul(a[7usize], b[7usize]),
658 i8::wrapping_mul(a[8usize], b[8usize]),
659 i8::wrapping_mul(a[9usize], b[9usize]),
660 i8::wrapping_mul(a[10usize], b[10usize]),
661 i8::wrapping_mul(a[11usize], b[11usize]),
662 i8::wrapping_mul(a[12usize], b[12usize]),
663 i8::wrapping_mul(a[13usize], b[13usize]),
664 i8::wrapping_mul(a[14usize], b[14usize]),
665 i8::wrapping_mul(a[15usize], b[15usize]),
666 ]
667 .simd_into(self)
668 }
669 #[inline(always)]
670 fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
671 [
672 i8::bitand(a[0usize], &b[0usize]),
673 i8::bitand(a[1usize], &b[1usize]),
674 i8::bitand(a[2usize], &b[2usize]),
675 i8::bitand(a[3usize], &b[3usize]),
676 i8::bitand(a[4usize], &b[4usize]),
677 i8::bitand(a[5usize], &b[5usize]),
678 i8::bitand(a[6usize], &b[6usize]),
679 i8::bitand(a[7usize], &b[7usize]),
680 i8::bitand(a[8usize], &b[8usize]),
681 i8::bitand(a[9usize], &b[9usize]),
682 i8::bitand(a[10usize], &b[10usize]),
683 i8::bitand(a[11usize], &b[11usize]),
684 i8::bitand(a[12usize], &b[12usize]),
685 i8::bitand(a[13usize], &b[13usize]),
686 i8::bitand(a[14usize], &b[14usize]),
687 i8::bitand(a[15usize], &b[15usize]),
688 ]
689 .simd_into(self)
690 }
691 #[inline(always)]
692 fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
693 [
694 i8::bitor(a[0usize], &b[0usize]),
695 i8::bitor(a[1usize], &b[1usize]),
696 i8::bitor(a[2usize], &b[2usize]),
697 i8::bitor(a[3usize], &b[3usize]),
698 i8::bitor(a[4usize], &b[4usize]),
699 i8::bitor(a[5usize], &b[5usize]),
700 i8::bitor(a[6usize], &b[6usize]),
701 i8::bitor(a[7usize], &b[7usize]),
702 i8::bitor(a[8usize], &b[8usize]),
703 i8::bitor(a[9usize], &b[9usize]),
704 i8::bitor(a[10usize], &b[10usize]),
705 i8::bitor(a[11usize], &b[11usize]),
706 i8::bitor(a[12usize], &b[12usize]),
707 i8::bitor(a[13usize], &b[13usize]),
708 i8::bitor(a[14usize], &b[14usize]),
709 i8::bitor(a[15usize], &b[15usize]),
710 ]
711 .simd_into(self)
712 }
713 #[inline(always)]
714 fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
715 [
716 i8::bitxor(a[0usize], &b[0usize]),
717 i8::bitxor(a[1usize], &b[1usize]),
718 i8::bitxor(a[2usize], &b[2usize]),
719 i8::bitxor(a[3usize], &b[3usize]),
720 i8::bitxor(a[4usize], &b[4usize]),
721 i8::bitxor(a[5usize], &b[5usize]),
722 i8::bitxor(a[6usize], &b[6usize]),
723 i8::bitxor(a[7usize], &b[7usize]),
724 i8::bitxor(a[8usize], &b[8usize]),
725 i8::bitxor(a[9usize], &b[9usize]),
726 i8::bitxor(a[10usize], &b[10usize]),
727 i8::bitxor(a[11usize], &b[11usize]),
728 i8::bitxor(a[12usize], &b[12usize]),
729 i8::bitxor(a[13usize], &b[13usize]),
730 i8::bitxor(a[14usize], &b[14usize]),
731 i8::bitxor(a[15usize], &b[15usize]),
732 ]
733 .simd_into(self)
734 }
735 #[inline(always)]
736 fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
737 [
738 i8::not(a[0usize]),
739 i8::not(a[1usize]),
740 i8::not(a[2usize]),
741 i8::not(a[3usize]),
742 i8::not(a[4usize]),
743 i8::not(a[5usize]),
744 i8::not(a[6usize]),
745 i8::not(a[7usize]),
746 i8::not(a[8usize]),
747 i8::not(a[9usize]),
748 i8::not(a[10usize]),
749 i8::not(a[11usize]),
750 i8::not(a[12usize]),
751 i8::not(a[13usize]),
752 i8::not(a[14usize]),
753 i8::not(a[15usize]),
754 ]
755 .simd_into(self)
756 }
757 #[inline(always)]
758 fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
759 [
760 i8::shl(a[0usize], shift),
761 i8::shl(a[1usize], shift),
762 i8::shl(a[2usize], shift),
763 i8::shl(a[3usize], shift),
764 i8::shl(a[4usize], shift),
765 i8::shl(a[5usize], shift),
766 i8::shl(a[6usize], shift),
767 i8::shl(a[7usize], shift),
768 i8::shl(a[8usize], shift),
769 i8::shl(a[9usize], shift),
770 i8::shl(a[10usize], shift),
771 i8::shl(a[11usize], shift),
772 i8::shl(a[12usize], shift),
773 i8::shl(a[13usize], shift),
774 i8::shl(a[14usize], shift),
775 i8::shl(a[15usize], shift),
776 ]
777 .simd_into(self)
778 }
779 #[inline(always)]
780 fn shlv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
781 [
782 i8::shl(a[0usize], &b[0usize]),
783 i8::shl(a[1usize], &b[1usize]),
784 i8::shl(a[2usize], &b[2usize]),
785 i8::shl(a[3usize], &b[3usize]),
786 i8::shl(a[4usize], &b[4usize]),
787 i8::shl(a[5usize], &b[5usize]),
788 i8::shl(a[6usize], &b[6usize]),
789 i8::shl(a[7usize], &b[7usize]),
790 i8::shl(a[8usize], &b[8usize]),
791 i8::shl(a[9usize], &b[9usize]),
792 i8::shl(a[10usize], &b[10usize]),
793 i8::shl(a[11usize], &b[11usize]),
794 i8::shl(a[12usize], &b[12usize]),
795 i8::shl(a[13usize], &b[13usize]),
796 i8::shl(a[14usize], &b[14usize]),
797 i8::shl(a[15usize], &b[15usize]),
798 ]
799 .simd_into(self)
800 }
801 #[inline(always)]
802 fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
803 [
804 i8::shr(a[0usize], shift),
805 i8::shr(a[1usize], shift),
806 i8::shr(a[2usize], shift),
807 i8::shr(a[3usize], shift),
808 i8::shr(a[4usize], shift),
809 i8::shr(a[5usize], shift),
810 i8::shr(a[6usize], shift),
811 i8::shr(a[7usize], shift),
812 i8::shr(a[8usize], shift),
813 i8::shr(a[9usize], shift),
814 i8::shr(a[10usize], shift),
815 i8::shr(a[11usize], shift),
816 i8::shr(a[12usize], shift),
817 i8::shr(a[13usize], shift),
818 i8::shr(a[14usize], shift),
819 i8::shr(a[15usize], shift),
820 ]
821 .simd_into(self)
822 }
823 #[inline(always)]
824 fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
825 [
826 i8::shr(a[0usize], &b[0usize]),
827 i8::shr(a[1usize], &b[1usize]),
828 i8::shr(a[2usize], &b[2usize]),
829 i8::shr(a[3usize], &b[3usize]),
830 i8::shr(a[4usize], &b[4usize]),
831 i8::shr(a[5usize], &b[5usize]),
832 i8::shr(a[6usize], &b[6usize]),
833 i8::shr(a[7usize], &b[7usize]),
834 i8::shr(a[8usize], &b[8usize]),
835 i8::shr(a[9usize], &b[9usize]),
836 i8::shr(a[10usize], &b[10usize]),
837 i8::shr(a[11usize], &b[11usize]),
838 i8::shr(a[12usize], &b[12usize]),
839 i8::shr(a[13usize], &b[13usize]),
840 i8::shr(a[14usize], &b[14usize]),
841 i8::shr(a[15usize], &b[15usize]),
842 ]
843 .simd_into(self)
844 }
845 #[inline(always)]
846 fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
847 [
848 -(i8::eq(&a[0usize], &b[0usize]) as i8),
849 -(i8::eq(&a[1usize], &b[1usize]) as i8),
850 -(i8::eq(&a[2usize], &b[2usize]) as i8),
851 -(i8::eq(&a[3usize], &b[3usize]) as i8),
852 -(i8::eq(&a[4usize], &b[4usize]) as i8),
853 -(i8::eq(&a[5usize], &b[5usize]) as i8),
854 -(i8::eq(&a[6usize], &b[6usize]) as i8),
855 -(i8::eq(&a[7usize], &b[7usize]) as i8),
856 -(i8::eq(&a[8usize], &b[8usize]) as i8),
857 -(i8::eq(&a[9usize], &b[9usize]) as i8),
858 -(i8::eq(&a[10usize], &b[10usize]) as i8),
859 -(i8::eq(&a[11usize], &b[11usize]) as i8),
860 -(i8::eq(&a[12usize], &b[12usize]) as i8),
861 -(i8::eq(&a[13usize], &b[13usize]) as i8),
862 -(i8::eq(&a[14usize], &b[14usize]) as i8),
863 -(i8::eq(&a[15usize], &b[15usize]) as i8),
864 ]
865 .simd_into(self)
866 }
867 #[inline(always)]
868 fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
869 [
870 -(i8::lt(&a[0usize], &b[0usize]) as i8),
871 -(i8::lt(&a[1usize], &b[1usize]) as i8),
872 -(i8::lt(&a[2usize], &b[2usize]) as i8),
873 -(i8::lt(&a[3usize], &b[3usize]) as i8),
874 -(i8::lt(&a[4usize], &b[4usize]) as i8),
875 -(i8::lt(&a[5usize], &b[5usize]) as i8),
876 -(i8::lt(&a[6usize], &b[6usize]) as i8),
877 -(i8::lt(&a[7usize], &b[7usize]) as i8),
878 -(i8::lt(&a[8usize], &b[8usize]) as i8),
879 -(i8::lt(&a[9usize], &b[9usize]) as i8),
880 -(i8::lt(&a[10usize], &b[10usize]) as i8),
881 -(i8::lt(&a[11usize], &b[11usize]) as i8),
882 -(i8::lt(&a[12usize], &b[12usize]) as i8),
883 -(i8::lt(&a[13usize], &b[13usize]) as i8),
884 -(i8::lt(&a[14usize], &b[14usize]) as i8),
885 -(i8::lt(&a[15usize], &b[15usize]) as i8),
886 ]
887 .simd_into(self)
888 }
889 #[inline(always)]
890 fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
891 [
892 -(i8::le(&a[0usize], &b[0usize]) as i8),
893 -(i8::le(&a[1usize], &b[1usize]) as i8),
894 -(i8::le(&a[2usize], &b[2usize]) as i8),
895 -(i8::le(&a[3usize], &b[3usize]) as i8),
896 -(i8::le(&a[4usize], &b[4usize]) as i8),
897 -(i8::le(&a[5usize], &b[5usize]) as i8),
898 -(i8::le(&a[6usize], &b[6usize]) as i8),
899 -(i8::le(&a[7usize], &b[7usize]) as i8),
900 -(i8::le(&a[8usize], &b[8usize]) as i8),
901 -(i8::le(&a[9usize], &b[9usize]) as i8),
902 -(i8::le(&a[10usize], &b[10usize]) as i8),
903 -(i8::le(&a[11usize], &b[11usize]) as i8),
904 -(i8::le(&a[12usize], &b[12usize]) as i8),
905 -(i8::le(&a[13usize], &b[13usize]) as i8),
906 -(i8::le(&a[14usize], &b[14usize]) as i8),
907 -(i8::le(&a[15usize], &b[15usize]) as i8),
908 ]
909 .simd_into(self)
910 }
911 #[inline(always)]
912 fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
913 [
914 -(i8::ge(&a[0usize], &b[0usize]) as i8),
915 -(i8::ge(&a[1usize], &b[1usize]) as i8),
916 -(i8::ge(&a[2usize], &b[2usize]) as i8),
917 -(i8::ge(&a[3usize], &b[3usize]) as i8),
918 -(i8::ge(&a[4usize], &b[4usize]) as i8),
919 -(i8::ge(&a[5usize], &b[5usize]) as i8),
920 -(i8::ge(&a[6usize], &b[6usize]) as i8),
921 -(i8::ge(&a[7usize], &b[7usize]) as i8),
922 -(i8::ge(&a[8usize], &b[8usize]) as i8),
923 -(i8::ge(&a[9usize], &b[9usize]) as i8),
924 -(i8::ge(&a[10usize], &b[10usize]) as i8),
925 -(i8::ge(&a[11usize], &b[11usize]) as i8),
926 -(i8::ge(&a[12usize], &b[12usize]) as i8),
927 -(i8::ge(&a[13usize], &b[13usize]) as i8),
928 -(i8::ge(&a[14usize], &b[14usize]) as i8),
929 -(i8::ge(&a[15usize], &b[15usize]) as i8),
930 ]
931 .simd_into(self)
932 }
933 #[inline(always)]
934 fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
935 [
936 -(i8::gt(&a[0usize], &b[0usize]) as i8),
937 -(i8::gt(&a[1usize], &b[1usize]) as i8),
938 -(i8::gt(&a[2usize], &b[2usize]) as i8),
939 -(i8::gt(&a[3usize], &b[3usize]) as i8),
940 -(i8::gt(&a[4usize], &b[4usize]) as i8),
941 -(i8::gt(&a[5usize], &b[5usize]) as i8),
942 -(i8::gt(&a[6usize], &b[6usize]) as i8),
943 -(i8::gt(&a[7usize], &b[7usize]) as i8),
944 -(i8::gt(&a[8usize], &b[8usize]) as i8),
945 -(i8::gt(&a[9usize], &b[9usize]) as i8),
946 -(i8::gt(&a[10usize], &b[10usize]) as i8),
947 -(i8::gt(&a[11usize], &b[11usize]) as i8),
948 -(i8::gt(&a[12usize], &b[12usize]) as i8),
949 -(i8::gt(&a[13usize], &b[13usize]) as i8),
950 -(i8::gt(&a[14usize], &b[14usize]) as i8),
951 -(i8::gt(&a[15usize], &b[15usize]) as i8),
952 ]
953 .simd_into(self)
954 }
955 #[inline(always)]
956 fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
957 [
958 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
959 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
960 ]
961 .simd_into(self)
962 }
963 #[inline(always)]
964 fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
965 [
966 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
967 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
968 a[15usize], b[15usize],
969 ]
970 .simd_into(self)
971 }
972 #[inline(always)]
973 fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
974 [
975 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
976 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
977 b[12usize], b[14usize],
978 ]
979 .simd_into(self)
980 }
981 #[inline(always)]
982 fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
983 [
984 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
985 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
986 b[13usize], b[15usize],
987 ]
988 .simd_into(self)
989 }
990 #[inline(always)]
991 fn interleave_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> (i8x16<Self>, i8x16<Self>) {
992 (self.zip_low_i8x16(a, b), self.zip_high_i8x16(a, b))
993 }
994 #[inline(always)]
995 fn deinterleave_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> (i8x16<Self>, i8x16<Self>) {
996 (self.unzip_low_i8x16(a, b), self.unzip_high_i8x16(a, b))
997 }
998 #[inline(always)]
999 fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
1000 [
1001 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1002 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1003 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1004 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1005 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1006 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1007 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1008 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1009 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1010 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1011 if a[10usize] != 0 {
1012 b[10usize]
1013 } else {
1014 c[10usize]
1015 },
1016 if a[11usize] != 0 {
1017 b[11usize]
1018 } else {
1019 c[11usize]
1020 },
1021 if a[12usize] != 0 {
1022 b[12usize]
1023 } else {
1024 c[12usize]
1025 },
1026 if a[13usize] != 0 {
1027 b[13usize]
1028 } else {
1029 c[13usize]
1030 },
1031 if a[14usize] != 0 {
1032 b[14usize]
1033 } else {
1034 c[14usize]
1035 },
1036 if a[15usize] != 0 {
1037 b[15usize]
1038 } else {
1039 c[15usize]
1040 },
1041 ]
1042 .simd_into(self)
1043 }
1044 #[inline(always)]
1045 fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
1046 [
1047 i8::min(a[0usize], b[0usize]),
1048 i8::min(a[1usize], b[1usize]),
1049 i8::min(a[2usize], b[2usize]),
1050 i8::min(a[3usize], b[3usize]),
1051 i8::min(a[4usize], b[4usize]),
1052 i8::min(a[5usize], b[5usize]),
1053 i8::min(a[6usize], b[6usize]),
1054 i8::min(a[7usize], b[7usize]),
1055 i8::min(a[8usize], b[8usize]),
1056 i8::min(a[9usize], b[9usize]),
1057 i8::min(a[10usize], b[10usize]),
1058 i8::min(a[11usize], b[11usize]),
1059 i8::min(a[12usize], b[12usize]),
1060 i8::min(a[13usize], b[13usize]),
1061 i8::min(a[14usize], b[14usize]),
1062 i8::min(a[15usize], b[15usize]),
1063 ]
1064 .simd_into(self)
1065 }
1066 #[inline(always)]
1067 fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
1068 [
1069 i8::max(a[0usize], b[0usize]),
1070 i8::max(a[1usize], b[1usize]),
1071 i8::max(a[2usize], b[2usize]),
1072 i8::max(a[3usize], b[3usize]),
1073 i8::max(a[4usize], b[4usize]),
1074 i8::max(a[5usize], b[5usize]),
1075 i8::max(a[6usize], b[6usize]),
1076 i8::max(a[7usize], b[7usize]),
1077 i8::max(a[8usize], b[8usize]),
1078 i8::max(a[9usize], b[9usize]),
1079 i8::max(a[10usize], b[10usize]),
1080 i8::max(a[11usize], b[11usize]),
1081 i8::max(a[12usize], b[12usize]),
1082 i8::max(a[13usize], b[13usize]),
1083 i8::max(a[14usize], b[14usize]),
1084 i8::max(a[15usize], b[15usize]),
1085 ]
1086 .simd_into(self)
1087 }
1088 #[inline(always)]
1089 fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
1090 let mut result = [0; 32usize];
1091 result[0..16usize].copy_from_slice(&a.val.0);
1092 result[16usize..32usize].copy_from_slice(&b.val.0);
1093 result.simd_into(self)
1094 }
1095 #[inline(always)]
1096 fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
1097 [
1098 i8::neg(a[0usize]),
1099 i8::neg(a[1usize]),
1100 i8::neg(a[2usize]),
1101 i8::neg(a[3usize]),
1102 i8::neg(a[4usize]),
1103 i8::neg(a[5usize]),
1104 i8::neg(a[6usize]),
1105 i8::neg(a[7usize]),
1106 i8::neg(a[8usize]),
1107 i8::neg(a[9usize]),
1108 i8::neg(a[10usize]),
1109 i8::neg(a[11usize]),
1110 i8::neg(a[12usize]),
1111 i8::neg(a[13usize]),
1112 i8::neg(a[14usize]),
1113 i8::neg(a[15usize]),
1114 ]
1115 .simd_into(self)
1116 }
1117 #[inline(always)]
1118 fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
1119 a.bitcast()
1120 }
1121 #[inline(always)]
1122 fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
1123 a.bitcast()
1124 }
1125 #[inline(always)]
1126 fn splat_u8x16(self, val: u8) -> u8x16<Self> {
1127 [val; 16usize].simd_into(self)
1128 }
1129 #[inline(always)]
1130 fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16<Self> {
1131 u8x16 {
1132 val: crate::support::Aligned128(val),
1133 simd: self,
1134 }
1135 }
1136 #[inline(always)]
1137 fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16<Self> {
1138 u8x16 {
1139 val: crate::support::Aligned128(*val),
1140 simd: self,
1141 }
1142 }
1143 #[inline(always)]
1144 fn as_array_u8x16(self, a: u8x16<Self>) -> [u8; 16usize] {
1145 a.val.0
1146 }
1147 #[inline(always)]
1148 fn as_array_ref_u8x16(self, a: &u8x16<Self>) -> &[u8; 16usize] {
1149 &a.val.0
1150 }
1151 #[inline(always)]
1152 fn as_array_mut_u8x16(self, a: &mut u8x16<Self>) -> &mut [u8; 16usize] {
1153 &mut a.val.0
1154 }
1155 #[inline(always)]
1156 fn store_array_u8x16(self, a: u8x16<Self>, dest: &mut [u8; 16usize]) -> () {
1157 *dest = a.val.0;
1158 }
1159 #[inline(always)]
1160 fn cvt_from_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1161 unsafe {
1162 u8x16 {
1163 val: core::mem::transmute(a.val),
1164 simd: self,
1165 }
1166 }
1167 }
1168 #[inline(always)]
1169 fn cvt_to_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1170 unsafe {
1171 u8x16 {
1172 val: core::mem::transmute(a.val),
1173 simd: self,
1174 }
1175 }
1176 }
1177 #[inline(always)]
1178 fn slide_u8x16<const SHIFT: usize>(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1179 let mut dest = [Default::default(); 16usize];
1180 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
1181 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
1182 dest.simd_into(self)
1183 }
1184 #[inline(always)]
1185 fn slide_within_blocks_u8x16<const SHIFT: usize>(
1186 self,
1187 a: u8x16<Self>,
1188 b: u8x16<Self>,
1189 ) -> u8x16<Self> {
1190 self.slide_u8x16::<SHIFT>(a, b)
1191 }
1192 #[inline(always)]
1193 fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1194 [
1195 u8::wrapping_add(a[0usize], b[0usize]),
1196 u8::wrapping_add(a[1usize], b[1usize]),
1197 u8::wrapping_add(a[2usize], b[2usize]),
1198 u8::wrapping_add(a[3usize], b[3usize]),
1199 u8::wrapping_add(a[4usize], b[4usize]),
1200 u8::wrapping_add(a[5usize], b[5usize]),
1201 u8::wrapping_add(a[6usize], b[6usize]),
1202 u8::wrapping_add(a[7usize], b[7usize]),
1203 u8::wrapping_add(a[8usize], b[8usize]),
1204 u8::wrapping_add(a[9usize], b[9usize]),
1205 u8::wrapping_add(a[10usize], b[10usize]),
1206 u8::wrapping_add(a[11usize], b[11usize]),
1207 u8::wrapping_add(a[12usize], b[12usize]),
1208 u8::wrapping_add(a[13usize], b[13usize]),
1209 u8::wrapping_add(a[14usize], b[14usize]),
1210 u8::wrapping_add(a[15usize], b[15usize]),
1211 ]
1212 .simd_into(self)
1213 }
1214 #[inline(always)]
1215 fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1216 [
1217 u8::wrapping_sub(a[0usize], b[0usize]),
1218 u8::wrapping_sub(a[1usize], b[1usize]),
1219 u8::wrapping_sub(a[2usize], b[2usize]),
1220 u8::wrapping_sub(a[3usize], b[3usize]),
1221 u8::wrapping_sub(a[4usize], b[4usize]),
1222 u8::wrapping_sub(a[5usize], b[5usize]),
1223 u8::wrapping_sub(a[6usize], b[6usize]),
1224 u8::wrapping_sub(a[7usize], b[7usize]),
1225 u8::wrapping_sub(a[8usize], b[8usize]),
1226 u8::wrapping_sub(a[9usize], b[9usize]),
1227 u8::wrapping_sub(a[10usize], b[10usize]),
1228 u8::wrapping_sub(a[11usize], b[11usize]),
1229 u8::wrapping_sub(a[12usize], b[12usize]),
1230 u8::wrapping_sub(a[13usize], b[13usize]),
1231 u8::wrapping_sub(a[14usize], b[14usize]),
1232 u8::wrapping_sub(a[15usize], b[15usize]),
1233 ]
1234 .simd_into(self)
1235 }
1236 #[inline(always)]
1237 fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1238 [
1239 u8::wrapping_mul(a[0usize], b[0usize]),
1240 u8::wrapping_mul(a[1usize], b[1usize]),
1241 u8::wrapping_mul(a[2usize], b[2usize]),
1242 u8::wrapping_mul(a[3usize], b[3usize]),
1243 u8::wrapping_mul(a[4usize], b[4usize]),
1244 u8::wrapping_mul(a[5usize], b[5usize]),
1245 u8::wrapping_mul(a[6usize], b[6usize]),
1246 u8::wrapping_mul(a[7usize], b[7usize]),
1247 u8::wrapping_mul(a[8usize], b[8usize]),
1248 u8::wrapping_mul(a[9usize], b[9usize]),
1249 u8::wrapping_mul(a[10usize], b[10usize]),
1250 u8::wrapping_mul(a[11usize], b[11usize]),
1251 u8::wrapping_mul(a[12usize], b[12usize]),
1252 u8::wrapping_mul(a[13usize], b[13usize]),
1253 u8::wrapping_mul(a[14usize], b[14usize]),
1254 u8::wrapping_mul(a[15usize], b[15usize]),
1255 ]
1256 .simd_into(self)
1257 }
1258 #[inline(always)]
1259 fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1260 [
1261 u8::bitand(a[0usize], &b[0usize]),
1262 u8::bitand(a[1usize], &b[1usize]),
1263 u8::bitand(a[2usize], &b[2usize]),
1264 u8::bitand(a[3usize], &b[3usize]),
1265 u8::bitand(a[4usize], &b[4usize]),
1266 u8::bitand(a[5usize], &b[5usize]),
1267 u8::bitand(a[6usize], &b[6usize]),
1268 u8::bitand(a[7usize], &b[7usize]),
1269 u8::bitand(a[8usize], &b[8usize]),
1270 u8::bitand(a[9usize], &b[9usize]),
1271 u8::bitand(a[10usize], &b[10usize]),
1272 u8::bitand(a[11usize], &b[11usize]),
1273 u8::bitand(a[12usize], &b[12usize]),
1274 u8::bitand(a[13usize], &b[13usize]),
1275 u8::bitand(a[14usize], &b[14usize]),
1276 u8::bitand(a[15usize], &b[15usize]),
1277 ]
1278 .simd_into(self)
1279 }
1280 #[inline(always)]
1281 fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1282 [
1283 u8::bitor(a[0usize], &b[0usize]),
1284 u8::bitor(a[1usize], &b[1usize]),
1285 u8::bitor(a[2usize], &b[2usize]),
1286 u8::bitor(a[3usize], &b[3usize]),
1287 u8::bitor(a[4usize], &b[4usize]),
1288 u8::bitor(a[5usize], &b[5usize]),
1289 u8::bitor(a[6usize], &b[6usize]),
1290 u8::bitor(a[7usize], &b[7usize]),
1291 u8::bitor(a[8usize], &b[8usize]),
1292 u8::bitor(a[9usize], &b[9usize]),
1293 u8::bitor(a[10usize], &b[10usize]),
1294 u8::bitor(a[11usize], &b[11usize]),
1295 u8::bitor(a[12usize], &b[12usize]),
1296 u8::bitor(a[13usize], &b[13usize]),
1297 u8::bitor(a[14usize], &b[14usize]),
1298 u8::bitor(a[15usize], &b[15usize]),
1299 ]
1300 .simd_into(self)
1301 }
1302 #[inline(always)]
1303 fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1304 [
1305 u8::bitxor(a[0usize], &b[0usize]),
1306 u8::bitxor(a[1usize], &b[1usize]),
1307 u8::bitxor(a[2usize], &b[2usize]),
1308 u8::bitxor(a[3usize], &b[3usize]),
1309 u8::bitxor(a[4usize], &b[4usize]),
1310 u8::bitxor(a[5usize], &b[5usize]),
1311 u8::bitxor(a[6usize], &b[6usize]),
1312 u8::bitxor(a[7usize], &b[7usize]),
1313 u8::bitxor(a[8usize], &b[8usize]),
1314 u8::bitxor(a[9usize], &b[9usize]),
1315 u8::bitxor(a[10usize], &b[10usize]),
1316 u8::bitxor(a[11usize], &b[11usize]),
1317 u8::bitxor(a[12usize], &b[12usize]),
1318 u8::bitxor(a[13usize], &b[13usize]),
1319 u8::bitxor(a[14usize], &b[14usize]),
1320 u8::bitxor(a[15usize], &b[15usize]),
1321 ]
1322 .simd_into(self)
1323 }
1324 #[inline(always)]
1325 fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1326 [
1327 u8::not(a[0usize]),
1328 u8::not(a[1usize]),
1329 u8::not(a[2usize]),
1330 u8::not(a[3usize]),
1331 u8::not(a[4usize]),
1332 u8::not(a[5usize]),
1333 u8::not(a[6usize]),
1334 u8::not(a[7usize]),
1335 u8::not(a[8usize]),
1336 u8::not(a[9usize]),
1337 u8::not(a[10usize]),
1338 u8::not(a[11usize]),
1339 u8::not(a[12usize]),
1340 u8::not(a[13usize]),
1341 u8::not(a[14usize]),
1342 u8::not(a[15usize]),
1343 ]
1344 .simd_into(self)
1345 }
1346 #[inline(always)]
1347 fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1348 [
1349 u8::shl(a[0usize], shift),
1350 u8::shl(a[1usize], shift),
1351 u8::shl(a[2usize], shift),
1352 u8::shl(a[3usize], shift),
1353 u8::shl(a[4usize], shift),
1354 u8::shl(a[5usize], shift),
1355 u8::shl(a[6usize], shift),
1356 u8::shl(a[7usize], shift),
1357 u8::shl(a[8usize], shift),
1358 u8::shl(a[9usize], shift),
1359 u8::shl(a[10usize], shift),
1360 u8::shl(a[11usize], shift),
1361 u8::shl(a[12usize], shift),
1362 u8::shl(a[13usize], shift),
1363 u8::shl(a[14usize], shift),
1364 u8::shl(a[15usize], shift),
1365 ]
1366 .simd_into(self)
1367 }
1368 #[inline(always)]
1369 fn shlv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1370 [
1371 u8::shl(a[0usize], &b[0usize]),
1372 u8::shl(a[1usize], &b[1usize]),
1373 u8::shl(a[2usize], &b[2usize]),
1374 u8::shl(a[3usize], &b[3usize]),
1375 u8::shl(a[4usize], &b[4usize]),
1376 u8::shl(a[5usize], &b[5usize]),
1377 u8::shl(a[6usize], &b[6usize]),
1378 u8::shl(a[7usize], &b[7usize]),
1379 u8::shl(a[8usize], &b[8usize]),
1380 u8::shl(a[9usize], &b[9usize]),
1381 u8::shl(a[10usize], &b[10usize]),
1382 u8::shl(a[11usize], &b[11usize]),
1383 u8::shl(a[12usize], &b[12usize]),
1384 u8::shl(a[13usize], &b[13usize]),
1385 u8::shl(a[14usize], &b[14usize]),
1386 u8::shl(a[15usize], &b[15usize]),
1387 ]
1388 .simd_into(self)
1389 }
1390 #[inline(always)]
1391 fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1392 [
1393 u8::shr(a[0usize], shift),
1394 u8::shr(a[1usize], shift),
1395 u8::shr(a[2usize], shift),
1396 u8::shr(a[3usize], shift),
1397 u8::shr(a[4usize], shift),
1398 u8::shr(a[5usize], shift),
1399 u8::shr(a[6usize], shift),
1400 u8::shr(a[7usize], shift),
1401 u8::shr(a[8usize], shift),
1402 u8::shr(a[9usize], shift),
1403 u8::shr(a[10usize], shift),
1404 u8::shr(a[11usize], shift),
1405 u8::shr(a[12usize], shift),
1406 u8::shr(a[13usize], shift),
1407 u8::shr(a[14usize], shift),
1408 u8::shr(a[15usize], shift),
1409 ]
1410 .simd_into(self)
1411 }
1412 #[inline(always)]
1413 fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1414 [
1415 u8::shr(a[0usize], &b[0usize]),
1416 u8::shr(a[1usize], &b[1usize]),
1417 u8::shr(a[2usize], &b[2usize]),
1418 u8::shr(a[3usize], &b[3usize]),
1419 u8::shr(a[4usize], &b[4usize]),
1420 u8::shr(a[5usize], &b[5usize]),
1421 u8::shr(a[6usize], &b[6usize]),
1422 u8::shr(a[7usize], &b[7usize]),
1423 u8::shr(a[8usize], &b[8usize]),
1424 u8::shr(a[9usize], &b[9usize]),
1425 u8::shr(a[10usize], &b[10usize]),
1426 u8::shr(a[11usize], &b[11usize]),
1427 u8::shr(a[12usize], &b[12usize]),
1428 u8::shr(a[13usize], &b[13usize]),
1429 u8::shr(a[14usize], &b[14usize]),
1430 u8::shr(a[15usize], &b[15usize]),
1431 ]
1432 .simd_into(self)
1433 }
1434 #[inline(always)]
1435 fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1436 [
1437 -(u8::eq(&a[0usize], &b[0usize]) as i8),
1438 -(u8::eq(&a[1usize], &b[1usize]) as i8),
1439 -(u8::eq(&a[2usize], &b[2usize]) as i8),
1440 -(u8::eq(&a[3usize], &b[3usize]) as i8),
1441 -(u8::eq(&a[4usize], &b[4usize]) as i8),
1442 -(u8::eq(&a[5usize], &b[5usize]) as i8),
1443 -(u8::eq(&a[6usize], &b[6usize]) as i8),
1444 -(u8::eq(&a[7usize], &b[7usize]) as i8),
1445 -(u8::eq(&a[8usize], &b[8usize]) as i8),
1446 -(u8::eq(&a[9usize], &b[9usize]) as i8),
1447 -(u8::eq(&a[10usize], &b[10usize]) as i8),
1448 -(u8::eq(&a[11usize], &b[11usize]) as i8),
1449 -(u8::eq(&a[12usize], &b[12usize]) as i8),
1450 -(u8::eq(&a[13usize], &b[13usize]) as i8),
1451 -(u8::eq(&a[14usize], &b[14usize]) as i8),
1452 -(u8::eq(&a[15usize], &b[15usize]) as i8),
1453 ]
1454 .simd_into(self)
1455 }
1456 #[inline(always)]
1457 fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1458 [
1459 -(u8::lt(&a[0usize], &b[0usize]) as i8),
1460 -(u8::lt(&a[1usize], &b[1usize]) as i8),
1461 -(u8::lt(&a[2usize], &b[2usize]) as i8),
1462 -(u8::lt(&a[3usize], &b[3usize]) as i8),
1463 -(u8::lt(&a[4usize], &b[4usize]) as i8),
1464 -(u8::lt(&a[5usize], &b[5usize]) as i8),
1465 -(u8::lt(&a[6usize], &b[6usize]) as i8),
1466 -(u8::lt(&a[7usize], &b[7usize]) as i8),
1467 -(u8::lt(&a[8usize], &b[8usize]) as i8),
1468 -(u8::lt(&a[9usize], &b[9usize]) as i8),
1469 -(u8::lt(&a[10usize], &b[10usize]) as i8),
1470 -(u8::lt(&a[11usize], &b[11usize]) as i8),
1471 -(u8::lt(&a[12usize], &b[12usize]) as i8),
1472 -(u8::lt(&a[13usize], &b[13usize]) as i8),
1473 -(u8::lt(&a[14usize], &b[14usize]) as i8),
1474 -(u8::lt(&a[15usize], &b[15usize]) as i8),
1475 ]
1476 .simd_into(self)
1477 }
1478 #[inline(always)]
1479 fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1480 [
1481 -(u8::le(&a[0usize], &b[0usize]) as i8),
1482 -(u8::le(&a[1usize], &b[1usize]) as i8),
1483 -(u8::le(&a[2usize], &b[2usize]) as i8),
1484 -(u8::le(&a[3usize], &b[3usize]) as i8),
1485 -(u8::le(&a[4usize], &b[4usize]) as i8),
1486 -(u8::le(&a[5usize], &b[5usize]) as i8),
1487 -(u8::le(&a[6usize], &b[6usize]) as i8),
1488 -(u8::le(&a[7usize], &b[7usize]) as i8),
1489 -(u8::le(&a[8usize], &b[8usize]) as i8),
1490 -(u8::le(&a[9usize], &b[9usize]) as i8),
1491 -(u8::le(&a[10usize], &b[10usize]) as i8),
1492 -(u8::le(&a[11usize], &b[11usize]) as i8),
1493 -(u8::le(&a[12usize], &b[12usize]) as i8),
1494 -(u8::le(&a[13usize], &b[13usize]) as i8),
1495 -(u8::le(&a[14usize], &b[14usize]) as i8),
1496 -(u8::le(&a[15usize], &b[15usize]) as i8),
1497 ]
1498 .simd_into(self)
1499 }
1500 #[inline(always)]
1501 fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1502 [
1503 -(u8::ge(&a[0usize], &b[0usize]) as i8),
1504 -(u8::ge(&a[1usize], &b[1usize]) as i8),
1505 -(u8::ge(&a[2usize], &b[2usize]) as i8),
1506 -(u8::ge(&a[3usize], &b[3usize]) as i8),
1507 -(u8::ge(&a[4usize], &b[4usize]) as i8),
1508 -(u8::ge(&a[5usize], &b[5usize]) as i8),
1509 -(u8::ge(&a[6usize], &b[6usize]) as i8),
1510 -(u8::ge(&a[7usize], &b[7usize]) as i8),
1511 -(u8::ge(&a[8usize], &b[8usize]) as i8),
1512 -(u8::ge(&a[9usize], &b[9usize]) as i8),
1513 -(u8::ge(&a[10usize], &b[10usize]) as i8),
1514 -(u8::ge(&a[11usize], &b[11usize]) as i8),
1515 -(u8::ge(&a[12usize], &b[12usize]) as i8),
1516 -(u8::ge(&a[13usize], &b[13usize]) as i8),
1517 -(u8::ge(&a[14usize], &b[14usize]) as i8),
1518 -(u8::ge(&a[15usize], &b[15usize]) as i8),
1519 ]
1520 .simd_into(self)
1521 }
1522 #[inline(always)]
1523 fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1524 [
1525 -(u8::gt(&a[0usize], &b[0usize]) as i8),
1526 -(u8::gt(&a[1usize], &b[1usize]) as i8),
1527 -(u8::gt(&a[2usize], &b[2usize]) as i8),
1528 -(u8::gt(&a[3usize], &b[3usize]) as i8),
1529 -(u8::gt(&a[4usize], &b[4usize]) as i8),
1530 -(u8::gt(&a[5usize], &b[5usize]) as i8),
1531 -(u8::gt(&a[6usize], &b[6usize]) as i8),
1532 -(u8::gt(&a[7usize], &b[7usize]) as i8),
1533 -(u8::gt(&a[8usize], &b[8usize]) as i8),
1534 -(u8::gt(&a[9usize], &b[9usize]) as i8),
1535 -(u8::gt(&a[10usize], &b[10usize]) as i8),
1536 -(u8::gt(&a[11usize], &b[11usize]) as i8),
1537 -(u8::gt(&a[12usize], &b[12usize]) as i8),
1538 -(u8::gt(&a[13usize], &b[13usize]) as i8),
1539 -(u8::gt(&a[14usize], &b[14usize]) as i8),
1540 -(u8::gt(&a[15usize], &b[15usize]) as i8),
1541 ]
1542 .simd_into(self)
1543 }
1544 #[inline(always)]
1545 fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1546 [
1547 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1548 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1549 ]
1550 .simd_into(self)
1551 }
1552 #[inline(always)]
1553 fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1554 [
1555 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
1556 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
1557 a[15usize], b[15usize],
1558 ]
1559 .simd_into(self)
1560 }
1561 #[inline(always)]
1562 fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1563 [
1564 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
1565 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
1566 b[12usize], b[14usize],
1567 ]
1568 .simd_into(self)
1569 }
1570 #[inline(always)]
1571 fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1572 [
1573 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
1574 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
1575 b[13usize], b[15usize],
1576 ]
1577 .simd_into(self)
1578 }
1579 #[inline(always)]
1580 fn interleave_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> (u8x16<Self>, u8x16<Self>) {
1581 (self.zip_low_u8x16(a, b), self.zip_high_u8x16(a, b))
1582 }
1583 #[inline(always)]
1584 fn deinterleave_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> (u8x16<Self>, u8x16<Self>) {
1585 (self.unzip_low_u8x16(a, b), self.unzip_high_u8x16(a, b))
1586 }
1587 #[inline(always)]
1588 fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
1589 [
1590 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1591 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1592 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1593 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1594 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1595 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1596 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1597 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1598 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1599 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1600 if a[10usize] != 0 {
1601 b[10usize]
1602 } else {
1603 c[10usize]
1604 },
1605 if a[11usize] != 0 {
1606 b[11usize]
1607 } else {
1608 c[11usize]
1609 },
1610 if a[12usize] != 0 {
1611 b[12usize]
1612 } else {
1613 c[12usize]
1614 },
1615 if a[13usize] != 0 {
1616 b[13usize]
1617 } else {
1618 c[13usize]
1619 },
1620 if a[14usize] != 0 {
1621 b[14usize]
1622 } else {
1623 c[14usize]
1624 },
1625 if a[15usize] != 0 {
1626 b[15usize]
1627 } else {
1628 c[15usize]
1629 },
1630 ]
1631 .simd_into(self)
1632 }
1633 #[inline(always)]
1634 fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1635 [
1636 u8::min(a[0usize], b[0usize]),
1637 u8::min(a[1usize], b[1usize]),
1638 u8::min(a[2usize], b[2usize]),
1639 u8::min(a[3usize], b[3usize]),
1640 u8::min(a[4usize], b[4usize]),
1641 u8::min(a[5usize], b[5usize]),
1642 u8::min(a[6usize], b[6usize]),
1643 u8::min(a[7usize], b[7usize]),
1644 u8::min(a[8usize], b[8usize]),
1645 u8::min(a[9usize], b[9usize]),
1646 u8::min(a[10usize], b[10usize]),
1647 u8::min(a[11usize], b[11usize]),
1648 u8::min(a[12usize], b[12usize]),
1649 u8::min(a[13usize], b[13usize]),
1650 u8::min(a[14usize], b[14usize]),
1651 u8::min(a[15usize], b[15usize]),
1652 ]
1653 .simd_into(self)
1654 }
1655 #[inline(always)]
1656 fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1657 [
1658 u8::max(a[0usize], b[0usize]),
1659 u8::max(a[1usize], b[1usize]),
1660 u8::max(a[2usize], b[2usize]),
1661 u8::max(a[3usize], b[3usize]),
1662 u8::max(a[4usize], b[4usize]),
1663 u8::max(a[5usize], b[5usize]),
1664 u8::max(a[6usize], b[6usize]),
1665 u8::max(a[7usize], b[7usize]),
1666 u8::max(a[8usize], b[8usize]),
1667 u8::max(a[9usize], b[9usize]),
1668 u8::max(a[10usize], b[10usize]),
1669 u8::max(a[11usize], b[11usize]),
1670 u8::max(a[12usize], b[12usize]),
1671 u8::max(a[13usize], b[13usize]),
1672 u8::max(a[14usize], b[14usize]),
1673 u8::max(a[15usize], b[15usize]),
1674 ]
1675 .simd_into(self)
1676 }
1677 #[inline(always)]
1678 fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
1679 let mut result = [0; 32usize];
1680 result[0..16usize].copy_from_slice(&a.val.0);
1681 result[16usize..32usize].copy_from_slice(&b.val.0);
1682 result.simd_into(self)
1683 }
1684 #[inline(always)]
1685 fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1686 [
1687 a[0usize] as u16,
1688 a[1usize] as u16,
1689 a[2usize] as u16,
1690 a[3usize] as u16,
1691 a[4usize] as u16,
1692 a[5usize] as u16,
1693 a[6usize] as u16,
1694 a[7usize] as u16,
1695 a[8usize] as u16,
1696 a[9usize] as u16,
1697 a[10usize] as u16,
1698 a[11usize] as u16,
1699 a[12usize] as u16,
1700 a[13usize] as u16,
1701 a[14usize] as u16,
1702 a[15usize] as u16,
1703 ]
1704 .simd_into(self)
1705 }
1706 #[inline(always)]
1707 fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1708 a.bitcast()
1709 }
1710 #[inline(always)]
1711 fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1712 [val; 16usize].simd_into(self)
1713 }
1714 #[inline(always)]
1715 fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16<Self> {
1716 mask8x16 {
1717 val: crate::support::Aligned128(val),
1718 simd: self,
1719 }
1720 }
1721 #[inline(always)]
1722 fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16<Self> {
1723 mask8x16 {
1724 val: crate::support::Aligned128(*val),
1725 simd: self,
1726 }
1727 }
1728 #[inline(always)]
1729 fn as_array_mask8x16(self, a: mask8x16<Self>) -> [i8; 16usize] {
1730 a.val.0
1731 }
1732 #[inline(always)]
1733 fn as_array_ref_mask8x16(self, a: &mask8x16<Self>) -> &[i8; 16usize] {
1734 &a.val.0
1735 }
1736 #[inline(always)]
1737 fn as_array_mut_mask8x16(self, a: &mut mask8x16<Self>) -> &mut [i8; 16usize] {
1738 &mut a.val.0
1739 }
1740 #[inline(always)]
1741 fn store_array_mask8x16(self, a: mask8x16<Self>, dest: &mut [i8; 16usize]) -> () {
1742 *dest = a.val.0;
1743 }
1744 #[inline(always)]
1745 fn cvt_from_bytes_mask8x16(self, a: u8x16<Self>) -> mask8x16<Self> {
1746 unsafe {
1747 mask8x16 {
1748 val: core::mem::transmute(a.val),
1749 simd: self,
1750 }
1751 }
1752 }
1753 #[inline(always)]
1754 fn cvt_to_bytes_mask8x16(self, a: mask8x16<Self>) -> u8x16<Self> {
1755 unsafe {
1756 u8x16 {
1757 val: core::mem::transmute(a.val),
1758 simd: self,
1759 }
1760 }
1761 }
1762 #[inline(always)]
1763 fn slide_mask8x16<const SHIFT: usize>(
1764 self,
1765 a: mask8x16<Self>,
1766 b: mask8x16<Self>,
1767 ) -> mask8x16<Self> {
1768 let mut dest = [Default::default(); 16usize];
1769 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
1770 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
1771 dest.simd_into(self)
1772 }
1773 #[inline(always)]
1774 fn slide_within_blocks_mask8x16<const SHIFT: usize>(
1775 self,
1776 a: mask8x16<Self>,
1777 b: mask8x16<Self>,
1778 ) -> mask8x16<Self> {
1779 self.slide_mask8x16::<SHIFT>(a, b)
1780 }
1781 #[inline(always)]
1782 fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1783 [
1784 i8::bitand(a[0usize], &b[0usize]),
1785 i8::bitand(a[1usize], &b[1usize]),
1786 i8::bitand(a[2usize], &b[2usize]),
1787 i8::bitand(a[3usize], &b[3usize]),
1788 i8::bitand(a[4usize], &b[4usize]),
1789 i8::bitand(a[5usize], &b[5usize]),
1790 i8::bitand(a[6usize], &b[6usize]),
1791 i8::bitand(a[7usize], &b[7usize]),
1792 i8::bitand(a[8usize], &b[8usize]),
1793 i8::bitand(a[9usize], &b[9usize]),
1794 i8::bitand(a[10usize], &b[10usize]),
1795 i8::bitand(a[11usize], &b[11usize]),
1796 i8::bitand(a[12usize], &b[12usize]),
1797 i8::bitand(a[13usize], &b[13usize]),
1798 i8::bitand(a[14usize], &b[14usize]),
1799 i8::bitand(a[15usize], &b[15usize]),
1800 ]
1801 .simd_into(self)
1802 }
1803 #[inline(always)]
1804 fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1805 [
1806 i8::bitor(a[0usize], &b[0usize]),
1807 i8::bitor(a[1usize], &b[1usize]),
1808 i8::bitor(a[2usize], &b[2usize]),
1809 i8::bitor(a[3usize], &b[3usize]),
1810 i8::bitor(a[4usize], &b[4usize]),
1811 i8::bitor(a[5usize], &b[5usize]),
1812 i8::bitor(a[6usize], &b[6usize]),
1813 i8::bitor(a[7usize], &b[7usize]),
1814 i8::bitor(a[8usize], &b[8usize]),
1815 i8::bitor(a[9usize], &b[9usize]),
1816 i8::bitor(a[10usize], &b[10usize]),
1817 i8::bitor(a[11usize], &b[11usize]),
1818 i8::bitor(a[12usize], &b[12usize]),
1819 i8::bitor(a[13usize], &b[13usize]),
1820 i8::bitor(a[14usize], &b[14usize]),
1821 i8::bitor(a[15usize], &b[15usize]),
1822 ]
1823 .simd_into(self)
1824 }
1825 #[inline(always)]
1826 fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1827 [
1828 i8::bitxor(a[0usize], &b[0usize]),
1829 i8::bitxor(a[1usize], &b[1usize]),
1830 i8::bitxor(a[2usize], &b[2usize]),
1831 i8::bitxor(a[3usize], &b[3usize]),
1832 i8::bitxor(a[4usize], &b[4usize]),
1833 i8::bitxor(a[5usize], &b[5usize]),
1834 i8::bitxor(a[6usize], &b[6usize]),
1835 i8::bitxor(a[7usize], &b[7usize]),
1836 i8::bitxor(a[8usize], &b[8usize]),
1837 i8::bitxor(a[9usize], &b[9usize]),
1838 i8::bitxor(a[10usize], &b[10usize]),
1839 i8::bitxor(a[11usize], &b[11usize]),
1840 i8::bitxor(a[12usize], &b[12usize]),
1841 i8::bitxor(a[13usize], &b[13usize]),
1842 i8::bitxor(a[14usize], &b[14usize]),
1843 i8::bitxor(a[15usize], &b[15usize]),
1844 ]
1845 .simd_into(self)
1846 }
1847 #[inline(always)]
1848 fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1849 [
1850 i8::not(a[0usize]),
1851 i8::not(a[1usize]),
1852 i8::not(a[2usize]),
1853 i8::not(a[3usize]),
1854 i8::not(a[4usize]),
1855 i8::not(a[5usize]),
1856 i8::not(a[6usize]),
1857 i8::not(a[7usize]),
1858 i8::not(a[8usize]),
1859 i8::not(a[9usize]),
1860 i8::not(a[10usize]),
1861 i8::not(a[11usize]),
1862 i8::not(a[12usize]),
1863 i8::not(a[13usize]),
1864 i8::not(a[14usize]),
1865 i8::not(a[15usize]),
1866 ]
1867 .simd_into(self)
1868 }
1869 #[inline(always)]
1870 fn select_mask8x16(
1871 self,
1872 a: mask8x16<Self>,
1873 b: mask8x16<Self>,
1874 c: mask8x16<Self>,
1875 ) -> mask8x16<Self> {
1876 [
1877 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1878 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1879 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1880 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1881 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1882 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1883 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1884 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1885 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1886 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1887 if a[10usize] != 0 {
1888 b[10usize]
1889 } else {
1890 c[10usize]
1891 },
1892 if a[11usize] != 0 {
1893 b[11usize]
1894 } else {
1895 c[11usize]
1896 },
1897 if a[12usize] != 0 {
1898 b[12usize]
1899 } else {
1900 c[12usize]
1901 },
1902 if a[13usize] != 0 {
1903 b[13usize]
1904 } else {
1905 c[13usize]
1906 },
1907 if a[14usize] != 0 {
1908 b[14usize]
1909 } else {
1910 c[14usize]
1911 },
1912 if a[15usize] != 0 {
1913 b[15usize]
1914 } else {
1915 c[15usize]
1916 },
1917 ]
1918 .simd_into(self)
1919 }
1920 #[inline(always)]
1921 fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1922 [
1923 -(i8::eq(&a[0usize], &b[0usize]) as i8),
1924 -(i8::eq(&a[1usize], &b[1usize]) as i8),
1925 -(i8::eq(&a[2usize], &b[2usize]) as i8),
1926 -(i8::eq(&a[3usize], &b[3usize]) as i8),
1927 -(i8::eq(&a[4usize], &b[4usize]) as i8),
1928 -(i8::eq(&a[5usize], &b[5usize]) as i8),
1929 -(i8::eq(&a[6usize], &b[6usize]) as i8),
1930 -(i8::eq(&a[7usize], &b[7usize]) as i8),
1931 -(i8::eq(&a[8usize], &b[8usize]) as i8),
1932 -(i8::eq(&a[9usize], &b[9usize]) as i8),
1933 -(i8::eq(&a[10usize], &b[10usize]) as i8),
1934 -(i8::eq(&a[11usize], &b[11usize]) as i8),
1935 -(i8::eq(&a[12usize], &b[12usize]) as i8),
1936 -(i8::eq(&a[13usize], &b[13usize]) as i8),
1937 -(i8::eq(&a[14usize], &b[14usize]) as i8),
1938 -(i8::eq(&a[15usize], &b[15usize]) as i8),
1939 ]
1940 .simd_into(self)
1941 }
1942 #[inline(always)]
1943 fn any_true_mask8x16(self, a: mask8x16<Self>) -> bool {
1944 a[0usize] != 0
1945 || a[1usize] != 0
1946 || a[2usize] != 0
1947 || a[3usize] != 0
1948 || a[4usize] != 0
1949 || a[5usize] != 0
1950 || a[6usize] != 0
1951 || a[7usize] != 0
1952 || a[8usize] != 0
1953 || a[9usize] != 0
1954 || a[10usize] != 0
1955 || a[11usize] != 0
1956 || a[12usize] != 0
1957 || a[13usize] != 0
1958 || a[14usize] != 0
1959 || a[15usize] != 0
1960 }
1961 #[inline(always)]
1962 fn all_true_mask8x16(self, a: mask8x16<Self>) -> bool {
1963 a[0usize] != 0
1964 && a[1usize] != 0
1965 && a[2usize] != 0
1966 && a[3usize] != 0
1967 && a[4usize] != 0
1968 && a[5usize] != 0
1969 && a[6usize] != 0
1970 && a[7usize] != 0
1971 && a[8usize] != 0
1972 && a[9usize] != 0
1973 && a[10usize] != 0
1974 && a[11usize] != 0
1975 && a[12usize] != 0
1976 && a[13usize] != 0
1977 && a[14usize] != 0
1978 && a[15usize] != 0
1979 }
1980 #[inline(always)]
1981 fn any_false_mask8x16(self, a: mask8x16<Self>) -> bool {
1982 a[0usize] == 0
1983 || a[1usize] == 0
1984 || a[2usize] == 0
1985 || a[3usize] == 0
1986 || a[4usize] == 0
1987 || a[5usize] == 0
1988 || a[6usize] == 0
1989 || a[7usize] == 0
1990 || a[8usize] == 0
1991 || a[9usize] == 0
1992 || a[10usize] == 0
1993 || a[11usize] == 0
1994 || a[12usize] == 0
1995 || a[13usize] == 0
1996 || a[14usize] == 0
1997 || a[15usize] == 0
1998 }
1999 #[inline(always)]
2000 fn all_false_mask8x16(self, a: mask8x16<Self>) -> bool {
2001 a[0usize] == 0
2002 && a[1usize] == 0
2003 && a[2usize] == 0
2004 && a[3usize] == 0
2005 && a[4usize] == 0
2006 && a[5usize] == 0
2007 && a[6usize] == 0
2008 && a[7usize] == 0
2009 && a[8usize] == 0
2010 && a[9usize] == 0
2011 && a[10usize] == 0
2012 && a[11usize] == 0
2013 && a[12usize] == 0
2014 && a[13usize] == 0
2015 && a[14usize] == 0
2016 && a[15usize] == 0
2017 }
2018 #[inline(always)]
2019 fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
2020 let mut result = [0; 32usize];
2021 result[0..16usize].copy_from_slice(&a.val.0);
2022 result[16usize..32usize].copy_from_slice(&b.val.0);
2023 result.simd_into(self)
2024 }
2025 #[inline(always)]
2026 fn splat_i16x8(self, val: i16) -> i16x8<Self> {
2027 [val; 8usize].simd_into(self)
2028 }
2029 #[inline(always)]
2030 fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8<Self> {
2031 i16x8 {
2032 val: crate::support::Aligned128(val),
2033 simd: self,
2034 }
2035 }
2036 #[inline(always)]
2037 fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8<Self> {
2038 i16x8 {
2039 val: crate::support::Aligned128(*val),
2040 simd: self,
2041 }
2042 }
2043 #[inline(always)]
2044 fn as_array_i16x8(self, a: i16x8<Self>) -> [i16; 8usize] {
2045 a.val.0
2046 }
2047 #[inline(always)]
2048 fn as_array_ref_i16x8(self, a: &i16x8<Self>) -> &[i16; 8usize] {
2049 &a.val.0
2050 }
2051 #[inline(always)]
2052 fn as_array_mut_i16x8(self, a: &mut i16x8<Self>) -> &mut [i16; 8usize] {
2053 &mut a.val.0
2054 }
2055 #[inline(always)]
2056 fn store_array_i16x8(self, a: i16x8<Self>, dest: &mut [i16; 8usize]) -> () {
2057 *dest = a.val.0;
2058 }
2059 #[inline(always)]
2060 fn cvt_from_bytes_i16x8(self, a: u8x16<Self>) -> i16x8<Self> {
2061 unsafe {
2062 i16x8 {
2063 val: core::mem::transmute(a.val),
2064 simd: self,
2065 }
2066 }
2067 }
2068 #[inline(always)]
2069 fn cvt_to_bytes_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
2070 unsafe {
2071 u8x16 {
2072 val: core::mem::transmute(a.val),
2073 simd: self,
2074 }
2075 }
2076 }
2077 #[inline(always)]
2078 fn slide_i16x8<const SHIFT: usize>(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2079 let mut dest = [Default::default(); 8usize];
2080 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2081 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2082 dest.simd_into(self)
2083 }
2084 #[inline(always)]
2085 fn slide_within_blocks_i16x8<const SHIFT: usize>(
2086 self,
2087 a: i16x8<Self>,
2088 b: i16x8<Self>,
2089 ) -> i16x8<Self> {
2090 self.slide_i16x8::<SHIFT>(a, b)
2091 }
2092 #[inline(always)]
2093 fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2094 [
2095 i16::wrapping_add(a[0usize], b[0usize]),
2096 i16::wrapping_add(a[1usize], b[1usize]),
2097 i16::wrapping_add(a[2usize], b[2usize]),
2098 i16::wrapping_add(a[3usize], b[3usize]),
2099 i16::wrapping_add(a[4usize], b[4usize]),
2100 i16::wrapping_add(a[5usize], b[5usize]),
2101 i16::wrapping_add(a[6usize], b[6usize]),
2102 i16::wrapping_add(a[7usize], b[7usize]),
2103 ]
2104 .simd_into(self)
2105 }
2106 #[inline(always)]
2107 fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2108 [
2109 i16::wrapping_sub(a[0usize], b[0usize]),
2110 i16::wrapping_sub(a[1usize], b[1usize]),
2111 i16::wrapping_sub(a[2usize], b[2usize]),
2112 i16::wrapping_sub(a[3usize], b[3usize]),
2113 i16::wrapping_sub(a[4usize], b[4usize]),
2114 i16::wrapping_sub(a[5usize], b[5usize]),
2115 i16::wrapping_sub(a[6usize], b[6usize]),
2116 i16::wrapping_sub(a[7usize], b[7usize]),
2117 ]
2118 .simd_into(self)
2119 }
2120 #[inline(always)]
2121 fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2122 [
2123 i16::wrapping_mul(a[0usize], b[0usize]),
2124 i16::wrapping_mul(a[1usize], b[1usize]),
2125 i16::wrapping_mul(a[2usize], b[2usize]),
2126 i16::wrapping_mul(a[3usize], b[3usize]),
2127 i16::wrapping_mul(a[4usize], b[4usize]),
2128 i16::wrapping_mul(a[5usize], b[5usize]),
2129 i16::wrapping_mul(a[6usize], b[6usize]),
2130 i16::wrapping_mul(a[7usize], b[7usize]),
2131 ]
2132 .simd_into(self)
2133 }
2134 #[inline(always)]
2135 fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2136 [
2137 i16::bitand(a[0usize], &b[0usize]),
2138 i16::bitand(a[1usize], &b[1usize]),
2139 i16::bitand(a[2usize], &b[2usize]),
2140 i16::bitand(a[3usize], &b[3usize]),
2141 i16::bitand(a[4usize], &b[4usize]),
2142 i16::bitand(a[5usize], &b[5usize]),
2143 i16::bitand(a[6usize], &b[6usize]),
2144 i16::bitand(a[7usize], &b[7usize]),
2145 ]
2146 .simd_into(self)
2147 }
2148 #[inline(always)]
2149 fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2150 [
2151 i16::bitor(a[0usize], &b[0usize]),
2152 i16::bitor(a[1usize], &b[1usize]),
2153 i16::bitor(a[2usize], &b[2usize]),
2154 i16::bitor(a[3usize], &b[3usize]),
2155 i16::bitor(a[4usize], &b[4usize]),
2156 i16::bitor(a[5usize], &b[5usize]),
2157 i16::bitor(a[6usize], &b[6usize]),
2158 i16::bitor(a[7usize], &b[7usize]),
2159 ]
2160 .simd_into(self)
2161 }
2162 #[inline(always)]
2163 fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2164 [
2165 i16::bitxor(a[0usize], &b[0usize]),
2166 i16::bitxor(a[1usize], &b[1usize]),
2167 i16::bitxor(a[2usize], &b[2usize]),
2168 i16::bitxor(a[3usize], &b[3usize]),
2169 i16::bitxor(a[4usize], &b[4usize]),
2170 i16::bitxor(a[5usize], &b[5usize]),
2171 i16::bitxor(a[6usize], &b[6usize]),
2172 i16::bitxor(a[7usize], &b[7usize]),
2173 ]
2174 .simd_into(self)
2175 }
2176 #[inline(always)]
2177 fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
2178 [
2179 i16::not(a[0usize]),
2180 i16::not(a[1usize]),
2181 i16::not(a[2usize]),
2182 i16::not(a[3usize]),
2183 i16::not(a[4usize]),
2184 i16::not(a[5usize]),
2185 i16::not(a[6usize]),
2186 i16::not(a[7usize]),
2187 ]
2188 .simd_into(self)
2189 }
2190 #[inline(always)]
2191 fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
2192 [
2193 i16::shl(a[0usize], shift),
2194 i16::shl(a[1usize], shift),
2195 i16::shl(a[2usize], shift),
2196 i16::shl(a[3usize], shift),
2197 i16::shl(a[4usize], shift),
2198 i16::shl(a[5usize], shift),
2199 i16::shl(a[6usize], shift),
2200 i16::shl(a[7usize], shift),
2201 ]
2202 .simd_into(self)
2203 }
2204 #[inline(always)]
2205 fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2206 [
2207 i16::shl(a[0usize], &b[0usize]),
2208 i16::shl(a[1usize], &b[1usize]),
2209 i16::shl(a[2usize], &b[2usize]),
2210 i16::shl(a[3usize], &b[3usize]),
2211 i16::shl(a[4usize], &b[4usize]),
2212 i16::shl(a[5usize], &b[5usize]),
2213 i16::shl(a[6usize], &b[6usize]),
2214 i16::shl(a[7usize], &b[7usize]),
2215 ]
2216 .simd_into(self)
2217 }
2218 #[inline(always)]
2219 fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
2220 [
2221 i16::shr(a[0usize], shift),
2222 i16::shr(a[1usize], shift),
2223 i16::shr(a[2usize], shift),
2224 i16::shr(a[3usize], shift),
2225 i16::shr(a[4usize], shift),
2226 i16::shr(a[5usize], shift),
2227 i16::shr(a[6usize], shift),
2228 i16::shr(a[7usize], shift),
2229 ]
2230 .simd_into(self)
2231 }
2232 #[inline(always)]
2233 fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2234 [
2235 i16::shr(a[0usize], &b[0usize]),
2236 i16::shr(a[1usize], &b[1usize]),
2237 i16::shr(a[2usize], &b[2usize]),
2238 i16::shr(a[3usize], &b[3usize]),
2239 i16::shr(a[4usize], &b[4usize]),
2240 i16::shr(a[5usize], &b[5usize]),
2241 i16::shr(a[6usize], &b[6usize]),
2242 i16::shr(a[7usize], &b[7usize]),
2243 ]
2244 .simd_into(self)
2245 }
2246 #[inline(always)]
2247 fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2248 [
2249 -(i16::eq(&a[0usize], &b[0usize]) as i16),
2250 -(i16::eq(&a[1usize], &b[1usize]) as i16),
2251 -(i16::eq(&a[2usize], &b[2usize]) as i16),
2252 -(i16::eq(&a[3usize], &b[3usize]) as i16),
2253 -(i16::eq(&a[4usize], &b[4usize]) as i16),
2254 -(i16::eq(&a[5usize], &b[5usize]) as i16),
2255 -(i16::eq(&a[6usize], &b[6usize]) as i16),
2256 -(i16::eq(&a[7usize], &b[7usize]) as i16),
2257 ]
2258 .simd_into(self)
2259 }
2260 #[inline(always)]
2261 fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2262 [
2263 -(i16::lt(&a[0usize], &b[0usize]) as i16),
2264 -(i16::lt(&a[1usize], &b[1usize]) as i16),
2265 -(i16::lt(&a[2usize], &b[2usize]) as i16),
2266 -(i16::lt(&a[3usize], &b[3usize]) as i16),
2267 -(i16::lt(&a[4usize], &b[4usize]) as i16),
2268 -(i16::lt(&a[5usize], &b[5usize]) as i16),
2269 -(i16::lt(&a[6usize], &b[6usize]) as i16),
2270 -(i16::lt(&a[7usize], &b[7usize]) as i16),
2271 ]
2272 .simd_into(self)
2273 }
2274 #[inline(always)]
2275 fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2276 [
2277 -(i16::le(&a[0usize], &b[0usize]) as i16),
2278 -(i16::le(&a[1usize], &b[1usize]) as i16),
2279 -(i16::le(&a[2usize], &b[2usize]) as i16),
2280 -(i16::le(&a[3usize], &b[3usize]) as i16),
2281 -(i16::le(&a[4usize], &b[4usize]) as i16),
2282 -(i16::le(&a[5usize], &b[5usize]) as i16),
2283 -(i16::le(&a[6usize], &b[6usize]) as i16),
2284 -(i16::le(&a[7usize], &b[7usize]) as i16),
2285 ]
2286 .simd_into(self)
2287 }
2288 #[inline(always)]
2289 fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2290 [
2291 -(i16::ge(&a[0usize], &b[0usize]) as i16),
2292 -(i16::ge(&a[1usize], &b[1usize]) as i16),
2293 -(i16::ge(&a[2usize], &b[2usize]) as i16),
2294 -(i16::ge(&a[3usize], &b[3usize]) as i16),
2295 -(i16::ge(&a[4usize], &b[4usize]) as i16),
2296 -(i16::ge(&a[5usize], &b[5usize]) as i16),
2297 -(i16::ge(&a[6usize], &b[6usize]) as i16),
2298 -(i16::ge(&a[7usize], &b[7usize]) as i16),
2299 ]
2300 .simd_into(self)
2301 }
2302 #[inline(always)]
2303 fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2304 [
2305 -(i16::gt(&a[0usize], &b[0usize]) as i16),
2306 -(i16::gt(&a[1usize], &b[1usize]) as i16),
2307 -(i16::gt(&a[2usize], &b[2usize]) as i16),
2308 -(i16::gt(&a[3usize], &b[3usize]) as i16),
2309 -(i16::gt(&a[4usize], &b[4usize]) as i16),
2310 -(i16::gt(&a[5usize], &b[5usize]) as i16),
2311 -(i16::gt(&a[6usize], &b[6usize]) as i16),
2312 -(i16::gt(&a[7usize], &b[7usize]) as i16),
2313 ]
2314 .simd_into(self)
2315 }
2316 #[inline(always)]
2317 fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2318 [
2319 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2320 ]
2321 .simd_into(self)
2322 }
2323 #[inline(always)]
2324 fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2325 [
2326 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2327 ]
2328 .simd_into(self)
2329 }
2330 #[inline(always)]
2331 fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2332 [
2333 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2334 ]
2335 .simd_into(self)
2336 }
2337 #[inline(always)]
2338 fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2339 [
2340 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2341 ]
2342 .simd_into(self)
2343 }
2344 #[inline(always)]
2345 fn interleave_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> (i16x8<Self>, i16x8<Self>) {
2346 (self.zip_low_i16x8(a, b), self.zip_high_i16x8(a, b))
2347 }
2348 #[inline(always)]
2349 fn deinterleave_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> (i16x8<Self>, i16x8<Self>) {
2350 (self.unzip_low_i16x8(a, b), self.unzip_high_i16x8(a, b))
2351 }
2352 #[inline(always)]
2353 fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
2354 [
2355 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2356 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2357 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2358 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2359 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2360 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2361 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2362 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2363 ]
2364 .simd_into(self)
2365 }
2366 #[inline(always)]
2367 fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2368 [
2369 i16::min(a[0usize], b[0usize]),
2370 i16::min(a[1usize], b[1usize]),
2371 i16::min(a[2usize], b[2usize]),
2372 i16::min(a[3usize], b[3usize]),
2373 i16::min(a[4usize], b[4usize]),
2374 i16::min(a[5usize], b[5usize]),
2375 i16::min(a[6usize], b[6usize]),
2376 i16::min(a[7usize], b[7usize]),
2377 ]
2378 .simd_into(self)
2379 }
2380 #[inline(always)]
2381 fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2382 [
2383 i16::max(a[0usize], b[0usize]),
2384 i16::max(a[1usize], b[1usize]),
2385 i16::max(a[2usize], b[2usize]),
2386 i16::max(a[3usize], b[3usize]),
2387 i16::max(a[4usize], b[4usize]),
2388 i16::max(a[5usize], b[5usize]),
2389 i16::max(a[6usize], b[6usize]),
2390 i16::max(a[7usize], b[7usize]),
2391 ]
2392 .simd_into(self)
2393 }
2394 #[inline(always)]
2395 fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
2396 let mut result = [0; 16usize];
2397 result[0..8usize].copy_from_slice(&a.val.0);
2398 result[8usize..16usize].copy_from_slice(&b.val.0);
2399 result.simd_into(self)
2400 }
2401 #[inline(always)]
2402 fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
2403 [
2404 i16::neg(a[0usize]),
2405 i16::neg(a[1usize]),
2406 i16::neg(a[2usize]),
2407 i16::neg(a[3usize]),
2408 i16::neg(a[4usize]),
2409 i16::neg(a[5usize]),
2410 i16::neg(a[6usize]),
2411 i16::neg(a[7usize]),
2412 ]
2413 .simd_into(self)
2414 }
2415 #[inline(always)]
2416 fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
2417 a.bitcast()
2418 }
2419 #[inline(always)]
2420 fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
2421 a.bitcast()
2422 }
2423 #[inline(always)]
2424 fn splat_u16x8(self, val: u16) -> u16x8<Self> {
2425 [val; 8usize].simd_into(self)
2426 }
2427 #[inline(always)]
2428 fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8<Self> {
2429 u16x8 {
2430 val: crate::support::Aligned128(val),
2431 simd: self,
2432 }
2433 }
2434 #[inline(always)]
2435 fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8<Self> {
2436 u16x8 {
2437 val: crate::support::Aligned128(*val),
2438 simd: self,
2439 }
2440 }
2441 #[inline(always)]
2442 fn as_array_u16x8(self, a: u16x8<Self>) -> [u16; 8usize] {
2443 a.val.0
2444 }
2445 #[inline(always)]
2446 fn as_array_ref_u16x8(self, a: &u16x8<Self>) -> &[u16; 8usize] {
2447 &a.val.0
2448 }
2449 #[inline(always)]
2450 fn as_array_mut_u16x8(self, a: &mut u16x8<Self>) -> &mut [u16; 8usize] {
2451 &mut a.val.0
2452 }
2453 #[inline(always)]
2454 fn store_array_u16x8(self, a: u16x8<Self>, dest: &mut [u16; 8usize]) -> () {
2455 *dest = a.val.0;
2456 }
2457 #[inline(always)]
2458 fn cvt_from_bytes_u16x8(self, a: u8x16<Self>) -> u16x8<Self> {
2459 unsafe {
2460 u16x8 {
2461 val: core::mem::transmute(a.val),
2462 simd: self,
2463 }
2464 }
2465 }
2466 #[inline(always)]
2467 fn cvt_to_bytes_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2468 unsafe {
2469 u8x16 {
2470 val: core::mem::transmute(a.val),
2471 simd: self,
2472 }
2473 }
2474 }
2475 #[inline(always)]
2476 fn slide_u16x8<const SHIFT: usize>(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2477 let mut dest = [Default::default(); 8usize];
2478 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2479 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2480 dest.simd_into(self)
2481 }
2482 #[inline(always)]
2483 fn slide_within_blocks_u16x8<const SHIFT: usize>(
2484 self,
2485 a: u16x8<Self>,
2486 b: u16x8<Self>,
2487 ) -> u16x8<Self> {
2488 self.slide_u16x8::<SHIFT>(a, b)
2489 }
2490 #[inline(always)]
2491 fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2492 [
2493 u16::wrapping_add(a[0usize], b[0usize]),
2494 u16::wrapping_add(a[1usize], b[1usize]),
2495 u16::wrapping_add(a[2usize], b[2usize]),
2496 u16::wrapping_add(a[3usize], b[3usize]),
2497 u16::wrapping_add(a[4usize], b[4usize]),
2498 u16::wrapping_add(a[5usize], b[5usize]),
2499 u16::wrapping_add(a[6usize], b[6usize]),
2500 u16::wrapping_add(a[7usize], b[7usize]),
2501 ]
2502 .simd_into(self)
2503 }
2504 #[inline(always)]
2505 fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2506 [
2507 u16::wrapping_sub(a[0usize], b[0usize]),
2508 u16::wrapping_sub(a[1usize], b[1usize]),
2509 u16::wrapping_sub(a[2usize], b[2usize]),
2510 u16::wrapping_sub(a[3usize], b[3usize]),
2511 u16::wrapping_sub(a[4usize], b[4usize]),
2512 u16::wrapping_sub(a[5usize], b[5usize]),
2513 u16::wrapping_sub(a[6usize], b[6usize]),
2514 u16::wrapping_sub(a[7usize], b[7usize]),
2515 ]
2516 .simd_into(self)
2517 }
2518 #[inline(always)]
2519 fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2520 [
2521 u16::wrapping_mul(a[0usize], b[0usize]),
2522 u16::wrapping_mul(a[1usize], b[1usize]),
2523 u16::wrapping_mul(a[2usize], b[2usize]),
2524 u16::wrapping_mul(a[3usize], b[3usize]),
2525 u16::wrapping_mul(a[4usize], b[4usize]),
2526 u16::wrapping_mul(a[5usize], b[5usize]),
2527 u16::wrapping_mul(a[6usize], b[6usize]),
2528 u16::wrapping_mul(a[7usize], b[7usize]),
2529 ]
2530 .simd_into(self)
2531 }
2532 #[inline(always)]
2533 fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2534 [
2535 u16::bitand(a[0usize], &b[0usize]),
2536 u16::bitand(a[1usize], &b[1usize]),
2537 u16::bitand(a[2usize], &b[2usize]),
2538 u16::bitand(a[3usize], &b[3usize]),
2539 u16::bitand(a[4usize], &b[4usize]),
2540 u16::bitand(a[5usize], &b[5usize]),
2541 u16::bitand(a[6usize], &b[6usize]),
2542 u16::bitand(a[7usize], &b[7usize]),
2543 ]
2544 .simd_into(self)
2545 }
2546 #[inline(always)]
2547 fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2548 [
2549 u16::bitor(a[0usize], &b[0usize]),
2550 u16::bitor(a[1usize], &b[1usize]),
2551 u16::bitor(a[2usize], &b[2usize]),
2552 u16::bitor(a[3usize], &b[3usize]),
2553 u16::bitor(a[4usize], &b[4usize]),
2554 u16::bitor(a[5usize], &b[5usize]),
2555 u16::bitor(a[6usize], &b[6usize]),
2556 u16::bitor(a[7usize], &b[7usize]),
2557 ]
2558 .simd_into(self)
2559 }
2560 #[inline(always)]
2561 fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2562 [
2563 u16::bitxor(a[0usize], &b[0usize]),
2564 u16::bitxor(a[1usize], &b[1usize]),
2565 u16::bitxor(a[2usize], &b[2usize]),
2566 u16::bitxor(a[3usize], &b[3usize]),
2567 u16::bitxor(a[4usize], &b[4usize]),
2568 u16::bitxor(a[5usize], &b[5usize]),
2569 u16::bitxor(a[6usize], &b[6usize]),
2570 u16::bitxor(a[7usize], &b[7usize]),
2571 ]
2572 .simd_into(self)
2573 }
2574 #[inline(always)]
2575 fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
2576 [
2577 u16::not(a[0usize]),
2578 u16::not(a[1usize]),
2579 u16::not(a[2usize]),
2580 u16::not(a[3usize]),
2581 u16::not(a[4usize]),
2582 u16::not(a[5usize]),
2583 u16::not(a[6usize]),
2584 u16::not(a[7usize]),
2585 ]
2586 .simd_into(self)
2587 }
2588 #[inline(always)]
2589 fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2590 [
2591 u16::shl(a[0usize], shift),
2592 u16::shl(a[1usize], shift),
2593 u16::shl(a[2usize], shift),
2594 u16::shl(a[3usize], shift),
2595 u16::shl(a[4usize], shift),
2596 u16::shl(a[5usize], shift),
2597 u16::shl(a[6usize], shift),
2598 u16::shl(a[7usize], shift),
2599 ]
2600 .simd_into(self)
2601 }
2602 #[inline(always)]
2603 fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2604 [
2605 u16::shl(a[0usize], &b[0usize]),
2606 u16::shl(a[1usize], &b[1usize]),
2607 u16::shl(a[2usize], &b[2usize]),
2608 u16::shl(a[3usize], &b[3usize]),
2609 u16::shl(a[4usize], &b[4usize]),
2610 u16::shl(a[5usize], &b[5usize]),
2611 u16::shl(a[6usize], &b[6usize]),
2612 u16::shl(a[7usize], &b[7usize]),
2613 ]
2614 .simd_into(self)
2615 }
2616 #[inline(always)]
2617 fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2618 [
2619 u16::shr(a[0usize], shift),
2620 u16::shr(a[1usize], shift),
2621 u16::shr(a[2usize], shift),
2622 u16::shr(a[3usize], shift),
2623 u16::shr(a[4usize], shift),
2624 u16::shr(a[5usize], shift),
2625 u16::shr(a[6usize], shift),
2626 u16::shr(a[7usize], shift),
2627 ]
2628 .simd_into(self)
2629 }
2630 #[inline(always)]
2631 fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2632 [
2633 u16::shr(a[0usize], &b[0usize]),
2634 u16::shr(a[1usize], &b[1usize]),
2635 u16::shr(a[2usize], &b[2usize]),
2636 u16::shr(a[3usize], &b[3usize]),
2637 u16::shr(a[4usize], &b[4usize]),
2638 u16::shr(a[5usize], &b[5usize]),
2639 u16::shr(a[6usize], &b[6usize]),
2640 u16::shr(a[7usize], &b[7usize]),
2641 ]
2642 .simd_into(self)
2643 }
2644 #[inline(always)]
2645 fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2646 [
2647 -(u16::eq(&a[0usize], &b[0usize]) as i16),
2648 -(u16::eq(&a[1usize], &b[1usize]) as i16),
2649 -(u16::eq(&a[2usize], &b[2usize]) as i16),
2650 -(u16::eq(&a[3usize], &b[3usize]) as i16),
2651 -(u16::eq(&a[4usize], &b[4usize]) as i16),
2652 -(u16::eq(&a[5usize], &b[5usize]) as i16),
2653 -(u16::eq(&a[6usize], &b[6usize]) as i16),
2654 -(u16::eq(&a[7usize], &b[7usize]) as i16),
2655 ]
2656 .simd_into(self)
2657 }
2658 #[inline(always)]
2659 fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2660 [
2661 -(u16::lt(&a[0usize], &b[0usize]) as i16),
2662 -(u16::lt(&a[1usize], &b[1usize]) as i16),
2663 -(u16::lt(&a[2usize], &b[2usize]) as i16),
2664 -(u16::lt(&a[3usize], &b[3usize]) as i16),
2665 -(u16::lt(&a[4usize], &b[4usize]) as i16),
2666 -(u16::lt(&a[5usize], &b[5usize]) as i16),
2667 -(u16::lt(&a[6usize], &b[6usize]) as i16),
2668 -(u16::lt(&a[7usize], &b[7usize]) as i16),
2669 ]
2670 .simd_into(self)
2671 }
2672 #[inline(always)]
2673 fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2674 [
2675 -(u16::le(&a[0usize], &b[0usize]) as i16),
2676 -(u16::le(&a[1usize], &b[1usize]) as i16),
2677 -(u16::le(&a[2usize], &b[2usize]) as i16),
2678 -(u16::le(&a[3usize], &b[3usize]) as i16),
2679 -(u16::le(&a[4usize], &b[4usize]) as i16),
2680 -(u16::le(&a[5usize], &b[5usize]) as i16),
2681 -(u16::le(&a[6usize], &b[6usize]) as i16),
2682 -(u16::le(&a[7usize], &b[7usize]) as i16),
2683 ]
2684 .simd_into(self)
2685 }
2686 #[inline(always)]
2687 fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2688 [
2689 -(u16::ge(&a[0usize], &b[0usize]) as i16),
2690 -(u16::ge(&a[1usize], &b[1usize]) as i16),
2691 -(u16::ge(&a[2usize], &b[2usize]) as i16),
2692 -(u16::ge(&a[3usize], &b[3usize]) as i16),
2693 -(u16::ge(&a[4usize], &b[4usize]) as i16),
2694 -(u16::ge(&a[5usize], &b[5usize]) as i16),
2695 -(u16::ge(&a[6usize], &b[6usize]) as i16),
2696 -(u16::ge(&a[7usize], &b[7usize]) as i16),
2697 ]
2698 .simd_into(self)
2699 }
2700 #[inline(always)]
2701 fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2702 [
2703 -(u16::gt(&a[0usize], &b[0usize]) as i16),
2704 -(u16::gt(&a[1usize], &b[1usize]) as i16),
2705 -(u16::gt(&a[2usize], &b[2usize]) as i16),
2706 -(u16::gt(&a[3usize], &b[3usize]) as i16),
2707 -(u16::gt(&a[4usize], &b[4usize]) as i16),
2708 -(u16::gt(&a[5usize], &b[5usize]) as i16),
2709 -(u16::gt(&a[6usize], &b[6usize]) as i16),
2710 -(u16::gt(&a[7usize], &b[7usize]) as i16),
2711 ]
2712 .simd_into(self)
2713 }
2714 #[inline(always)]
2715 fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2716 [
2717 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2718 ]
2719 .simd_into(self)
2720 }
2721 #[inline(always)]
2722 fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2723 [
2724 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2725 ]
2726 .simd_into(self)
2727 }
2728 #[inline(always)]
2729 fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2730 [
2731 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2732 ]
2733 .simd_into(self)
2734 }
2735 #[inline(always)]
2736 fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2737 [
2738 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2739 ]
2740 .simd_into(self)
2741 }
2742 #[inline(always)]
2743 fn interleave_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> (u16x8<Self>, u16x8<Self>) {
2744 (self.zip_low_u16x8(a, b), self.zip_high_u16x8(a, b))
2745 }
2746 #[inline(always)]
2747 fn deinterleave_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> (u16x8<Self>, u16x8<Self>) {
2748 (self.unzip_low_u16x8(a, b), self.unzip_high_u16x8(a, b))
2749 }
2750 #[inline(always)]
2751 fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
2752 [
2753 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2754 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2755 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2756 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2757 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2758 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2759 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2760 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2761 ]
2762 .simd_into(self)
2763 }
2764 #[inline(always)]
2765 fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2766 [
2767 u16::min(a[0usize], b[0usize]),
2768 u16::min(a[1usize], b[1usize]),
2769 u16::min(a[2usize], b[2usize]),
2770 u16::min(a[3usize], b[3usize]),
2771 u16::min(a[4usize], b[4usize]),
2772 u16::min(a[5usize], b[5usize]),
2773 u16::min(a[6usize], b[6usize]),
2774 u16::min(a[7usize], b[7usize]),
2775 ]
2776 .simd_into(self)
2777 }
2778 #[inline(always)]
2779 fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2780 [
2781 u16::max(a[0usize], b[0usize]),
2782 u16::max(a[1usize], b[1usize]),
2783 u16::max(a[2usize], b[2usize]),
2784 u16::max(a[3usize], b[3usize]),
2785 u16::max(a[4usize], b[4usize]),
2786 u16::max(a[5usize], b[5usize]),
2787 u16::max(a[6usize], b[6usize]),
2788 u16::max(a[7usize], b[7usize]),
2789 ]
2790 .simd_into(self)
2791 }
2792 #[inline(always)]
2793 fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
2794 let mut result = [0; 16usize];
2795 result[0..8usize].copy_from_slice(&a.val.0);
2796 result[8usize..16usize].copy_from_slice(&b.val.0);
2797 result.simd_into(self)
2798 }
2799 #[inline(always)]
2800 fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2801 a.bitcast()
2802 }
2803 #[inline(always)]
2804 fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
2805 a.bitcast()
2806 }
2807 #[inline(always)]
2808 fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2809 [val; 8usize].simd_into(self)
2810 }
2811 #[inline(always)]
2812 fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8<Self> {
2813 mask16x8 {
2814 val: crate::support::Aligned128(val),
2815 simd: self,
2816 }
2817 }
2818 #[inline(always)]
2819 fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8<Self> {
2820 mask16x8 {
2821 val: crate::support::Aligned128(*val),
2822 simd: self,
2823 }
2824 }
2825 #[inline(always)]
2826 fn as_array_mask16x8(self, a: mask16x8<Self>) -> [i16; 8usize] {
2827 a.val.0
2828 }
2829 #[inline(always)]
2830 fn as_array_ref_mask16x8(self, a: &mask16x8<Self>) -> &[i16; 8usize] {
2831 &a.val.0
2832 }
2833 #[inline(always)]
2834 fn as_array_mut_mask16x8(self, a: &mut mask16x8<Self>) -> &mut [i16; 8usize] {
2835 &mut a.val.0
2836 }
2837 #[inline(always)]
2838 fn store_array_mask16x8(self, a: mask16x8<Self>, dest: &mut [i16; 8usize]) -> () {
2839 *dest = a.val.0;
2840 }
2841 #[inline(always)]
2842 fn cvt_from_bytes_mask16x8(self, a: u8x16<Self>) -> mask16x8<Self> {
2843 unsafe {
2844 mask16x8 {
2845 val: core::mem::transmute(a.val),
2846 simd: self,
2847 }
2848 }
2849 }
2850 #[inline(always)]
2851 fn cvt_to_bytes_mask16x8(self, a: mask16x8<Self>) -> u8x16<Self> {
2852 unsafe {
2853 u8x16 {
2854 val: core::mem::transmute(a.val),
2855 simd: self,
2856 }
2857 }
2858 }
2859 #[inline(always)]
2860 fn slide_mask16x8<const SHIFT: usize>(
2861 self,
2862 a: mask16x8<Self>,
2863 b: mask16x8<Self>,
2864 ) -> mask16x8<Self> {
2865 let mut dest = [Default::default(); 8usize];
2866 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2867 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2868 dest.simd_into(self)
2869 }
2870 #[inline(always)]
2871 fn slide_within_blocks_mask16x8<const SHIFT: usize>(
2872 self,
2873 a: mask16x8<Self>,
2874 b: mask16x8<Self>,
2875 ) -> mask16x8<Self> {
2876 self.slide_mask16x8::<SHIFT>(a, b)
2877 }
2878 #[inline(always)]
2879 fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2880 [
2881 i16::bitand(a[0usize], &b[0usize]),
2882 i16::bitand(a[1usize], &b[1usize]),
2883 i16::bitand(a[2usize], &b[2usize]),
2884 i16::bitand(a[3usize], &b[3usize]),
2885 i16::bitand(a[4usize], &b[4usize]),
2886 i16::bitand(a[5usize], &b[5usize]),
2887 i16::bitand(a[6usize], &b[6usize]),
2888 i16::bitand(a[7usize], &b[7usize]),
2889 ]
2890 .simd_into(self)
2891 }
2892 #[inline(always)]
2893 fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2894 [
2895 i16::bitor(a[0usize], &b[0usize]),
2896 i16::bitor(a[1usize], &b[1usize]),
2897 i16::bitor(a[2usize], &b[2usize]),
2898 i16::bitor(a[3usize], &b[3usize]),
2899 i16::bitor(a[4usize], &b[4usize]),
2900 i16::bitor(a[5usize], &b[5usize]),
2901 i16::bitor(a[6usize], &b[6usize]),
2902 i16::bitor(a[7usize], &b[7usize]),
2903 ]
2904 .simd_into(self)
2905 }
2906 #[inline(always)]
2907 fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2908 [
2909 i16::bitxor(a[0usize], &b[0usize]),
2910 i16::bitxor(a[1usize], &b[1usize]),
2911 i16::bitxor(a[2usize], &b[2usize]),
2912 i16::bitxor(a[3usize], &b[3usize]),
2913 i16::bitxor(a[4usize], &b[4usize]),
2914 i16::bitxor(a[5usize], &b[5usize]),
2915 i16::bitxor(a[6usize], &b[6usize]),
2916 i16::bitxor(a[7usize], &b[7usize]),
2917 ]
2918 .simd_into(self)
2919 }
2920 #[inline(always)]
2921 fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2922 [
2923 i16::not(a[0usize]),
2924 i16::not(a[1usize]),
2925 i16::not(a[2usize]),
2926 i16::not(a[3usize]),
2927 i16::not(a[4usize]),
2928 i16::not(a[5usize]),
2929 i16::not(a[6usize]),
2930 i16::not(a[7usize]),
2931 ]
2932 .simd_into(self)
2933 }
2934 #[inline(always)]
2935 fn select_mask16x8(
2936 self,
2937 a: mask16x8<Self>,
2938 b: mask16x8<Self>,
2939 c: mask16x8<Self>,
2940 ) -> mask16x8<Self> {
2941 [
2942 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2943 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2944 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2945 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2946 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2947 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2948 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2949 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2950 ]
2951 .simd_into(self)
2952 }
2953 #[inline(always)]
2954 fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2955 [
2956 -(i16::eq(&a[0usize], &b[0usize]) as i16),
2957 -(i16::eq(&a[1usize], &b[1usize]) as i16),
2958 -(i16::eq(&a[2usize], &b[2usize]) as i16),
2959 -(i16::eq(&a[3usize], &b[3usize]) as i16),
2960 -(i16::eq(&a[4usize], &b[4usize]) as i16),
2961 -(i16::eq(&a[5usize], &b[5usize]) as i16),
2962 -(i16::eq(&a[6usize], &b[6usize]) as i16),
2963 -(i16::eq(&a[7usize], &b[7usize]) as i16),
2964 ]
2965 .simd_into(self)
2966 }
2967 #[inline(always)]
2968 fn any_true_mask16x8(self, a: mask16x8<Self>) -> bool {
2969 a[0usize] != 0
2970 || a[1usize] != 0
2971 || a[2usize] != 0
2972 || a[3usize] != 0
2973 || a[4usize] != 0
2974 || a[5usize] != 0
2975 || a[6usize] != 0
2976 || a[7usize] != 0
2977 }
2978 #[inline(always)]
2979 fn all_true_mask16x8(self, a: mask16x8<Self>) -> bool {
2980 a[0usize] != 0
2981 && a[1usize] != 0
2982 && a[2usize] != 0
2983 && a[3usize] != 0
2984 && a[4usize] != 0
2985 && a[5usize] != 0
2986 && a[6usize] != 0
2987 && a[7usize] != 0
2988 }
2989 #[inline(always)]
2990 fn any_false_mask16x8(self, a: mask16x8<Self>) -> bool {
2991 a[0usize] == 0
2992 || a[1usize] == 0
2993 || a[2usize] == 0
2994 || a[3usize] == 0
2995 || a[4usize] == 0
2996 || a[5usize] == 0
2997 || a[6usize] == 0
2998 || a[7usize] == 0
2999 }
3000 #[inline(always)]
3001 fn all_false_mask16x8(self, a: mask16x8<Self>) -> bool {
3002 a[0usize] == 0
3003 && a[1usize] == 0
3004 && a[2usize] == 0
3005 && a[3usize] == 0
3006 && a[4usize] == 0
3007 && a[5usize] == 0
3008 && a[6usize] == 0
3009 && a[7usize] == 0
3010 }
3011 #[inline(always)]
3012 fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
3013 let mut result = [0; 16usize];
3014 result[0..8usize].copy_from_slice(&a.val.0);
3015 result[8usize..16usize].copy_from_slice(&b.val.0);
3016 result.simd_into(self)
3017 }
3018 #[inline(always)]
3019 fn splat_i32x4(self, val: i32) -> i32x4<Self> {
3020 [val; 4usize].simd_into(self)
3021 }
3022 #[inline(always)]
3023 fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4<Self> {
3024 i32x4 {
3025 val: crate::support::Aligned128(val),
3026 simd: self,
3027 }
3028 }
3029 #[inline(always)]
3030 fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4<Self> {
3031 i32x4 {
3032 val: crate::support::Aligned128(*val),
3033 simd: self,
3034 }
3035 }
3036 #[inline(always)]
3037 fn as_array_i32x4(self, a: i32x4<Self>) -> [i32; 4usize] {
3038 a.val.0
3039 }
3040 #[inline(always)]
3041 fn as_array_ref_i32x4(self, a: &i32x4<Self>) -> &[i32; 4usize] {
3042 &a.val.0
3043 }
3044 #[inline(always)]
3045 fn as_array_mut_i32x4(self, a: &mut i32x4<Self>) -> &mut [i32; 4usize] {
3046 &mut a.val.0
3047 }
3048 #[inline(always)]
3049 fn store_array_i32x4(self, a: i32x4<Self>, dest: &mut [i32; 4usize]) -> () {
3050 *dest = a.val.0;
3051 }
3052 #[inline(always)]
3053 fn cvt_from_bytes_i32x4(self, a: u8x16<Self>) -> i32x4<Self> {
3054 unsafe {
3055 i32x4 {
3056 val: core::mem::transmute(a.val),
3057 simd: self,
3058 }
3059 }
3060 }
3061 #[inline(always)]
3062 fn cvt_to_bytes_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
3063 unsafe {
3064 u8x16 {
3065 val: core::mem::transmute(a.val),
3066 simd: self,
3067 }
3068 }
3069 }
3070 #[inline(always)]
3071 fn slide_i32x4<const SHIFT: usize>(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3072 let mut dest = [Default::default(); 4usize];
3073 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3074 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3075 dest.simd_into(self)
3076 }
3077 #[inline(always)]
3078 fn slide_within_blocks_i32x4<const SHIFT: usize>(
3079 self,
3080 a: i32x4<Self>,
3081 b: i32x4<Self>,
3082 ) -> i32x4<Self> {
3083 self.slide_i32x4::<SHIFT>(a, b)
3084 }
3085 #[inline(always)]
3086 fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3087 [
3088 i32::wrapping_add(a[0usize], b[0usize]),
3089 i32::wrapping_add(a[1usize], b[1usize]),
3090 i32::wrapping_add(a[2usize], b[2usize]),
3091 i32::wrapping_add(a[3usize], b[3usize]),
3092 ]
3093 .simd_into(self)
3094 }
3095 #[inline(always)]
3096 fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3097 [
3098 i32::wrapping_sub(a[0usize], b[0usize]),
3099 i32::wrapping_sub(a[1usize], b[1usize]),
3100 i32::wrapping_sub(a[2usize], b[2usize]),
3101 i32::wrapping_sub(a[3usize], b[3usize]),
3102 ]
3103 .simd_into(self)
3104 }
3105 #[inline(always)]
3106 fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3107 [
3108 i32::wrapping_mul(a[0usize], b[0usize]),
3109 i32::wrapping_mul(a[1usize], b[1usize]),
3110 i32::wrapping_mul(a[2usize], b[2usize]),
3111 i32::wrapping_mul(a[3usize], b[3usize]),
3112 ]
3113 .simd_into(self)
3114 }
3115 #[inline(always)]
3116 fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3117 [
3118 i32::bitand(a[0usize], &b[0usize]),
3119 i32::bitand(a[1usize], &b[1usize]),
3120 i32::bitand(a[2usize], &b[2usize]),
3121 i32::bitand(a[3usize], &b[3usize]),
3122 ]
3123 .simd_into(self)
3124 }
3125 #[inline(always)]
3126 fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3127 [
3128 i32::bitor(a[0usize], &b[0usize]),
3129 i32::bitor(a[1usize], &b[1usize]),
3130 i32::bitor(a[2usize], &b[2usize]),
3131 i32::bitor(a[3usize], &b[3usize]),
3132 ]
3133 .simd_into(self)
3134 }
3135 #[inline(always)]
3136 fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3137 [
3138 i32::bitxor(a[0usize], &b[0usize]),
3139 i32::bitxor(a[1usize], &b[1usize]),
3140 i32::bitxor(a[2usize], &b[2usize]),
3141 i32::bitxor(a[3usize], &b[3usize]),
3142 ]
3143 .simd_into(self)
3144 }
3145 #[inline(always)]
3146 fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
3147 [
3148 i32::not(a[0usize]),
3149 i32::not(a[1usize]),
3150 i32::not(a[2usize]),
3151 i32::not(a[3usize]),
3152 ]
3153 .simd_into(self)
3154 }
3155 #[inline(always)]
3156 fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
3157 [
3158 i32::shl(a[0usize], shift),
3159 i32::shl(a[1usize], shift),
3160 i32::shl(a[2usize], shift),
3161 i32::shl(a[3usize], shift),
3162 ]
3163 .simd_into(self)
3164 }
3165 #[inline(always)]
3166 fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3167 [
3168 i32::shl(a[0usize], &b[0usize]),
3169 i32::shl(a[1usize], &b[1usize]),
3170 i32::shl(a[2usize], &b[2usize]),
3171 i32::shl(a[3usize], &b[3usize]),
3172 ]
3173 .simd_into(self)
3174 }
3175 #[inline(always)]
3176 fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
3177 [
3178 i32::shr(a[0usize], shift),
3179 i32::shr(a[1usize], shift),
3180 i32::shr(a[2usize], shift),
3181 i32::shr(a[3usize], shift),
3182 ]
3183 .simd_into(self)
3184 }
3185 #[inline(always)]
3186 fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3187 [
3188 i32::shr(a[0usize], &b[0usize]),
3189 i32::shr(a[1usize], &b[1usize]),
3190 i32::shr(a[2usize], &b[2usize]),
3191 i32::shr(a[3usize], &b[3usize]),
3192 ]
3193 .simd_into(self)
3194 }
3195 #[inline(always)]
3196 fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3197 [
3198 -(i32::eq(&a[0usize], &b[0usize]) as i32),
3199 -(i32::eq(&a[1usize], &b[1usize]) as i32),
3200 -(i32::eq(&a[2usize], &b[2usize]) as i32),
3201 -(i32::eq(&a[3usize], &b[3usize]) as i32),
3202 ]
3203 .simd_into(self)
3204 }
3205 #[inline(always)]
3206 fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3207 [
3208 -(i32::lt(&a[0usize], &b[0usize]) as i32),
3209 -(i32::lt(&a[1usize], &b[1usize]) as i32),
3210 -(i32::lt(&a[2usize], &b[2usize]) as i32),
3211 -(i32::lt(&a[3usize], &b[3usize]) as i32),
3212 ]
3213 .simd_into(self)
3214 }
3215 #[inline(always)]
3216 fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3217 [
3218 -(i32::le(&a[0usize], &b[0usize]) as i32),
3219 -(i32::le(&a[1usize], &b[1usize]) as i32),
3220 -(i32::le(&a[2usize], &b[2usize]) as i32),
3221 -(i32::le(&a[3usize], &b[3usize]) as i32),
3222 ]
3223 .simd_into(self)
3224 }
3225 #[inline(always)]
3226 fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3227 [
3228 -(i32::ge(&a[0usize], &b[0usize]) as i32),
3229 -(i32::ge(&a[1usize], &b[1usize]) as i32),
3230 -(i32::ge(&a[2usize], &b[2usize]) as i32),
3231 -(i32::ge(&a[3usize], &b[3usize]) as i32),
3232 ]
3233 .simd_into(self)
3234 }
3235 #[inline(always)]
3236 fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3237 [
3238 -(i32::gt(&a[0usize], &b[0usize]) as i32),
3239 -(i32::gt(&a[1usize], &b[1usize]) as i32),
3240 -(i32::gt(&a[2usize], &b[2usize]) as i32),
3241 -(i32::gt(&a[3usize], &b[3usize]) as i32),
3242 ]
3243 .simd_into(self)
3244 }
3245 #[inline(always)]
3246 fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3247 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
3248 }
3249 #[inline(always)]
3250 fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3251 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
3252 }
3253 #[inline(always)]
3254 fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3255 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
3256 }
3257 #[inline(always)]
3258 fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3259 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
3260 }
3261 #[inline(always)]
3262 fn interleave_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> (i32x4<Self>, i32x4<Self>) {
3263 (self.zip_low_i32x4(a, b), self.zip_high_i32x4(a, b))
3264 }
3265 #[inline(always)]
3266 fn deinterleave_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> (i32x4<Self>, i32x4<Self>) {
3267 (self.unzip_low_i32x4(a, b), self.unzip_high_i32x4(a, b))
3268 }
3269 #[inline(always)]
3270 fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
3271 [
3272 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3273 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3274 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3275 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3276 ]
3277 .simd_into(self)
3278 }
3279 #[inline(always)]
3280 fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3281 [
3282 i32::min(a[0usize], b[0usize]),
3283 i32::min(a[1usize], b[1usize]),
3284 i32::min(a[2usize], b[2usize]),
3285 i32::min(a[3usize], b[3usize]),
3286 ]
3287 .simd_into(self)
3288 }
3289 #[inline(always)]
3290 fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3291 [
3292 i32::max(a[0usize], b[0usize]),
3293 i32::max(a[1usize], b[1usize]),
3294 i32::max(a[2usize], b[2usize]),
3295 i32::max(a[3usize], b[3usize]),
3296 ]
3297 .simd_into(self)
3298 }
3299 #[inline(always)]
3300 fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
3301 let mut result = [0; 8usize];
3302 result[0..4usize].copy_from_slice(&a.val.0);
3303 result[4usize..8usize].copy_from_slice(&b.val.0);
3304 result.simd_into(self)
3305 }
3306 #[inline(always)]
3307 fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
3308 [
3309 i32::neg(a[0usize]),
3310 i32::neg(a[1usize]),
3311 i32::neg(a[2usize]),
3312 i32::neg(a[3usize]),
3313 ]
3314 .simd_into(self)
3315 }
3316 #[inline(always)]
3317 fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
3318 a.bitcast()
3319 }
3320 #[inline(always)]
3321 fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
3322 a.bitcast()
3323 }
3324 #[inline(always)]
3325 fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
3326 [
3327 a[0usize] as f32,
3328 a[1usize] as f32,
3329 a[2usize] as f32,
3330 a[3usize] as f32,
3331 ]
3332 .simd_into(self)
3333 }
3334 #[inline(always)]
3335 fn splat_u32x4(self, val: u32) -> u32x4<Self> {
3336 [val; 4usize].simd_into(self)
3337 }
3338 #[inline(always)]
3339 fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4<Self> {
3340 u32x4 {
3341 val: crate::support::Aligned128(val),
3342 simd: self,
3343 }
3344 }
3345 #[inline(always)]
3346 fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4<Self> {
3347 u32x4 {
3348 val: crate::support::Aligned128(*val),
3349 simd: self,
3350 }
3351 }
3352 #[inline(always)]
3353 fn as_array_u32x4(self, a: u32x4<Self>) -> [u32; 4usize] {
3354 a.val.0
3355 }
3356 #[inline(always)]
3357 fn as_array_ref_u32x4(self, a: &u32x4<Self>) -> &[u32; 4usize] {
3358 &a.val.0
3359 }
3360 #[inline(always)]
3361 fn as_array_mut_u32x4(self, a: &mut u32x4<Self>) -> &mut [u32; 4usize] {
3362 &mut a.val.0
3363 }
3364 #[inline(always)]
3365 fn store_array_u32x4(self, a: u32x4<Self>, dest: &mut [u32; 4usize]) -> () {
3366 *dest = a.val.0;
3367 }
3368 #[inline(always)]
3369 fn cvt_from_bytes_u32x4(self, a: u8x16<Self>) -> u32x4<Self> {
3370 unsafe {
3371 u32x4 {
3372 val: core::mem::transmute(a.val),
3373 simd: self,
3374 }
3375 }
3376 }
3377 #[inline(always)]
3378 fn cvt_to_bytes_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
3379 unsafe {
3380 u8x16 {
3381 val: core::mem::transmute(a.val),
3382 simd: self,
3383 }
3384 }
3385 }
3386 #[inline(always)]
3387 fn slide_u32x4<const SHIFT: usize>(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3388 let mut dest = [Default::default(); 4usize];
3389 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3390 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3391 dest.simd_into(self)
3392 }
3393 #[inline(always)]
3394 fn slide_within_blocks_u32x4<const SHIFT: usize>(
3395 self,
3396 a: u32x4<Self>,
3397 b: u32x4<Self>,
3398 ) -> u32x4<Self> {
3399 self.slide_u32x4::<SHIFT>(a, b)
3400 }
3401 #[inline(always)]
3402 fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3403 [
3404 u32::wrapping_add(a[0usize], b[0usize]),
3405 u32::wrapping_add(a[1usize], b[1usize]),
3406 u32::wrapping_add(a[2usize], b[2usize]),
3407 u32::wrapping_add(a[3usize], b[3usize]),
3408 ]
3409 .simd_into(self)
3410 }
3411 #[inline(always)]
3412 fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3413 [
3414 u32::wrapping_sub(a[0usize], b[0usize]),
3415 u32::wrapping_sub(a[1usize], b[1usize]),
3416 u32::wrapping_sub(a[2usize], b[2usize]),
3417 u32::wrapping_sub(a[3usize], b[3usize]),
3418 ]
3419 .simd_into(self)
3420 }
3421 #[inline(always)]
3422 fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3423 [
3424 u32::wrapping_mul(a[0usize], b[0usize]),
3425 u32::wrapping_mul(a[1usize], b[1usize]),
3426 u32::wrapping_mul(a[2usize], b[2usize]),
3427 u32::wrapping_mul(a[3usize], b[3usize]),
3428 ]
3429 .simd_into(self)
3430 }
3431 #[inline(always)]
3432 fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3433 [
3434 u32::bitand(a[0usize], &b[0usize]),
3435 u32::bitand(a[1usize], &b[1usize]),
3436 u32::bitand(a[2usize], &b[2usize]),
3437 u32::bitand(a[3usize], &b[3usize]),
3438 ]
3439 .simd_into(self)
3440 }
3441 #[inline(always)]
3442 fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3443 [
3444 u32::bitor(a[0usize], &b[0usize]),
3445 u32::bitor(a[1usize], &b[1usize]),
3446 u32::bitor(a[2usize], &b[2usize]),
3447 u32::bitor(a[3usize], &b[3usize]),
3448 ]
3449 .simd_into(self)
3450 }
3451 #[inline(always)]
3452 fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3453 [
3454 u32::bitxor(a[0usize], &b[0usize]),
3455 u32::bitxor(a[1usize], &b[1usize]),
3456 u32::bitxor(a[2usize], &b[2usize]),
3457 u32::bitxor(a[3usize], &b[3usize]),
3458 ]
3459 .simd_into(self)
3460 }
3461 #[inline(always)]
3462 fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
3463 [
3464 u32::not(a[0usize]),
3465 u32::not(a[1usize]),
3466 u32::not(a[2usize]),
3467 u32::not(a[3usize]),
3468 ]
3469 .simd_into(self)
3470 }
3471 #[inline(always)]
3472 fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
3473 [
3474 u32::shl(a[0usize], shift),
3475 u32::shl(a[1usize], shift),
3476 u32::shl(a[2usize], shift),
3477 u32::shl(a[3usize], shift),
3478 ]
3479 .simd_into(self)
3480 }
3481 #[inline(always)]
3482 fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3483 [
3484 u32::shl(a[0usize], &b[0usize]),
3485 u32::shl(a[1usize], &b[1usize]),
3486 u32::shl(a[2usize], &b[2usize]),
3487 u32::shl(a[3usize], &b[3usize]),
3488 ]
3489 .simd_into(self)
3490 }
3491 #[inline(always)]
3492 fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
3493 [
3494 u32::shr(a[0usize], shift),
3495 u32::shr(a[1usize], shift),
3496 u32::shr(a[2usize], shift),
3497 u32::shr(a[3usize], shift),
3498 ]
3499 .simd_into(self)
3500 }
3501 #[inline(always)]
3502 fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3503 [
3504 u32::shr(a[0usize], &b[0usize]),
3505 u32::shr(a[1usize], &b[1usize]),
3506 u32::shr(a[2usize], &b[2usize]),
3507 u32::shr(a[3usize], &b[3usize]),
3508 ]
3509 .simd_into(self)
3510 }
3511 #[inline(always)]
3512 fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3513 [
3514 -(u32::eq(&a[0usize], &b[0usize]) as i32),
3515 -(u32::eq(&a[1usize], &b[1usize]) as i32),
3516 -(u32::eq(&a[2usize], &b[2usize]) as i32),
3517 -(u32::eq(&a[3usize], &b[3usize]) as i32),
3518 ]
3519 .simd_into(self)
3520 }
3521 #[inline(always)]
3522 fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3523 [
3524 -(u32::lt(&a[0usize], &b[0usize]) as i32),
3525 -(u32::lt(&a[1usize], &b[1usize]) as i32),
3526 -(u32::lt(&a[2usize], &b[2usize]) as i32),
3527 -(u32::lt(&a[3usize], &b[3usize]) as i32),
3528 ]
3529 .simd_into(self)
3530 }
3531 #[inline(always)]
3532 fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3533 [
3534 -(u32::le(&a[0usize], &b[0usize]) as i32),
3535 -(u32::le(&a[1usize], &b[1usize]) as i32),
3536 -(u32::le(&a[2usize], &b[2usize]) as i32),
3537 -(u32::le(&a[3usize], &b[3usize]) as i32),
3538 ]
3539 .simd_into(self)
3540 }
3541 #[inline(always)]
3542 fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3543 [
3544 -(u32::ge(&a[0usize], &b[0usize]) as i32),
3545 -(u32::ge(&a[1usize], &b[1usize]) as i32),
3546 -(u32::ge(&a[2usize], &b[2usize]) as i32),
3547 -(u32::ge(&a[3usize], &b[3usize]) as i32),
3548 ]
3549 .simd_into(self)
3550 }
3551 #[inline(always)]
3552 fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3553 [
3554 -(u32::gt(&a[0usize], &b[0usize]) as i32),
3555 -(u32::gt(&a[1usize], &b[1usize]) as i32),
3556 -(u32::gt(&a[2usize], &b[2usize]) as i32),
3557 -(u32::gt(&a[3usize], &b[3usize]) as i32),
3558 ]
3559 .simd_into(self)
3560 }
3561 #[inline(always)]
3562 fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3563 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
3564 }
3565 #[inline(always)]
3566 fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3567 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
3568 }
3569 #[inline(always)]
3570 fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3571 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
3572 }
3573 #[inline(always)]
3574 fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3575 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
3576 }
3577 #[inline(always)]
3578 fn interleave_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> (u32x4<Self>, u32x4<Self>) {
3579 (self.zip_low_u32x4(a, b), self.zip_high_u32x4(a, b))
3580 }
3581 #[inline(always)]
3582 fn deinterleave_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> (u32x4<Self>, u32x4<Self>) {
3583 (self.unzip_low_u32x4(a, b), self.unzip_high_u32x4(a, b))
3584 }
3585 #[inline(always)]
3586 fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
3587 [
3588 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3589 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3590 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3591 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3592 ]
3593 .simd_into(self)
3594 }
3595 #[inline(always)]
3596 fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3597 [
3598 u32::min(a[0usize], b[0usize]),
3599 u32::min(a[1usize], b[1usize]),
3600 u32::min(a[2usize], b[2usize]),
3601 u32::min(a[3usize], b[3usize]),
3602 ]
3603 .simd_into(self)
3604 }
3605 #[inline(always)]
3606 fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3607 [
3608 u32::max(a[0usize], b[0usize]),
3609 u32::max(a[1usize], b[1usize]),
3610 u32::max(a[2usize], b[2usize]),
3611 u32::max(a[3usize], b[3usize]),
3612 ]
3613 .simd_into(self)
3614 }
3615 #[inline(always)]
3616 fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
3617 let mut result = [0; 8usize];
3618 result[0..4usize].copy_from_slice(&a.val.0);
3619 result[4usize..8usize].copy_from_slice(&b.val.0);
3620 result.simd_into(self)
3621 }
3622 #[inline(always)]
3623 fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
3624 a.bitcast()
3625 }
3626 #[inline(always)]
3627 fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
3628 [
3629 a[0usize] as f32,
3630 a[1usize] as f32,
3631 a[2usize] as f32,
3632 a[3usize] as f32,
3633 ]
3634 .simd_into(self)
3635 }
3636 #[inline(always)]
3637 fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
3638 [val; 4usize].simd_into(self)
3639 }
3640 #[inline(always)]
3641 fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4<Self> {
3642 mask32x4 {
3643 val: crate::support::Aligned128(val),
3644 simd: self,
3645 }
3646 }
3647 #[inline(always)]
3648 fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4<Self> {
3649 mask32x4 {
3650 val: crate::support::Aligned128(*val),
3651 simd: self,
3652 }
3653 }
3654 #[inline(always)]
3655 fn as_array_mask32x4(self, a: mask32x4<Self>) -> [i32; 4usize] {
3656 a.val.0
3657 }
3658 #[inline(always)]
3659 fn as_array_ref_mask32x4(self, a: &mask32x4<Self>) -> &[i32; 4usize] {
3660 &a.val.0
3661 }
3662 #[inline(always)]
3663 fn as_array_mut_mask32x4(self, a: &mut mask32x4<Self>) -> &mut [i32; 4usize] {
3664 &mut a.val.0
3665 }
3666 #[inline(always)]
3667 fn store_array_mask32x4(self, a: mask32x4<Self>, dest: &mut [i32; 4usize]) -> () {
3668 *dest = a.val.0;
3669 }
3670 #[inline(always)]
3671 fn cvt_from_bytes_mask32x4(self, a: u8x16<Self>) -> mask32x4<Self> {
3672 unsafe {
3673 mask32x4 {
3674 val: core::mem::transmute(a.val),
3675 simd: self,
3676 }
3677 }
3678 }
3679 #[inline(always)]
3680 fn cvt_to_bytes_mask32x4(self, a: mask32x4<Self>) -> u8x16<Self> {
3681 unsafe {
3682 u8x16 {
3683 val: core::mem::transmute(a.val),
3684 simd: self,
3685 }
3686 }
3687 }
3688 #[inline(always)]
3689 fn slide_mask32x4<const SHIFT: usize>(
3690 self,
3691 a: mask32x4<Self>,
3692 b: mask32x4<Self>,
3693 ) -> mask32x4<Self> {
3694 let mut dest = [Default::default(); 4usize];
3695 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3696 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3697 dest.simd_into(self)
3698 }
3699 #[inline(always)]
3700 fn slide_within_blocks_mask32x4<const SHIFT: usize>(
3701 self,
3702 a: mask32x4<Self>,
3703 b: mask32x4<Self>,
3704 ) -> mask32x4<Self> {
3705 self.slide_mask32x4::<SHIFT>(a, b)
3706 }
3707 #[inline(always)]
3708 fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3709 [
3710 i32::bitand(a[0usize], &b[0usize]),
3711 i32::bitand(a[1usize], &b[1usize]),
3712 i32::bitand(a[2usize], &b[2usize]),
3713 i32::bitand(a[3usize], &b[3usize]),
3714 ]
3715 .simd_into(self)
3716 }
3717 #[inline(always)]
3718 fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3719 [
3720 i32::bitor(a[0usize], &b[0usize]),
3721 i32::bitor(a[1usize], &b[1usize]),
3722 i32::bitor(a[2usize], &b[2usize]),
3723 i32::bitor(a[3usize], &b[3usize]),
3724 ]
3725 .simd_into(self)
3726 }
3727 #[inline(always)]
3728 fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3729 [
3730 i32::bitxor(a[0usize], &b[0usize]),
3731 i32::bitxor(a[1usize], &b[1usize]),
3732 i32::bitxor(a[2usize], &b[2usize]),
3733 i32::bitxor(a[3usize], &b[3usize]),
3734 ]
3735 .simd_into(self)
3736 }
3737 #[inline(always)]
3738 fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
3739 [
3740 i32::not(a[0usize]),
3741 i32::not(a[1usize]),
3742 i32::not(a[2usize]),
3743 i32::not(a[3usize]),
3744 ]
3745 .simd_into(self)
3746 }
3747 #[inline(always)]
3748 fn select_mask32x4(
3749 self,
3750 a: mask32x4<Self>,
3751 b: mask32x4<Self>,
3752 c: mask32x4<Self>,
3753 ) -> mask32x4<Self> {
3754 [
3755 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3756 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3757 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3758 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3759 ]
3760 .simd_into(self)
3761 }
3762 #[inline(always)]
3763 fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3764 [
3765 -(i32::eq(&a[0usize], &b[0usize]) as i32),
3766 -(i32::eq(&a[1usize], &b[1usize]) as i32),
3767 -(i32::eq(&a[2usize], &b[2usize]) as i32),
3768 -(i32::eq(&a[3usize], &b[3usize]) as i32),
3769 ]
3770 .simd_into(self)
3771 }
3772 #[inline(always)]
3773 fn any_true_mask32x4(self, a: mask32x4<Self>) -> bool {
3774 a[0usize] != 0 || a[1usize] != 0 || a[2usize] != 0 || a[3usize] != 0
3775 }
3776 #[inline(always)]
3777 fn all_true_mask32x4(self, a: mask32x4<Self>) -> bool {
3778 a[0usize] != 0 && a[1usize] != 0 && a[2usize] != 0 && a[3usize] != 0
3779 }
3780 #[inline(always)]
3781 fn any_false_mask32x4(self, a: mask32x4<Self>) -> bool {
3782 a[0usize] == 0 || a[1usize] == 0 || a[2usize] == 0 || a[3usize] == 0
3783 }
3784 #[inline(always)]
3785 fn all_false_mask32x4(self, a: mask32x4<Self>) -> bool {
3786 a[0usize] == 0 && a[1usize] == 0 && a[2usize] == 0 && a[3usize] == 0
3787 }
3788 #[inline(always)]
3789 fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
3790 let mut result = [0; 8usize];
3791 result[0..4usize].copy_from_slice(&a.val.0);
3792 result[4usize..8usize].copy_from_slice(&b.val.0);
3793 result.simd_into(self)
3794 }
3795 #[inline(always)]
3796 fn splat_f64x2(self, val: f64) -> f64x2<Self> {
3797 [val; 2usize].simd_into(self)
3798 }
3799 #[inline(always)]
3800 fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2<Self> {
3801 f64x2 {
3802 val: crate::support::Aligned128(val),
3803 simd: self,
3804 }
3805 }
3806 #[inline(always)]
3807 fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2<Self> {
3808 f64x2 {
3809 val: crate::support::Aligned128(*val),
3810 simd: self,
3811 }
3812 }
3813 #[inline(always)]
3814 fn as_array_f64x2(self, a: f64x2<Self>) -> [f64; 2usize] {
3815 a.val.0
3816 }
3817 #[inline(always)]
3818 fn as_array_ref_f64x2(self, a: &f64x2<Self>) -> &[f64; 2usize] {
3819 &a.val.0
3820 }
3821 #[inline(always)]
3822 fn as_array_mut_f64x2(self, a: &mut f64x2<Self>) -> &mut [f64; 2usize] {
3823 &mut a.val.0
3824 }
3825 #[inline(always)]
3826 fn store_array_f64x2(self, a: f64x2<Self>, dest: &mut [f64; 2usize]) -> () {
3827 *dest = a.val.0;
3828 }
3829 #[inline(always)]
3830 fn cvt_from_bytes_f64x2(self, a: u8x16<Self>) -> f64x2<Self> {
3831 unsafe {
3832 f64x2 {
3833 val: core::mem::transmute(a.val),
3834 simd: self,
3835 }
3836 }
3837 }
3838 #[inline(always)]
3839 fn cvt_to_bytes_f64x2(self, a: f64x2<Self>) -> u8x16<Self> {
3840 unsafe {
3841 u8x16 {
3842 val: core::mem::transmute(a.val),
3843 simd: self,
3844 }
3845 }
3846 }
3847 #[inline(always)]
3848 fn slide_f64x2<const SHIFT: usize>(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3849 let mut dest = [Default::default(); 2usize];
3850 dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3851 dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3852 dest.simd_into(self)
3853 }
3854 #[inline(always)]
3855 fn slide_within_blocks_f64x2<const SHIFT: usize>(
3856 self,
3857 a: f64x2<Self>,
3858 b: f64x2<Self>,
3859 ) -> f64x2<Self> {
3860 self.slide_f64x2::<SHIFT>(a, b)
3861 }
3862 #[inline(always)]
3863 fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3864 [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
3865 }
3866 #[inline(always)]
3867 fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3868 [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
3869 }
3870 #[inline(always)]
3871 fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3872 [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
3873 }
3874 #[inline(always)]
3875 fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3876 [
3877 f64::add(a[0usize], &b[0usize]),
3878 f64::add(a[1usize], &b[1usize]),
3879 ]
3880 .simd_into(self)
3881 }
3882 #[inline(always)]
3883 fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3884 [
3885 f64::sub(a[0usize], &b[0usize]),
3886 f64::sub(a[1usize], &b[1usize]),
3887 ]
3888 .simd_into(self)
3889 }
3890 #[inline(always)]
3891 fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3892 [
3893 f64::mul(a[0usize], &b[0usize]),
3894 f64::mul(a[1usize], &b[1usize]),
3895 ]
3896 .simd_into(self)
3897 }
3898 #[inline(always)]
3899 fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3900 [
3901 f64::div(a[0usize], &b[0usize]),
3902 f64::div(a[1usize], &b[1usize]),
3903 ]
3904 .simd_into(self)
3905 }
3906 #[inline(always)]
3907 fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3908 [
3909 f64::copysign(a[0usize], b[0usize]),
3910 f64::copysign(a[1usize], b[1usize]),
3911 ]
3912 .simd_into(self)
3913 }
3914 #[inline(always)]
3915 fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3916 [
3917 -(f64::eq(&a[0usize], &b[0usize]) as i64),
3918 -(f64::eq(&a[1usize], &b[1usize]) as i64),
3919 ]
3920 .simd_into(self)
3921 }
3922 #[inline(always)]
3923 fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3924 [
3925 -(f64::lt(&a[0usize], &b[0usize]) as i64),
3926 -(f64::lt(&a[1usize], &b[1usize]) as i64),
3927 ]
3928 .simd_into(self)
3929 }
3930 #[inline(always)]
3931 fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3932 [
3933 -(f64::le(&a[0usize], &b[0usize]) as i64),
3934 -(f64::le(&a[1usize], &b[1usize]) as i64),
3935 ]
3936 .simd_into(self)
3937 }
3938 #[inline(always)]
3939 fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3940 [
3941 -(f64::ge(&a[0usize], &b[0usize]) as i64),
3942 -(f64::ge(&a[1usize], &b[1usize]) as i64),
3943 ]
3944 .simd_into(self)
3945 }
3946 #[inline(always)]
3947 fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3948 [
3949 -(f64::gt(&a[0usize], &b[0usize]) as i64),
3950 -(f64::gt(&a[1usize], &b[1usize]) as i64),
3951 ]
3952 .simd_into(self)
3953 }
3954 #[inline(always)]
3955 fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3956 [a[0usize], b[0usize]].simd_into(self)
3957 }
3958 #[inline(always)]
3959 fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3960 [a[1usize], b[1usize]].simd_into(self)
3961 }
3962 #[inline(always)]
3963 fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3964 [a[0usize], b[0usize]].simd_into(self)
3965 }
3966 #[inline(always)]
3967 fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3968 [a[1usize], b[1usize]].simd_into(self)
3969 }
3970 #[inline(always)]
3971 fn interleave_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> (f64x2<Self>, f64x2<Self>) {
3972 (self.zip_low_f64x2(a, b), self.zip_high_f64x2(a, b))
3973 }
3974 #[inline(always)]
3975 fn deinterleave_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> (f64x2<Self>, f64x2<Self>) {
3976 (self.unzip_low_f64x2(a, b), self.unzip_high_f64x2(a, b))
3977 }
3978 #[inline(always)]
3979 fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3980 [
3981 f64::max(a[0usize], b[0usize]),
3982 f64::max(a[1usize], b[1usize]),
3983 ]
3984 .simd_into(self)
3985 }
3986 #[inline(always)]
3987 fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3988 [
3989 f64::min(a[0usize], b[0usize]),
3990 f64::min(a[1usize], b[1usize]),
3991 ]
3992 .simd_into(self)
3993 }
3994 #[inline(always)]
3995 fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3996 [
3997 f64::max(a[0usize], b[0usize]),
3998 f64::max(a[1usize], b[1usize]),
3999 ]
4000 .simd_into(self)
4001 }
4002 #[inline(always)]
4003 fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
4004 [
4005 f64::min(a[0usize], b[0usize]),
4006 f64::min(a[1usize], b[1usize]),
4007 ]
4008 .simd_into(self)
4009 }
4010 #[inline(always)]
4011 fn mul_add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4012 a.mul(b).add(c)
4013 }
4014 #[inline(always)]
4015 fn mul_sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4016 a.mul(b).sub(c)
4017 }
4018 #[inline(always)]
4019 fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4020 [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
4021 }
4022 #[inline(always)]
4023 fn ceil_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4024 [f64::ceil(a[0usize]), f64::ceil(a[1usize])].simd_into(self)
4025 }
4026 #[inline(always)]
4027 fn round_ties_even_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4028 [
4029 f64::round_ties_even(a[0usize]),
4030 f64::round_ties_even(a[1usize]),
4031 ]
4032 .simd_into(self)
4033 }
4034 #[inline(always)]
4035 fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4036 [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
4037 }
4038 #[inline(always)]
4039 fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4040 [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
4041 }
4042 #[inline(always)]
4043 fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4044 [
4045 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
4046 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
4047 ]
4048 .simd_into(self)
4049 }
4050 #[inline(always)]
4051 fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
4052 let mut result = [0.0; 4usize];
4053 result[0..2usize].copy_from_slice(&a.val.0);
4054 result[2usize..4usize].copy_from_slice(&b.val.0);
4055 result.simd_into(self)
4056 }
4057 #[inline(always)]
4058 fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
4059 a.bitcast()
4060 }
4061 #[inline(always)]
4062 fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
4063 [val; 2usize].simd_into(self)
4064 }
4065 #[inline(always)]
4066 fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2<Self> {
4067 mask64x2 {
4068 val: crate::support::Aligned128(val),
4069 simd: self,
4070 }
4071 }
4072 #[inline(always)]
4073 fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2<Self> {
4074 mask64x2 {
4075 val: crate::support::Aligned128(*val),
4076 simd: self,
4077 }
4078 }
4079 #[inline(always)]
4080 fn as_array_mask64x2(self, a: mask64x2<Self>) -> [i64; 2usize] {
4081 a.val.0
4082 }
4083 #[inline(always)]
4084 fn as_array_ref_mask64x2(self, a: &mask64x2<Self>) -> &[i64; 2usize] {
4085 &a.val.0
4086 }
4087 #[inline(always)]
4088 fn as_array_mut_mask64x2(self, a: &mut mask64x2<Self>) -> &mut [i64; 2usize] {
4089 &mut a.val.0
4090 }
4091 #[inline(always)]
4092 fn store_array_mask64x2(self, a: mask64x2<Self>, dest: &mut [i64; 2usize]) -> () {
4093 *dest = a.val.0;
4094 }
4095 #[inline(always)]
4096 fn cvt_from_bytes_mask64x2(self, a: u8x16<Self>) -> mask64x2<Self> {
4097 unsafe {
4098 mask64x2 {
4099 val: core::mem::transmute(a.val),
4100 simd: self,
4101 }
4102 }
4103 }
4104 #[inline(always)]
4105 fn cvt_to_bytes_mask64x2(self, a: mask64x2<Self>) -> u8x16<Self> {
4106 unsafe {
4107 u8x16 {
4108 val: core::mem::transmute(a.val),
4109 simd: self,
4110 }
4111 }
4112 }
4113 #[inline(always)]
4114 fn slide_mask64x2<const SHIFT: usize>(
4115 self,
4116 a: mask64x2<Self>,
4117 b: mask64x2<Self>,
4118 ) -> mask64x2<Self> {
4119 let mut dest = [Default::default(); 2usize];
4120 dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4121 dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4122 dest.simd_into(self)
4123 }
4124 #[inline(always)]
4125 fn slide_within_blocks_mask64x2<const SHIFT: usize>(
4126 self,
4127 a: mask64x2<Self>,
4128 b: mask64x2<Self>,
4129 ) -> mask64x2<Self> {
4130 self.slide_mask64x2::<SHIFT>(a, b)
4131 }
4132 #[inline(always)]
4133 fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4134 [
4135 i64::bitand(a[0usize], &b[0usize]),
4136 i64::bitand(a[1usize], &b[1usize]),
4137 ]
4138 .simd_into(self)
4139 }
4140 #[inline(always)]
4141 fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4142 [
4143 i64::bitor(a[0usize], &b[0usize]),
4144 i64::bitor(a[1usize], &b[1usize]),
4145 ]
4146 .simd_into(self)
4147 }
4148 #[inline(always)]
4149 fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4150 [
4151 i64::bitxor(a[0usize], &b[0usize]),
4152 i64::bitxor(a[1usize], &b[1usize]),
4153 ]
4154 .simd_into(self)
4155 }
4156 #[inline(always)]
4157 fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
4158 [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
4159 }
4160 #[inline(always)]
4161 fn select_mask64x2(
4162 self,
4163 a: mask64x2<Self>,
4164 b: mask64x2<Self>,
4165 c: mask64x2<Self>,
4166 ) -> mask64x2<Self> {
4167 [
4168 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
4169 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
4170 ]
4171 .simd_into(self)
4172 }
4173 #[inline(always)]
4174 fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4175 [
4176 -(i64::eq(&a[0usize], &b[0usize]) as i64),
4177 -(i64::eq(&a[1usize], &b[1usize]) as i64),
4178 ]
4179 .simd_into(self)
4180 }
4181 #[inline(always)]
4182 fn any_true_mask64x2(self, a: mask64x2<Self>) -> bool {
4183 a[0usize] != 0 || a[1usize] != 0
4184 }
4185 #[inline(always)]
4186 fn all_true_mask64x2(self, a: mask64x2<Self>) -> bool {
4187 a[0usize] != 0 && a[1usize] != 0
4188 }
4189 #[inline(always)]
4190 fn any_false_mask64x2(self, a: mask64x2<Self>) -> bool {
4191 a[0usize] == 0 || a[1usize] == 0
4192 }
4193 #[inline(always)]
4194 fn all_false_mask64x2(self, a: mask64x2<Self>) -> bool {
4195 a[0usize] == 0 && a[1usize] == 0
4196 }
4197 #[inline(always)]
4198 fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
4199 let mut result = [0; 4usize];
4200 result[0..2usize].copy_from_slice(&a.val.0);
4201 result[2usize..4usize].copy_from_slice(&b.val.0);
4202 result.simd_into(self)
4203 }
4204 #[inline(always)]
4205 fn splat_f32x8(self, val: f32) -> f32x8<Self> {
4206 let half = self.splat_f32x4(val);
4207 self.combine_f32x4(half, half)
4208 }
4209 #[inline(always)]
4210 fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8<Self> {
4211 f32x8 {
4212 val: crate::support::Aligned256(val),
4213 simd: self,
4214 }
4215 }
4216 #[inline(always)]
4217 fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8<Self> {
4218 f32x8 {
4219 val: crate::support::Aligned256(*val),
4220 simd: self,
4221 }
4222 }
4223 #[inline(always)]
4224 fn as_array_f32x8(self, a: f32x8<Self>) -> [f32; 8usize] {
4225 a.val.0
4226 }
4227 #[inline(always)]
4228 fn as_array_ref_f32x8(self, a: &f32x8<Self>) -> &[f32; 8usize] {
4229 &a.val.0
4230 }
4231 #[inline(always)]
4232 fn as_array_mut_f32x8(self, a: &mut f32x8<Self>) -> &mut [f32; 8usize] {
4233 &mut a.val.0
4234 }
4235 #[inline(always)]
4236 fn store_array_f32x8(self, a: f32x8<Self>, dest: &mut [f32; 8usize]) -> () {
4237 *dest = a.val.0;
4238 }
4239 #[inline(always)]
4240 fn cvt_from_bytes_f32x8(self, a: u8x32<Self>) -> f32x8<Self> {
4241 unsafe {
4242 f32x8 {
4243 val: core::mem::transmute(a.val),
4244 simd: self,
4245 }
4246 }
4247 }
4248 #[inline(always)]
4249 fn cvt_to_bytes_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
4250 unsafe {
4251 u8x32 {
4252 val: core::mem::transmute(a.val),
4253 simd: self,
4254 }
4255 }
4256 }
4257 #[inline(always)]
4258 fn slide_f32x8<const SHIFT: usize>(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4259 let mut dest = [Default::default(); 8usize];
4260 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4261 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4262 dest.simd_into(self)
4263 }
4264 #[inline(always)]
4265 fn slide_within_blocks_f32x8<const SHIFT: usize>(
4266 self,
4267 a: f32x8<Self>,
4268 b: f32x8<Self>,
4269 ) -> f32x8<Self> {
4270 let (a0, a1) = self.split_f32x8(a);
4271 let (b0, b1) = self.split_f32x8(b);
4272 self.combine_f32x4(
4273 self.slide_within_blocks_f32x4::<SHIFT>(a0, b0),
4274 self.slide_within_blocks_f32x4::<SHIFT>(a1, b1),
4275 )
4276 }
4277 #[inline(always)]
4278 fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4279 let (a0, a1) = self.split_f32x8(a);
4280 self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
4281 }
4282 #[inline(always)]
4283 fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4284 let (a0, a1) = self.split_f32x8(a);
4285 self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
4286 }
4287 #[inline(always)]
4288 fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4289 let (a0, a1) = self.split_f32x8(a);
4290 self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
4291 }
4292 #[inline(always)]
4293 fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4294 let (a0, a1) = self.split_f32x8(a);
4295 let (b0, b1) = self.split_f32x8(b);
4296 self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
4297 }
4298 #[inline(always)]
4299 fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4300 let (a0, a1) = self.split_f32x8(a);
4301 let (b0, b1) = self.split_f32x8(b);
4302 self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
4303 }
4304 #[inline(always)]
4305 fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4306 let (a0, a1) = self.split_f32x8(a);
4307 let (b0, b1) = self.split_f32x8(b);
4308 self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
4309 }
4310 #[inline(always)]
4311 fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4312 let (a0, a1) = self.split_f32x8(a);
4313 let (b0, b1) = self.split_f32x8(b);
4314 self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
4315 }
4316 #[inline(always)]
4317 fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4318 let (a0, a1) = self.split_f32x8(a);
4319 let (b0, b1) = self.split_f32x8(b);
4320 self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
4321 }
4322 #[inline(always)]
4323 fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4324 let (a0, a1) = self.split_f32x8(a);
4325 let (b0, b1) = self.split_f32x8(b);
4326 self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
4327 }
4328 #[inline(always)]
4329 fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4330 let (a0, a1) = self.split_f32x8(a);
4331 let (b0, b1) = self.split_f32x8(b);
4332 self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
4333 }
4334 #[inline(always)]
4335 fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4336 let (a0, a1) = self.split_f32x8(a);
4337 let (b0, b1) = self.split_f32x8(b);
4338 self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
4339 }
4340 #[inline(always)]
4341 fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4342 let (a0, a1) = self.split_f32x8(a);
4343 let (b0, b1) = self.split_f32x8(b);
4344 self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
4345 }
4346 #[inline(always)]
4347 fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4348 let (a0, a1) = self.split_f32x8(a);
4349 let (b0, b1) = self.split_f32x8(b);
4350 self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
4351 }
4352 #[inline(always)]
4353 fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4354 let (a0, _) = self.split_f32x8(a);
4355 let (b0, _) = self.split_f32x8(b);
4356 self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
4357 }
4358 #[inline(always)]
4359 fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4360 let (_, a1) = self.split_f32x8(a);
4361 let (_, b1) = self.split_f32x8(b);
4362 self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
4363 }
4364 #[inline(always)]
4365 fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4366 let (a0, a1) = self.split_f32x8(a);
4367 let (b0, b1) = self.split_f32x8(b);
4368 self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
4369 }
4370 #[inline(always)]
4371 fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4372 let (a0, a1) = self.split_f32x8(a);
4373 let (b0, b1) = self.split_f32x8(b);
4374 self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
4375 }
4376 #[inline(always)]
4377 fn interleave_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> (f32x8<Self>, f32x8<Self>) {
4378 let (a0, a1) = self.split_f32x8(a);
4379 let (b0, b1) = self.split_f32x8(b);
4380 let lo_lo = self.zip_low_f32x4(a0, b0);
4381 let lo_hi = self.zip_high_f32x4(a0, b0);
4382 let hi_lo = self.zip_low_f32x4(a1, b1);
4383 let hi_hi = self.zip_high_f32x4(a1, b1);
4384 (
4385 self.combine_f32x4(lo_lo, lo_hi),
4386 self.combine_f32x4(hi_lo, hi_hi),
4387 )
4388 }
4389 #[inline(always)]
4390 fn deinterleave_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> (f32x8<Self>, f32x8<Self>) {
4391 let (a0, a1) = self.split_f32x8(a);
4392 let (b0, b1) = self.split_f32x8(b);
4393 let lo_even = self.unzip_low_f32x4(a0, a1);
4394 let lo_odd = self.unzip_high_f32x4(a0, a1);
4395 let hi_even = self.unzip_low_f32x4(b0, b1);
4396 let hi_odd = self.unzip_high_f32x4(b0, b1);
4397 (
4398 self.combine_f32x4(lo_even, hi_even),
4399 self.combine_f32x4(lo_odd, hi_odd),
4400 )
4401 }
4402 #[inline(always)]
4403 fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4404 let (a0, a1) = self.split_f32x8(a);
4405 let (b0, b1) = self.split_f32x8(b);
4406 self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
4407 }
4408 #[inline(always)]
4409 fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4410 let (a0, a1) = self.split_f32x8(a);
4411 let (b0, b1) = self.split_f32x8(b);
4412 self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
4413 }
4414 #[inline(always)]
4415 fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4416 let (a0, a1) = self.split_f32x8(a);
4417 let (b0, b1) = self.split_f32x8(b);
4418 self.combine_f32x4(
4419 self.max_precise_f32x4(a0, b0),
4420 self.max_precise_f32x4(a1, b1),
4421 )
4422 }
4423 #[inline(always)]
4424 fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4425 let (a0, a1) = self.split_f32x8(a);
4426 let (b0, b1) = self.split_f32x8(b);
4427 self.combine_f32x4(
4428 self.min_precise_f32x4(a0, b0),
4429 self.min_precise_f32x4(a1, b1),
4430 )
4431 }
4432 #[inline(always)]
4433 fn mul_add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4434 let (a0, a1) = self.split_f32x8(a);
4435 let (b0, b1) = self.split_f32x8(b);
4436 let (c0, c1) = self.split_f32x8(c);
4437 self.combine_f32x4(
4438 self.mul_add_f32x4(a0, b0, c0),
4439 self.mul_add_f32x4(a1, b1, c1),
4440 )
4441 }
4442 #[inline(always)]
4443 fn mul_sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4444 let (a0, a1) = self.split_f32x8(a);
4445 let (b0, b1) = self.split_f32x8(b);
4446 let (c0, c1) = self.split_f32x8(c);
4447 self.combine_f32x4(
4448 self.mul_sub_f32x4(a0, b0, c0),
4449 self.mul_sub_f32x4(a1, b1, c1),
4450 )
4451 }
4452 #[inline(always)]
4453 fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4454 let (a0, a1) = self.split_f32x8(a);
4455 self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
4456 }
4457 #[inline(always)]
4458 fn ceil_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4459 let (a0, a1) = self.split_f32x8(a);
4460 self.combine_f32x4(self.ceil_f32x4(a0), self.ceil_f32x4(a1))
4461 }
4462 #[inline(always)]
4463 fn round_ties_even_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4464 let (a0, a1) = self.split_f32x8(a);
4465 self.combine_f32x4(
4466 self.round_ties_even_f32x4(a0),
4467 self.round_ties_even_f32x4(a1),
4468 )
4469 }
4470 #[inline(always)]
4471 fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4472 let (a0, a1) = self.split_f32x8(a);
4473 self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
4474 }
4475 #[inline(always)]
4476 fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4477 let (a0, a1) = self.split_f32x8(a);
4478 self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
4479 }
4480 #[inline(always)]
4481 fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4482 let (a0, a1) = self.split_mask32x8(a);
4483 let (b0, b1) = self.split_f32x8(b);
4484 let (c0, c1) = self.split_f32x8(c);
4485 self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
4486 }
4487 #[inline(always)]
4488 fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
4489 let mut result = [0.0; 16usize];
4490 result[0..8usize].copy_from_slice(&a.val.0);
4491 result[8usize..16usize].copy_from_slice(&b.val.0);
4492 result.simd_into(self)
4493 }
4494 #[inline(always)]
4495 fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
4496 let mut b0 = [0.0; 4usize];
4497 let mut b1 = [0.0; 4usize];
4498 b0.copy_from_slice(&a.val.0[0..4usize]);
4499 b1.copy_from_slice(&a.val.0[4usize..8usize]);
4500 (b0.simd_into(self), b1.simd_into(self))
4501 }
4502 #[inline(always)]
4503 fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
4504 let (a0, a1) = self.split_f32x8(a);
4505 self.combine_f64x2(
4506 self.reinterpret_f64_f32x4(a0),
4507 self.reinterpret_f64_f32x4(a1),
4508 )
4509 }
4510 #[inline(always)]
4511 fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4512 let (a0, a1) = self.split_f32x8(a);
4513 self.combine_i32x4(
4514 self.reinterpret_i32_f32x4(a0),
4515 self.reinterpret_i32_f32x4(a1),
4516 )
4517 }
4518 #[inline(always)]
4519 fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
4520 let (a0, a1) = self.split_f32x8(a);
4521 self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
4522 }
4523 #[inline(always)]
4524 fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4525 let (a0, a1) = self.split_f32x8(a);
4526 self.combine_u32x4(
4527 self.reinterpret_u32_f32x4(a0),
4528 self.reinterpret_u32_f32x4(a1),
4529 )
4530 }
4531 #[inline(always)]
4532 fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4533 let (a0, a1) = self.split_f32x8(a);
4534 self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
4535 }
4536 #[inline(always)]
4537 fn cvt_u32_precise_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4538 let (a0, a1) = self.split_f32x8(a);
4539 self.combine_u32x4(
4540 self.cvt_u32_precise_f32x4(a0),
4541 self.cvt_u32_precise_f32x4(a1),
4542 )
4543 }
4544 #[inline(always)]
4545 fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4546 let (a0, a1) = self.split_f32x8(a);
4547 self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
4548 }
4549 #[inline(always)]
4550 fn cvt_i32_precise_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4551 let (a0, a1) = self.split_f32x8(a);
4552 self.combine_i32x4(
4553 self.cvt_i32_precise_f32x4(a0),
4554 self.cvt_i32_precise_f32x4(a1),
4555 )
4556 }
4557 #[inline(always)]
4558 fn splat_i8x32(self, val: i8) -> i8x32<Self> {
4559 let half = self.splat_i8x16(val);
4560 self.combine_i8x16(half, half)
4561 }
4562 #[inline(always)]
4563 fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32<Self> {
4564 i8x32 {
4565 val: crate::support::Aligned256(val),
4566 simd: self,
4567 }
4568 }
4569 #[inline(always)]
4570 fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32<Self> {
4571 i8x32 {
4572 val: crate::support::Aligned256(*val),
4573 simd: self,
4574 }
4575 }
4576 #[inline(always)]
4577 fn as_array_i8x32(self, a: i8x32<Self>) -> [i8; 32usize] {
4578 a.val.0
4579 }
4580 #[inline(always)]
4581 fn as_array_ref_i8x32(self, a: &i8x32<Self>) -> &[i8; 32usize] {
4582 &a.val.0
4583 }
4584 #[inline(always)]
4585 fn as_array_mut_i8x32(self, a: &mut i8x32<Self>) -> &mut [i8; 32usize] {
4586 &mut a.val.0
4587 }
4588 #[inline(always)]
4589 fn store_array_i8x32(self, a: i8x32<Self>, dest: &mut [i8; 32usize]) -> () {
4590 *dest = a.val.0;
4591 }
4592 #[inline(always)]
4593 fn cvt_from_bytes_i8x32(self, a: u8x32<Self>) -> i8x32<Self> {
4594 unsafe {
4595 i8x32 {
4596 val: core::mem::transmute(a.val),
4597 simd: self,
4598 }
4599 }
4600 }
4601 #[inline(always)]
4602 fn cvt_to_bytes_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
4603 unsafe {
4604 u8x32 {
4605 val: core::mem::transmute(a.val),
4606 simd: self,
4607 }
4608 }
4609 }
4610 #[inline(always)]
4611 fn slide_i8x32<const SHIFT: usize>(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4612 let mut dest = [Default::default(); 32usize];
4613 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4614 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4615 dest.simd_into(self)
4616 }
4617 #[inline(always)]
4618 fn slide_within_blocks_i8x32<const SHIFT: usize>(
4619 self,
4620 a: i8x32<Self>,
4621 b: i8x32<Self>,
4622 ) -> i8x32<Self> {
4623 let (a0, a1) = self.split_i8x32(a);
4624 let (b0, b1) = self.split_i8x32(b);
4625 self.combine_i8x16(
4626 self.slide_within_blocks_i8x16::<SHIFT>(a0, b0),
4627 self.slide_within_blocks_i8x16::<SHIFT>(a1, b1),
4628 )
4629 }
4630 #[inline(always)]
4631 fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4632 let (a0, a1) = self.split_i8x32(a);
4633 let (b0, b1) = self.split_i8x32(b);
4634 self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
4635 }
4636 #[inline(always)]
4637 fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4638 let (a0, a1) = self.split_i8x32(a);
4639 let (b0, b1) = self.split_i8x32(b);
4640 self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
4641 }
4642 #[inline(always)]
4643 fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4644 let (a0, a1) = self.split_i8x32(a);
4645 let (b0, b1) = self.split_i8x32(b);
4646 self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
4647 }
4648 #[inline(always)]
4649 fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4650 let (a0, a1) = self.split_i8x32(a);
4651 let (b0, b1) = self.split_i8x32(b);
4652 self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
4653 }
4654 #[inline(always)]
4655 fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4656 let (a0, a1) = self.split_i8x32(a);
4657 let (b0, b1) = self.split_i8x32(b);
4658 self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
4659 }
4660 #[inline(always)]
4661 fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4662 let (a0, a1) = self.split_i8x32(a);
4663 let (b0, b1) = self.split_i8x32(b);
4664 self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
4665 }
4666 #[inline(always)]
4667 fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
4668 let (a0, a1) = self.split_i8x32(a);
4669 self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
4670 }
4671 #[inline(always)]
4672 fn shl_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
4673 let (a0, a1) = self.split_i8x32(a);
4674 self.combine_i8x16(self.shl_i8x16(a0, shift), self.shl_i8x16(a1, shift))
4675 }
4676 #[inline(always)]
4677 fn shlv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4678 let (a0, a1) = self.split_i8x32(a);
4679 let (b0, b1) = self.split_i8x32(b);
4680 self.combine_i8x16(self.shlv_i8x16(a0, b0), self.shlv_i8x16(a1, b1))
4681 }
4682 #[inline(always)]
4683 fn shr_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
4684 let (a0, a1) = self.split_i8x32(a);
4685 self.combine_i8x16(self.shr_i8x16(a0, shift), self.shr_i8x16(a1, shift))
4686 }
4687 #[inline(always)]
4688 fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4689 let (a0, a1) = self.split_i8x32(a);
4690 let (b0, b1) = self.split_i8x32(b);
4691 self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
4692 }
4693 #[inline(always)]
4694 fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4695 let (a0, a1) = self.split_i8x32(a);
4696 let (b0, b1) = self.split_i8x32(b);
4697 self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
4698 }
4699 #[inline(always)]
4700 fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4701 let (a0, a1) = self.split_i8x32(a);
4702 let (b0, b1) = self.split_i8x32(b);
4703 self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
4704 }
4705 #[inline(always)]
4706 fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4707 let (a0, a1) = self.split_i8x32(a);
4708 let (b0, b1) = self.split_i8x32(b);
4709 self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
4710 }
4711 #[inline(always)]
4712 fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4713 let (a0, a1) = self.split_i8x32(a);
4714 let (b0, b1) = self.split_i8x32(b);
4715 self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
4716 }
4717 #[inline(always)]
4718 fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4719 let (a0, a1) = self.split_i8x32(a);
4720 let (b0, b1) = self.split_i8x32(b);
4721 self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
4722 }
4723 #[inline(always)]
4724 fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4725 let (a0, _) = self.split_i8x32(a);
4726 let (b0, _) = self.split_i8x32(b);
4727 self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
4728 }
4729 #[inline(always)]
4730 fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4731 let (_, a1) = self.split_i8x32(a);
4732 let (_, b1) = self.split_i8x32(b);
4733 self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
4734 }
4735 #[inline(always)]
4736 fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4737 let (a0, a1) = self.split_i8x32(a);
4738 let (b0, b1) = self.split_i8x32(b);
4739 self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
4740 }
4741 #[inline(always)]
4742 fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4743 let (a0, a1) = self.split_i8x32(a);
4744 let (b0, b1) = self.split_i8x32(b);
4745 self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
4746 }
4747 #[inline(always)]
4748 fn interleave_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> (i8x32<Self>, i8x32<Self>) {
4749 let (a0, a1) = self.split_i8x32(a);
4750 let (b0, b1) = self.split_i8x32(b);
4751 let lo_lo = self.zip_low_i8x16(a0, b0);
4752 let lo_hi = self.zip_high_i8x16(a0, b0);
4753 let hi_lo = self.zip_low_i8x16(a1, b1);
4754 let hi_hi = self.zip_high_i8x16(a1, b1);
4755 (
4756 self.combine_i8x16(lo_lo, lo_hi),
4757 self.combine_i8x16(hi_lo, hi_hi),
4758 )
4759 }
4760 #[inline(always)]
4761 fn deinterleave_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> (i8x32<Self>, i8x32<Self>) {
4762 let (a0, a1) = self.split_i8x32(a);
4763 let (b0, b1) = self.split_i8x32(b);
4764 let lo_even = self.unzip_low_i8x16(a0, a1);
4765 let lo_odd = self.unzip_high_i8x16(a0, a1);
4766 let hi_even = self.unzip_low_i8x16(b0, b1);
4767 let hi_odd = self.unzip_high_i8x16(b0, b1);
4768 (
4769 self.combine_i8x16(lo_even, hi_even),
4770 self.combine_i8x16(lo_odd, hi_odd),
4771 )
4772 }
4773 #[inline(always)]
4774 fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
4775 let (a0, a1) = self.split_mask8x32(a);
4776 let (b0, b1) = self.split_i8x32(b);
4777 let (c0, c1) = self.split_i8x32(c);
4778 self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
4779 }
4780 #[inline(always)]
4781 fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4782 let (a0, a1) = self.split_i8x32(a);
4783 let (b0, b1) = self.split_i8x32(b);
4784 self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
4785 }
4786 #[inline(always)]
4787 fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4788 let (a0, a1) = self.split_i8x32(a);
4789 let (b0, b1) = self.split_i8x32(b);
4790 self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
4791 }
4792 #[inline(always)]
4793 fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
4794 let mut result = [0; 64usize];
4795 result[0..32usize].copy_from_slice(&a.val.0);
4796 result[32usize..64usize].copy_from_slice(&b.val.0);
4797 result.simd_into(self)
4798 }
4799 #[inline(always)]
4800 fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
4801 let mut b0 = [0; 16usize];
4802 let mut b1 = [0; 16usize];
4803 b0.copy_from_slice(&a.val.0[0..16usize]);
4804 b1.copy_from_slice(&a.val.0[16usize..32usize]);
4805 (b0.simd_into(self), b1.simd_into(self))
4806 }
4807 #[inline(always)]
4808 fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
4809 let (a0, a1) = self.split_i8x32(a);
4810 self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
4811 }
4812 #[inline(always)]
4813 fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
4814 let (a0, a1) = self.split_i8x32(a);
4815 self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
4816 }
4817 #[inline(always)]
4818 fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
4819 let (a0, a1) = self.split_i8x32(a);
4820 self.combine_u32x4(
4821 self.reinterpret_u32_i8x16(a0),
4822 self.reinterpret_u32_i8x16(a1),
4823 )
4824 }
4825 #[inline(always)]
4826 fn splat_u8x32(self, val: u8) -> u8x32<Self> {
4827 let half = self.splat_u8x16(val);
4828 self.combine_u8x16(half, half)
4829 }
4830 #[inline(always)]
4831 fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32<Self> {
4832 u8x32 {
4833 val: crate::support::Aligned256(val),
4834 simd: self,
4835 }
4836 }
4837 #[inline(always)]
4838 fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32<Self> {
4839 u8x32 {
4840 val: crate::support::Aligned256(*val),
4841 simd: self,
4842 }
4843 }
4844 #[inline(always)]
4845 fn as_array_u8x32(self, a: u8x32<Self>) -> [u8; 32usize] {
4846 a.val.0
4847 }
4848 #[inline(always)]
4849 fn as_array_ref_u8x32(self, a: &u8x32<Self>) -> &[u8; 32usize] {
4850 &a.val.0
4851 }
4852 #[inline(always)]
4853 fn as_array_mut_u8x32(self, a: &mut u8x32<Self>) -> &mut [u8; 32usize] {
4854 &mut a.val.0
4855 }
4856 #[inline(always)]
4857 fn store_array_u8x32(self, a: u8x32<Self>, dest: &mut [u8; 32usize]) -> () {
4858 *dest = a.val.0;
4859 }
4860 #[inline(always)]
4861 fn cvt_from_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4862 unsafe {
4863 u8x32 {
4864 val: core::mem::transmute(a.val),
4865 simd: self,
4866 }
4867 }
4868 }
4869 #[inline(always)]
4870 fn cvt_to_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4871 unsafe {
4872 u8x32 {
4873 val: core::mem::transmute(a.val),
4874 simd: self,
4875 }
4876 }
4877 }
4878 #[inline(always)]
4879 fn slide_u8x32<const SHIFT: usize>(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4880 let mut dest = [Default::default(); 32usize];
4881 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4882 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4883 dest.simd_into(self)
4884 }
4885 #[inline(always)]
4886 fn slide_within_blocks_u8x32<const SHIFT: usize>(
4887 self,
4888 a: u8x32<Self>,
4889 b: u8x32<Self>,
4890 ) -> u8x32<Self> {
4891 let (a0, a1) = self.split_u8x32(a);
4892 let (b0, b1) = self.split_u8x32(b);
4893 self.combine_u8x16(
4894 self.slide_within_blocks_u8x16::<SHIFT>(a0, b0),
4895 self.slide_within_blocks_u8x16::<SHIFT>(a1, b1),
4896 )
4897 }
4898 #[inline(always)]
4899 fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4900 let (a0, a1) = self.split_u8x32(a);
4901 let (b0, b1) = self.split_u8x32(b);
4902 self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
4903 }
4904 #[inline(always)]
4905 fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4906 let (a0, a1) = self.split_u8x32(a);
4907 let (b0, b1) = self.split_u8x32(b);
4908 self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
4909 }
4910 #[inline(always)]
4911 fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4912 let (a0, a1) = self.split_u8x32(a);
4913 let (b0, b1) = self.split_u8x32(b);
4914 self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
4915 }
4916 #[inline(always)]
4917 fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4918 let (a0, a1) = self.split_u8x32(a);
4919 let (b0, b1) = self.split_u8x32(b);
4920 self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
4921 }
4922 #[inline(always)]
4923 fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4924 let (a0, a1) = self.split_u8x32(a);
4925 let (b0, b1) = self.split_u8x32(b);
4926 self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
4927 }
4928 #[inline(always)]
4929 fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4930 let (a0, a1) = self.split_u8x32(a);
4931 let (b0, b1) = self.split_u8x32(b);
4932 self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
4933 }
4934 #[inline(always)]
4935 fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4936 let (a0, a1) = self.split_u8x32(a);
4937 self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
4938 }
4939 #[inline(always)]
4940 fn shl_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
4941 let (a0, a1) = self.split_u8x32(a);
4942 self.combine_u8x16(self.shl_u8x16(a0, shift), self.shl_u8x16(a1, shift))
4943 }
4944 #[inline(always)]
4945 fn shlv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4946 let (a0, a1) = self.split_u8x32(a);
4947 let (b0, b1) = self.split_u8x32(b);
4948 self.combine_u8x16(self.shlv_u8x16(a0, b0), self.shlv_u8x16(a1, b1))
4949 }
4950 #[inline(always)]
4951 fn shr_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
4952 let (a0, a1) = self.split_u8x32(a);
4953 self.combine_u8x16(self.shr_u8x16(a0, shift), self.shr_u8x16(a1, shift))
4954 }
4955 #[inline(always)]
4956 fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4957 let (a0, a1) = self.split_u8x32(a);
4958 let (b0, b1) = self.split_u8x32(b);
4959 self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
4960 }
4961 #[inline(always)]
4962 fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4963 let (a0, a1) = self.split_u8x32(a);
4964 let (b0, b1) = self.split_u8x32(b);
4965 self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
4966 }
4967 #[inline(always)]
4968 fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4969 let (a0, a1) = self.split_u8x32(a);
4970 let (b0, b1) = self.split_u8x32(b);
4971 self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
4972 }
4973 #[inline(always)]
4974 fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4975 let (a0, a1) = self.split_u8x32(a);
4976 let (b0, b1) = self.split_u8x32(b);
4977 self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
4978 }
4979 #[inline(always)]
4980 fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4981 let (a0, a1) = self.split_u8x32(a);
4982 let (b0, b1) = self.split_u8x32(b);
4983 self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
4984 }
4985 #[inline(always)]
4986 fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4987 let (a0, a1) = self.split_u8x32(a);
4988 let (b0, b1) = self.split_u8x32(b);
4989 self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
4990 }
4991 #[inline(always)]
4992 fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4993 let (a0, _) = self.split_u8x32(a);
4994 let (b0, _) = self.split_u8x32(b);
4995 self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
4996 }
4997 #[inline(always)]
4998 fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4999 let (_, a1) = self.split_u8x32(a);
5000 let (_, b1) = self.split_u8x32(b);
5001 self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
5002 }
5003 #[inline(always)]
5004 fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5005 let (a0, a1) = self.split_u8x32(a);
5006 let (b0, b1) = self.split_u8x32(b);
5007 self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
5008 }
5009 #[inline(always)]
5010 fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5011 let (a0, a1) = self.split_u8x32(a);
5012 let (b0, b1) = self.split_u8x32(b);
5013 self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
5014 }
5015 #[inline(always)]
5016 fn interleave_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> (u8x32<Self>, u8x32<Self>) {
5017 let (a0, a1) = self.split_u8x32(a);
5018 let (b0, b1) = self.split_u8x32(b);
5019 let lo_lo = self.zip_low_u8x16(a0, b0);
5020 let lo_hi = self.zip_high_u8x16(a0, b0);
5021 let hi_lo = self.zip_low_u8x16(a1, b1);
5022 let hi_hi = self.zip_high_u8x16(a1, b1);
5023 (
5024 self.combine_u8x16(lo_lo, lo_hi),
5025 self.combine_u8x16(hi_lo, hi_hi),
5026 )
5027 }
5028 #[inline(always)]
5029 fn deinterleave_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> (u8x32<Self>, u8x32<Self>) {
5030 let (a0, a1) = self.split_u8x32(a);
5031 let (b0, b1) = self.split_u8x32(b);
5032 let lo_even = self.unzip_low_u8x16(a0, a1);
5033 let lo_odd = self.unzip_high_u8x16(a0, a1);
5034 let hi_even = self.unzip_low_u8x16(b0, b1);
5035 let hi_odd = self.unzip_high_u8x16(b0, b1);
5036 (
5037 self.combine_u8x16(lo_even, hi_even),
5038 self.combine_u8x16(lo_odd, hi_odd),
5039 )
5040 }
5041 #[inline(always)]
5042 fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
5043 let (a0, a1) = self.split_mask8x32(a);
5044 let (b0, b1) = self.split_u8x32(b);
5045 let (c0, c1) = self.split_u8x32(c);
5046 self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
5047 }
5048 #[inline(always)]
5049 fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5050 let (a0, a1) = self.split_u8x32(a);
5051 let (b0, b1) = self.split_u8x32(b);
5052 self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
5053 }
5054 #[inline(always)]
5055 fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5056 let (a0, a1) = self.split_u8x32(a);
5057 let (b0, b1) = self.split_u8x32(b);
5058 self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
5059 }
5060 #[inline(always)]
5061 fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
5062 let mut result = [0; 64usize];
5063 result[0..32usize].copy_from_slice(&a.val.0);
5064 result[32usize..64usize].copy_from_slice(&b.val.0);
5065 result.simd_into(self)
5066 }
5067 #[inline(always)]
5068 fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
5069 let mut b0 = [0; 16usize];
5070 let mut b1 = [0; 16usize];
5071 b0.copy_from_slice(&a.val.0[0..16usize]);
5072 b1.copy_from_slice(&a.val.0[16usize..32usize]);
5073 (b0.simd_into(self), b1.simd_into(self))
5074 }
5075 #[inline(always)]
5076 fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
5077 let (a0, a1) = self.split_u8x32(a);
5078 self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
5079 }
5080 #[inline(always)]
5081 fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
5082 let (a0, a1) = self.split_u8x32(a);
5083 self.combine_u32x4(
5084 self.reinterpret_u32_u8x16(a0),
5085 self.reinterpret_u32_u8x16(a1),
5086 )
5087 }
5088 #[inline(always)]
5089 fn splat_mask8x32(self, val: i8) -> mask8x32<Self> {
5090 let half = self.splat_mask8x16(val);
5091 self.combine_mask8x16(half, half)
5092 }
5093 #[inline(always)]
5094 fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32<Self> {
5095 mask8x32 {
5096 val: crate::support::Aligned256(val),
5097 simd: self,
5098 }
5099 }
5100 #[inline(always)]
5101 fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32<Self> {
5102 mask8x32 {
5103 val: crate::support::Aligned256(*val),
5104 simd: self,
5105 }
5106 }
5107 #[inline(always)]
5108 fn as_array_mask8x32(self, a: mask8x32<Self>) -> [i8; 32usize] {
5109 a.val.0
5110 }
5111 #[inline(always)]
5112 fn as_array_ref_mask8x32(self, a: &mask8x32<Self>) -> &[i8; 32usize] {
5113 &a.val.0
5114 }
5115 #[inline(always)]
5116 fn as_array_mut_mask8x32(self, a: &mut mask8x32<Self>) -> &mut [i8; 32usize] {
5117 &mut a.val.0
5118 }
5119 #[inline(always)]
5120 fn store_array_mask8x32(self, a: mask8x32<Self>, dest: &mut [i8; 32usize]) -> () {
5121 *dest = a.val.0;
5122 }
5123 #[inline(always)]
5124 fn cvt_from_bytes_mask8x32(self, a: u8x32<Self>) -> mask8x32<Self> {
5125 unsafe {
5126 mask8x32 {
5127 val: core::mem::transmute(a.val),
5128 simd: self,
5129 }
5130 }
5131 }
5132 #[inline(always)]
5133 fn cvt_to_bytes_mask8x32(self, a: mask8x32<Self>) -> u8x32<Self> {
5134 unsafe {
5135 u8x32 {
5136 val: core::mem::transmute(a.val),
5137 simd: self,
5138 }
5139 }
5140 }
5141 #[inline(always)]
5142 fn slide_mask8x32<const SHIFT: usize>(
5143 self,
5144 a: mask8x32<Self>,
5145 b: mask8x32<Self>,
5146 ) -> mask8x32<Self> {
5147 let mut dest = [Default::default(); 32usize];
5148 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5149 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5150 dest.simd_into(self)
5151 }
5152 #[inline(always)]
5153 fn slide_within_blocks_mask8x32<const SHIFT: usize>(
5154 self,
5155 a: mask8x32<Self>,
5156 b: mask8x32<Self>,
5157 ) -> mask8x32<Self> {
5158 let (a0, a1) = self.split_mask8x32(a);
5159 let (b0, b1) = self.split_mask8x32(b);
5160 self.combine_mask8x16(
5161 self.slide_within_blocks_mask8x16::<SHIFT>(a0, b0),
5162 self.slide_within_blocks_mask8x16::<SHIFT>(a1, b1),
5163 )
5164 }
5165 #[inline(always)]
5166 fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5167 let (a0, a1) = self.split_mask8x32(a);
5168 let (b0, b1) = self.split_mask8x32(b);
5169 self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
5170 }
5171 #[inline(always)]
5172 fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5173 let (a0, a1) = self.split_mask8x32(a);
5174 let (b0, b1) = self.split_mask8x32(b);
5175 self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
5176 }
5177 #[inline(always)]
5178 fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5179 let (a0, a1) = self.split_mask8x32(a);
5180 let (b0, b1) = self.split_mask8x32(b);
5181 self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
5182 }
5183 #[inline(always)]
5184 fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
5185 let (a0, a1) = self.split_mask8x32(a);
5186 self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
5187 }
5188 #[inline(always)]
5189 fn select_mask8x32(
5190 self,
5191 a: mask8x32<Self>,
5192 b: mask8x32<Self>,
5193 c: mask8x32<Self>,
5194 ) -> mask8x32<Self> {
5195 let (a0, a1) = self.split_mask8x32(a);
5196 let (b0, b1) = self.split_mask8x32(b);
5197 let (c0, c1) = self.split_mask8x32(c);
5198 self.combine_mask8x16(
5199 self.select_mask8x16(a0, b0, c0),
5200 self.select_mask8x16(a1, b1, c1),
5201 )
5202 }
5203 #[inline(always)]
5204 fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5205 let (a0, a1) = self.split_mask8x32(a);
5206 let (b0, b1) = self.split_mask8x32(b);
5207 self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
5208 }
5209 #[inline(always)]
5210 fn any_true_mask8x32(self, a: mask8x32<Self>) -> bool {
5211 let (a0, a1) = self.split_mask8x32(a);
5212 self.any_true_mask8x16(a0) || self.any_true_mask8x16(a1)
5213 }
5214 #[inline(always)]
5215 fn all_true_mask8x32(self, a: mask8x32<Self>) -> bool {
5216 let (a0, a1) = self.split_mask8x32(a);
5217 self.all_true_mask8x16(a0) && self.all_true_mask8x16(a1)
5218 }
5219 #[inline(always)]
5220 fn any_false_mask8x32(self, a: mask8x32<Self>) -> bool {
5221 let (a0, a1) = self.split_mask8x32(a);
5222 self.any_false_mask8x16(a0) || self.any_false_mask8x16(a1)
5223 }
5224 #[inline(always)]
5225 fn all_false_mask8x32(self, a: mask8x32<Self>) -> bool {
5226 let (a0, a1) = self.split_mask8x32(a);
5227 self.all_false_mask8x16(a0) && self.all_false_mask8x16(a1)
5228 }
5229 #[inline(always)]
5230 fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
5231 let mut result = [0; 64usize];
5232 result[0..32usize].copy_from_slice(&a.val.0);
5233 result[32usize..64usize].copy_from_slice(&b.val.0);
5234 result.simd_into(self)
5235 }
5236 #[inline(always)]
5237 fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
5238 let mut b0 = [0; 16usize];
5239 let mut b1 = [0; 16usize];
5240 b0.copy_from_slice(&a.val.0[0..16usize]);
5241 b1.copy_from_slice(&a.val.0[16usize..32usize]);
5242 (b0.simd_into(self), b1.simd_into(self))
5243 }
5244 #[inline(always)]
5245 fn splat_i16x16(self, val: i16) -> i16x16<Self> {
5246 let half = self.splat_i16x8(val);
5247 self.combine_i16x8(half, half)
5248 }
5249 #[inline(always)]
5250 fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16<Self> {
5251 i16x16 {
5252 val: crate::support::Aligned256(val),
5253 simd: self,
5254 }
5255 }
5256 #[inline(always)]
5257 fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16<Self> {
5258 i16x16 {
5259 val: crate::support::Aligned256(*val),
5260 simd: self,
5261 }
5262 }
5263 #[inline(always)]
5264 fn as_array_i16x16(self, a: i16x16<Self>) -> [i16; 16usize] {
5265 a.val.0
5266 }
5267 #[inline(always)]
5268 fn as_array_ref_i16x16(self, a: &i16x16<Self>) -> &[i16; 16usize] {
5269 &a.val.0
5270 }
5271 #[inline(always)]
5272 fn as_array_mut_i16x16(self, a: &mut i16x16<Self>) -> &mut [i16; 16usize] {
5273 &mut a.val.0
5274 }
5275 #[inline(always)]
5276 fn store_array_i16x16(self, a: i16x16<Self>, dest: &mut [i16; 16usize]) -> () {
5277 *dest = a.val.0;
5278 }
5279 #[inline(always)]
5280 fn cvt_from_bytes_i16x16(self, a: u8x32<Self>) -> i16x16<Self> {
5281 unsafe {
5282 i16x16 {
5283 val: core::mem::transmute(a.val),
5284 simd: self,
5285 }
5286 }
5287 }
5288 #[inline(always)]
5289 fn cvt_to_bytes_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
5290 unsafe {
5291 u8x32 {
5292 val: core::mem::transmute(a.val),
5293 simd: self,
5294 }
5295 }
5296 }
5297 #[inline(always)]
5298 fn slide_i16x16<const SHIFT: usize>(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5299 let mut dest = [Default::default(); 16usize];
5300 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5301 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5302 dest.simd_into(self)
5303 }
5304 #[inline(always)]
5305 fn slide_within_blocks_i16x16<const SHIFT: usize>(
5306 self,
5307 a: i16x16<Self>,
5308 b: i16x16<Self>,
5309 ) -> i16x16<Self> {
5310 let (a0, a1) = self.split_i16x16(a);
5311 let (b0, b1) = self.split_i16x16(b);
5312 self.combine_i16x8(
5313 self.slide_within_blocks_i16x8::<SHIFT>(a0, b0),
5314 self.slide_within_blocks_i16x8::<SHIFT>(a1, b1),
5315 )
5316 }
5317 #[inline(always)]
5318 fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5319 let (a0, a1) = self.split_i16x16(a);
5320 let (b0, b1) = self.split_i16x16(b);
5321 self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
5322 }
5323 #[inline(always)]
5324 fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5325 let (a0, a1) = self.split_i16x16(a);
5326 let (b0, b1) = self.split_i16x16(b);
5327 self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
5328 }
5329 #[inline(always)]
5330 fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5331 let (a0, a1) = self.split_i16x16(a);
5332 let (b0, b1) = self.split_i16x16(b);
5333 self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
5334 }
5335 #[inline(always)]
5336 fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5337 let (a0, a1) = self.split_i16x16(a);
5338 let (b0, b1) = self.split_i16x16(b);
5339 self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
5340 }
5341 #[inline(always)]
5342 fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5343 let (a0, a1) = self.split_i16x16(a);
5344 let (b0, b1) = self.split_i16x16(b);
5345 self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
5346 }
5347 #[inline(always)]
5348 fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5349 let (a0, a1) = self.split_i16x16(a);
5350 let (b0, b1) = self.split_i16x16(b);
5351 self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
5352 }
5353 #[inline(always)]
5354 fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
5355 let (a0, a1) = self.split_i16x16(a);
5356 self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
5357 }
5358 #[inline(always)]
5359 fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
5360 let (a0, a1) = self.split_i16x16(a);
5361 self.combine_i16x8(self.shl_i16x8(a0, shift), self.shl_i16x8(a1, shift))
5362 }
5363 #[inline(always)]
5364 fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5365 let (a0, a1) = self.split_i16x16(a);
5366 let (b0, b1) = self.split_i16x16(b);
5367 self.combine_i16x8(self.shlv_i16x8(a0, b0), self.shlv_i16x8(a1, b1))
5368 }
5369 #[inline(always)]
5370 fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
5371 let (a0, a1) = self.split_i16x16(a);
5372 self.combine_i16x8(self.shr_i16x8(a0, shift), self.shr_i16x8(a1, shift))
5373 }
5374 #[inline(always)]
5375 fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5376 let (a0, a1) = self.split_i16x16(a);
5377 let (b0, b1) = self.split_i16x16(b);
5378 self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
5379 }
5380 #[inline(always)]
5381 fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5382 let (a0, a1) = self.split_i16x16(a);
5383 let (b0, b1) = self.split_i16x16(b);
5384 self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
5385 }
5386 #[inline(always)]
5387 fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5388 let (a0, a1) = self.split_i16x16(a);
5389 let (b0, b1) = self.split_i16x16(b);
5390 self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
5391 }
5392 #[inline(always)]
5393 fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5394 let (a0, a1) = self.split_i16x16(a);
5395 let (b0, b1) = self.split_i16x16(b);
5396 self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
5397 }
5398 #[inline(always)]
5399 fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5400 let (a0, a1) = self.split_i16x16(a);
5401 let (b0, b1) = self.split_i16x16(b);
5402 self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
5403 }
5404 #[inline(always)]
5405 fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5406 let (a0, a1) = self.split_i16x16(a);
5407 let (b0, b1) = self.split_i16x16(b);
5408 self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
5409 }
5410 #[inline(always)]
5411 fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5412 let (a0, _) = self.split_i16x16(a);
5413 let (b0, _) = self.split_i16x16(b);
5414 self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
5415 }
5416 #[inline(always)]
5417 fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5418 let (_, a1) = self.split_i16x16(a);
5419 let (_, b1) = self.split_i16x16(b);
5420 self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
5421 }
5422 #[inline(always)]
5423 fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5424 let (a0, a1) = self.split_i16x16(a);
5425 let (b0, b1) = self.split_i16x16(b);
5426 self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
5427 }
5428 #[inline(always)]
5429 fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5430 let (a0, a1) = self.split_i16x16(a);
5431 let (b0, b1) = self.split_i16x16(b);
5432 self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
5433 }
5434 #[inline(always)]
5435 fn interleave_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> (i16x16<Self>, i16x16<Self>) {
5436 let (a0, a1) = self.split_i16x16(a);
5437 let (b0, b1) = self.split_i16x16(b);
5438 let lo_lo = self.zip_low_i16x8(a0, b0);
5439 let lo_hi = self.zip_high_i16x8(a0, b0);
5440 let hi_lo = self.zip_low_i16x8(a1, b1);
5441 let hi_hi = self.zip_high_i16x8(a1, b1);
5442 (
5443 self.combine_i16x8(lo_lo, lo_hi),
5444 self.combine_i16x8(hi_lo, hi_hi),
5445 )
5446 }
5447 #[inline(always)]
5448 fn deinterleave_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> (i16x16<Self>, i16x16<Self>) {
5449 let (a0, a1) = self.split_i16x16(a);
5450 let (b0, b1) = self.split_i16x16(b);
5451 let lo_even = self.unzip_low_i16x8(a0, a1);
5452 let lo_odd = self.unzip_high_i16x8(a0, a1);
5453 let hi_even = self.unzip_low_i16x8(b0, b1);
5454 let hi_odd = self.unzip_high_i16x8(b0, b1);
5455 (
5456 self.combine_i16x8(lo_even, hi_even),
5457 self.combine_i16x8(lo_odd, hi_odd),
5458 )
5459 }
5460 #[inline(always)]
5461 fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
5462 let (a0, a1) = self.split_mask16x16(a);
5463 let (b0, b1) = self.split_i16x16(b);
5464 let (c0, c1) = self.split_i16x16(c);
5465 self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
5466 }
5467 #[inline(always)]
5468 fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5469 let (a0, a1) = self.split_i16x16(a);
5470 let (b0, b1) = self.split_i16x16(b);
5471 self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
5472 }
5473 #[inline(always)]
5474 fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5475 let (a0, a1) = self.split_i16x16(a);
5476 let (b0, b1) = self.split_i16x16(b);
5477 self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
5478 }
5479 #[inline(always)]
5480 fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
5481 let mut result = [0; 32usize];
5482 result[0..16usize].copy_from_slice(&a.val.0);
5483 result[16usize..32usize].copy_from_slice(&b.val.0);
5484 result.simd_into(self)
5485 }
5486 #[inline(always)]
5487 fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
5488 let mut b0 = [0; 8usize];
5489 let mut b1 = [0; 8usize];
5490 b0.copy_from_slice(&a.val.0[0..8usize]);
5491 b1.copy_from_slice(&a.val.0[8usize..16usize]);
5492 (b0.simd_into(self), b1.simd_into(self))
5493 }
5494 #[inline(always)]
5495 fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
5496 let (a0, a1) = self.split_i16x16(a);
5497 self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
5498 }
5499 #[inline(always)]
5500 fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
5501 let (a0, a1) = self.split_i16x16(a);
5502 self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
5503 }
5504 #[inline(always)]
5505 fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
5506 let (a0, a1) = self.split_i16x16(a);
5507 self.combine_u32x4(
5508 self.reinterpret_u32_i16x8(a0),
5509 self.reinterpret_u32_i16x8(a1),
5510 )
5511 }
5512 #[inline(always)]
5513 fn splat_u16x16(self, val: u16) -> u16x16<Self> {
5514 let half = self.splat_u16x8(val);
5515 self.combine_u16x8(half, half)
5516 }
5517 #[inline(always)]
5518 fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16<Self> {
5519 u16x16 {
5520 val: crate::support::Aligned256(val),
5521 simd: self,
5522 }
5523 }
5524 #[inline(always)]
5525 fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16<Self> {
5526 u16x16 {
5527 val: crate::support::Aligned256(*val),
5528 simd: self,
5529 }
5530 }
5531 #[inline(always)]
5532 fn as_array_u16x16(self, a: u16x16<Self>) -> [u16; 16usize] {
5533 a.val.0
5534 }
5535 #[inline(always)]
5536 fn as_array_ref_u16x16(self, a: &u16x16<Self>) -> &[u16; 16usize] {
5537 &a.val.0
5538 }
5539 #[inline(always)]
5540 fn as_array_mut_u16x16(self, a: &mut u16x16<Self>) -> &mut [u16; 16usize] {
5541 &mut a.val.0
5542 }
5543 #[inline(always)]
5544 fn store_array_u16x16(self, a: u16x16<Self>, dest: &mut [u16; 16usize]) -> () {
5545 *dest = a.val.0;
5546 }
5547 #[inline(always)]
5548 fn cvt_from_bytes_u16x16(self, a: u8x32<Self>) -> u16x16<Self> {
5549 unsafe {
5550 u16x16 {
5551 val: core::mem::transmute(a.val),
5552 simd: self,
5553 }
5554 }
5555 }
5556 #[inline(always)]
5557 fn cvt_to_bytes_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
5558 unsafe {
5559 u8x32 {
5560 val: core::mem::transmute(a.val),
5561 simd: self,
5562 }
5563 }
5564 }
5565 #[inline(always)]
5566 fn slide_u16x16<const SHIFT: usize>(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5567 let mut dest = [Default::default(); 16usize];
5568 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5569 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5570 dest.simd_into(self)
5571 }
5572 #[inline(always)]
5573 fn slide_within_blocks_u16x16<const SHIFT: usize>(
5574 self,
5575 a: u16x16<Self>,
5576 b: u16x16<Self>,
5577 ) -> u16x16<Self> {
5578 let (a0, a1) = self.split_u16x16(a);
5579 let (b0, b1) = self.split_u16x16(b);
5580 self.combine_u16x8(
5581 self.slide_within_blocks_u16x8::<SHIFT>(a0, b0),
5582 self.slide_within_blocks_u16x8::<SHIFT>(a1, b1),
5583 )
5584 }
5585 #[inline(always)]
5586 fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5587 let (a0, a1) = self.split_u16x16(a);
5588 let (b0, b1) = self.split_u16x16(b);
5589 self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
5590 }
5591 #[inline(always)]
5592 fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5593 let (a0, a1) = self.split_u16x16(a);
5594 let (b0, b1) = self.split_u16x16(b);
5595 self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
5596 }
5597 #[inline(always)]
5598 fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5599 let (a0, a1) = self.split_u16x16(a);
5600 let (b0, b1) = self.split_u16x16(b);
5601 self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
5602 }
5603 #[inline(always)]
5604 fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5605 let (a0, a1) = self.split_u16x16(a);
5606 let (b0, b1) = self.split_u16x16(b);
5607 self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
5608 }
5609 #[inline(always)]
5610 fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5611 let (a0, a1) = self.split_u16x16(a);
5612 let (b0, b1) = self.split_u16x16(b);
5613 self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
5614 }
5615 #[inline(always)]
5616 fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5617 let (a0, a1) = self.split_u16x16(a);
5618 let (b0, b1) = self.split_u16x16(b);
5619 self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
5620 }
5621 #[inline(always)]
5622 fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
5623 let (a0, a1) = self.split_u16x16(a);
5624 self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
5625 }
5626 #[inline(always)]
5627 fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
5628 let (a0, a1) = self.split_u16x16(a);
5629 self.combine_u16x8(self.shl_u16x8(a0, shift), self.shl_u16x8(a1, shift))
5630 }
5631 #[inline(always)]
5632 fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5633 let (a0, a1) = self.split_u16x16(a);
5634 let (b0, b1) = self.split_u16x16(b);
5635 self.combine_u16x8(self.shlv_u16x8(a0, b0), self.shlv_u16x8(a1, b1))
5636 }
5637 #[inline(always)]
5638 fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
5639 let (a0, a1) = self.split_u16x16(a);
5640 self.combine_u16x8(self.shr_u16x8(a0, shift), self.shr_u16x8(a1, shift))
5641 }
5642 #[inline(always)]
5643 fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5644 let (a0, a1) = self.split_u16x16(a);
5645 let (b0, b1) = self.split_u16x16(b);
5646 self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
5647 }
5648 #[inline(always)]
5649 fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5650 let (a0, a1) = self.split_u16x16(a);
5651 let (b0, b1) = self.split_u16x16(b);
5652 self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
5653 }
5654 #[inline(always)]
5655 fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5656 let (a0, a1) = self.split_u16x16(a);
5657 let (b0, b1) = self.split_u16x16(b);
5658 self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
5659 }
5660 #[inline(always)]
5661 fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5662 let (a0, a1) = self.split_u16x16(a);
5663 let (b0, b1) = self.split_u16x16(b);
5664 self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
5665 }
5666 #[inline(always)]
5667 fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5668 let (a0, a1) = self.split_u16x16(a);
5669 let (b0, b1) = self.split_u16x16(b);
5670 self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
5671 }
5672 #[inline(always)]
5673 fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5674 let (a0, a1) = self.split_u16x16(a);
5675 let (b0, b1) = self.split_u16x16(b);
5676 self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
5677 }
5678 #[inline(always)]
5679 fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5680 let (a0, _) = self.split_u16x16(a);
5681 let (b0, _) = self.split_u16x16(b);
5682 self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
5683 }
5684 #[inline(always)]
5685 fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5686 let (_, a1) = self.split_u16x16(a);
5687 let (_, b1) = self.split_u16x16(b);
5688 self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
5689 }
5690 #[inline(always)]
5691 fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5692 let (a0, a1) = self.split_u16x16(a);
5693 let (b0, b1) = self.split_u16x16(b);
5694 self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
5695 }
5696 #[inline(always)]
5697 fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5698 let (a0, a1) = self.split_u16x16(a);
5699 let (b0, b1) = self.split_u16x16(b);
5700 self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
5701 }
5702 #[inline(always)]
5703 fn interleave_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> (u16x16<Self>, u16x16<Self>) {
5704 let (a0, a1) = self.split_u16x16(a);
5705 let (b0, b1) = self.split_u16x16(b);
5706 let lo_lo = self.zip_low_u16x8(a0, b0);
5707 let lo_hi = self.zip_high_u16x8(a0, b0);
5708 let hi_lo = self.zip_low_u16x8(a1, b1);
5709 let hi_hi = self.zip_high_u16x8(a1, b1);
5710 (
5711 self.combine_u16x8(lo_lo, lo_hi),
5712 self.combine_u16x8(hi_lo, hi_hi),
5713 )
5714 }
5715 #[inline(always)]
5716 fn deinterleave_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> (u16x16<Self>, u16x16<Self>) {
5717 let (a0, a1) = self.split_u16x16(a);
5718 let (b0, b1) = self.split_u16x16(b);
5719 let lo_even = self.unzip_low_u16x8(a0, a1);
5720 let lo_odd = self.unzip_high_u16x8(a0, a1);
5721 let hi_even = self.unzip_low_u16x8(b0, b1);
5722 let hi_odd = self.unzip_high_u16x8(b0, b1);
5723 (
5724 self.combine_u16x8(lo_even, hi_even),
5725 self.combine_u16x8(lo_odd, hi_odd),
5726 )
5727 }
5728 #[inline(always)]
5729 fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
5730 let (a0, a1) = self.split_mask16x16(a);
5731 let (b0, b1) = self.split_u16x16(b);
5732 let (c0, c1) = self.split_u16x16(c);
5733 self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
5734 }
5735 #[inline(always)]
5736 fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5737 let (a0, a1) = self.split_u16x16(a);
5738 let (b0, b1) = self.split_u16x16(b);
5739 self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
5740 }
5741 #[inline(always)]
5742 fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5743 let (a0, a1) = self.split_u16x16(a);
5744 let (b0, b1) = self.split_u16x16(b);
5745 self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
5746 }
5747 #[inline(always)]
5748 fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
5749 let mut result = [0; 32usize];
5750 result[0..16usize].copy_from_slice(&a.val.0);
5751 result[16usize..32usize].copy_from_slice(&b.val.0);
5752 result.simd_into(self)
5753 }
5754 #[inline(always)]
5755 fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
5756 let mut b0 = [0; 8usize];
5757 let mut b1 = [0; 8usize];
5758 b0.copy_from_slice(&a.val.0[0..8usize]);
5759 b1.copy_from_slice(&a.val.0[8usize..16usize]);
5760 (b0.simd_into(self), b1.simd_into(self))
5761 }
5762 #[inline(always)]
5763 fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
5764 [
5765 a[0usize] as u8,
5766 a[1usize] as u8,
5767 a[2usize] as u8,
5768 a[3usize] as u8,
5769 a[4usize] as u8,
5770 a[5usize] as u8,
5771 a[6usize] as u8,
5772 a[7usize] as u8,
5773 a[8usize] as u8,
5774 a[9usize] as u8,
5775 a[10usize] as u8,
5776 a[11usize] as u8,
5777 a[12usize] as u8,
5778 a[13usize] as u8,
5779 a[14usize] as u8,
5780 a[15usize] as u8,
5781 ]
5782 .simd_into(self)
5783 }
5784 #[inline(always)]
5785 fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
5786 let (a0, a1) = self.split_u16x16(a);
5787 self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
5788 }
5789 #[inline(always)]
5790 fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
5791 let (a0, a1) = self.split_u16x16(a);
5792 self.combine_u32x4(
5793 self.reinterpret_u32_u16x8(a0),
5794 self.reinterpret_u32_u16x8(a1),
5795 )
5796 }
5797 #[inline(always)]
5798 fn splat_mask16x16(self, val: i16) -> mask16x16<Self> {
5799 let half = self.splat_mask16x8(val);
5800 self.combine_mask16x8(half, half)
5801 }
5802 #[inline(always)]
5803 fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16<Self> {
5804 mask16x16 {
5805 val: crate::support::Aligned256(val),
5806 simd: self,
5807 }
5808 }
5809 #[inline(always)]
5810 fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16<Self> {
5811 mask16x16 {
5812 val: crate::support::Aligned256(*val),
5813 simd: self,
5814 }
5815 }
5816 #[inline(always)]
5817 fn as_array_mask16x16(self, a: mask16x16<Self>) -> [i16; 16usize] {
5818 a.val.0
5819 }
5820 #[inline(always)]
5821 fn as_array_ref_mask16x16(self, a: &mask16x16<Self>) -> &[i16; 16usize] {
5822 &a.val.0
5823 }
5824 #[inline(always)]
5825 fn as_array_mut_mask16x16(self, a: &mut mask16x16<Self>) -> &mut [i16; 16usize] {
5826 &mut a.val.0
5827 }
5828 #[inline(always)]
5829 fn store_array_mask16x16(self, a: mask16x16<Self>, dest: &mut [i16; 16usize]) -> () {
5830 *dest = a.val.0;
5831 }
5832 #[inline(always)]
5833 fn cvt_from_bytes_mask16x16(self, a: u8x32<Self>) -> mask16x16<Self> {
5834 unsafe {
5835 mask16x16 {
5836 val: core::mem::transmute(a.val),
5837 simd: self,
5838 }
5839 }
5840 }
5841 #[inline(always)]
5842 fn cvt_to_bytes_mask16x16(self, a: mask16x16<Self>) -> u8x32<Self> {
5843 unsafe {
5844 u8x32 {
5845 val: core::mem::transmute(a.val),
5846 simd: self,
5847 }
5848 }
5849 }
5850 #[inline(always)]
5851 fn slide_mask16x16<const SHIFT: usize>(
5852 self,
5853 a: mask16x16<Self>,
5854 b: mask16x16<Self>,
5855 ) -> mask16x16<Self> {
5856 let mut dest = [Default::default(); 16usize];
5857 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5858 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5859 dest.simd_into(self)
5860 }
5861 #[inline(always)]
5862 fn slide_within_blocks_mask16x16<const SHIFT: usize>(
5863 self,
5864 a: mask16x16<Self>,
5865 b: mask16x16<Self>,
5866 ) -> mask16x16<Self> {
5867 let (a0, a1) = self.split_mask16x16(a);
5868 let (b0, b1) = self.split_mask16x16(b);
5869 self.combine_mask16x8(
5870 self.slide_within_blocks_mask16x8::<SHIFT>(a0, b0),
5871 self.slide_within_blocks_mask16x8::<SHIFT>(a1, b1),
5872 )
5873 }
5874 #[inline(always)]
5875 fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5876 let (a0, a1) = self.split_mask16x16(a);
5877 let (b0, b1) = self.split_mask16x16(b);
5878 self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
5879 }
5880 #[inline(always)]
5881 fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5882 let (a0, a1) = self.split_mask16x16(a);
5883 let (b0, b1) = self.split_mask16x16(b);
5884 self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
5885 }
5886 #[inline(always)]
5887 fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5888 let (a0, a1) = self.split_mask16x16(a);
5889 let (b0, b1) = self.split_mask16x16(b);
5890 self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
5891 }
5892 #[inline(always)]
5893 fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
5894 let (a0, a1) = self.split_mask16x16(a);
5895 self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
5896 }
5897 #[inline(always)]
5898 fn select_mask16x16(
5899 self,
5900 a: mask16x16<Self>,
5901 b: mask16x16<Self>,
5902 c: mask16x16<Self>,
5903 ) -> mask16x16<Self> {
5904 let (a0, a1) = self.split_mask16x16(a);
5905 let (b0, b1) = self.split_mask16x16(b);
5906 let (c0, c1) = self.split_mask16x16(c);
5907 self.combine_mask16x8(
5908 self.select_mask16x8(a0, b0, c0),
5909 self.select_mask16x8(a1, b1, c1),
5910 )
5911 }
5912 #[inline(always)]
5913 fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5914 let (a0, a1) = self.split_mask16x16(a);
5915 let (b0, b1) = self.split_mask16x16(b);
5916 self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
5917 }
5918 #[inline(always)]
5919 fn any_true_mask16x16(self, a: mask16x16<Self>) -> bool {
5920 let (a0, a1) = self.split_mask16x16(a);
5921 self.any_true_mask16x8(a0) || self.any_true_mask16x8(a1)
5922 }
5923 #[inline(always)]
5924 fn all_true_mask16x16(self, a: mask16x16<Self>) -> bool {
5925 let (a0, a1) = self.split_mask16x16(a);
5926 self.all_true_mask16x8(a0) && self.all_true_mask16x8(a1)
5927 }
5928 #[inline(always)]
5929 fn any_false_mask16x16(self, a: mask16x16<Self>) -> bool {
5930 let (a0, a1) = self.split_mask16x16(a);
5931 self.any_false_mask16x8(a0) || self.any_false_mask16x8(a1)
5932 }
5933 #[inline(always)]
5934 fn all_false_mask16x16(self, a: mask16x16<Self>) -> bool {
5935 let (a0, a1) = self.split_mask16x16(a);
5936 self.all_false_mask16x8(a0) && self.all_false_mask16x8(a1)
5937 }
5938 #[inline(always)]
5939 fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
5940 let mut result = [0; 32usize];
5941 result[0..16usize].copy_from_slice(&a.val.0);
5942 result[16usize..32usize].copy_from_slice(&b.val.0);
5943 result.simd_into(self)
5944 }
5945 #[inline(always)]
5946 fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
5947 let mut b0 = [0; 8usize];
5948 let mut b1 = [0; 8usize];
5949 b0.copy_from_slice(&a.val.0[0..8usize]);
5950 b1.copy_from_slice(&a.val.0[8usize..16usize]);
5951 (b0.simd_into(self), b1.simd_into(self))
5952 }
5953 #[inline(always)]
5954 fn splat_i32x8(self, val: i32) -> i32x8<Self> {
5955 let half = self.splat_i32x4(val);
5956 self.combine_i32x4(half, half)
5957 }
5958 #[inline(always)]
5959 fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8<Self> {
5960 i32x8 {
5961 val: crate::support::Aligned256(val),
5962 simd: self,
5963 }
5964 }
5965 #[inline(always)]
5966 fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8<Self> {
5967 i32x8 {
5968 val: crate::support::Aligned256(*val),
5969 simd: self,
5970 }
5971 }
5972 #[inline(always)]
5973 fn as_array_i32x8(self, a: i32x8<Self>) -> [i32; 8usize] {
5974 a.val.0
5975 }
5976 #[inline(always)]
5977 fn as_array_ref_i32x8(self, a: &i32x8<Self>) -> &[i32; 8usize] {
5978 &a.val.0
5979 }
5980 #[inline(always)]
5981 fn as_array_mut_i32x8(self, a: &mut i32x8<Self>) -> &mut [i32; 8usize] {
5982 &mut a.val.0
5983 }
5984 #[inline(always)]
5985 fn store_array_i32x8(self, a: i32x8<Self>, dest: &mut [i32; 8usize]) -> () {
5986 *dest = a.val.0;
5987 }
5988 #[inline(always)]
5989 fn cvt_from_bytes_i32x8(self, a: u8x32<Self>) -> i32x8<Self> {
5990 unsafe {
5991 i32x8 {
5992 val: core::mem::transmute(a.val),
5993 simd: self,
5994 }
5995 }
5996 }
5997 #[inline(always)]
5998 fn cvt_to_bytes_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
5999 unsafe {
6000 u8x32 {
6001 val: core::mem::transmute(a.val),
6002 simd: self,
6003 }
6004 }
6005 }
6006 #[inline(always)]
6007 fn slide_i32x8<const SHIFT: usize>(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6008 let mut dest = [Default::default(); 8usize];
6009 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6010 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6011 dest.simd_into(self)
6012 }
6013 #[inline(always)]
6014 fn slide_within_blocks_i32x8<const SHIFT: usize>(
6015 self,
6016 a: i32x8<Self>,
6017 b: i32x8<Self>,
6018 ) -> i32x8<Self> {
6019 let (a0, a1) = self.split_i32x8(a);
6020 let (b0, b1) = self.split_i32x8(b);
6021 self.combine_i32x4(
6022 self.slide_within_blocks_i32x4::<SHIFT>(a0, b0),
6023 self.slide_within_blocks_i32x4::<SHIFT>(a1, b1),
6024 )
6025 }
6026 #[inline(always)]
6027 fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6028 let (a0, a1) = self.split_i32x8(a);
6029 let (b0, b1) = self.split_i32x8(b);
6030 self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
6031 }
6032 #[inline(always)]
6033 fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6034 let (a0, a1) = self.split_i32x8(a);
6035 let (b0, b1) = self.split_i32x8(b);
6036 self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
6037 }
6038 #[inline(always)]
6039 fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6040 let (a0, a1) = self.split_i32x8(a);
6041 let (b0, b1) = self.split_i32x8(b);
6042 self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
6043 }
6044 #[inline(always)]
6045 fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6046 let (a0, a1) = self.split_i32x8(a);
6047 let (b0, b1) = self.split_i32x8(b);
6048 self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
6049 }
6050 #[inline(always)]
6051 fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6052 let (a0, a1) = self.split_i32x8(a);
6053 let (b0, b1) = self.split_i32x8(b);
6054 self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
6055 }
6056 #[inline(always)]
6057 fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6058 let (a0, a1) = self.split_i32x8(a);
6059 let (b0, b1) = self.split_i32x8(b);
6060 self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
6061 }
6062 #[inline(always)]
6063 fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
6064 let (a0, a1) = self.split_i32x8(a);
6065 self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
6066 }
6067 #[inline(always)]
6068 fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
6069 let (a0, a1) = self.split_i32x8(a);
6070 self.combine_i32x4(self.shl_i32x4(a0, shift), self.shl_i32x4(a1, shift))
6071 }
6072 #[inline(always)]
6073 fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6074 let (a0, a1) = self.split_i32x8(a);
6075 let (b0, b1) = self.split_i32x8(b);
6076 self.combine_i32x4(self.shlv_i32x4(a0, b0), self.shlv_i32x4(a1, b1))
6077 }
6078 #[inline(always)]
6079 fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
6080 let (a0, a1) = self.split_i32x8(a);
6081 self.combine_i32x4(self.shr_i32x4(a0, shift), self.shr_i32x4(a1, shift))
6082 }
6083 #[inline(always)]
6084 fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6085 let (a0, a1) = self.split_i32x8(a);
6086 let (b0, b1) = self.split_i32x8(b);
6087 self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
6088 }
6089 #[inline(always)]
6090 fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6091 let (a0, a1) = self.split_i32x8(a);
6092 let (b0, b1) = self.split_i32x8(b);
6093 self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
6094 }
6095 #[inline(always)]
6096 fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6097 let (a0, a1) = self.split_i32x8(a);
6098 let (b0, b1) = self.split_i32x8(b);
6099 self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
6100 }
6101 #[inline(always)]
6102 fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6103 let (a0, a1) = self.split_i32x8(a);
6104 let (b0, b1) = self.split_i32x8(b);
6105 self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
6106 }
6107 #[inline(always)]
6108 fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6109 let (a0, a1) = self.split_i32x8(a);
6110 let (b0, b1) = self.split_i32x8(b);
6111 self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
6112 }
6113 #[inline(always)]
6114 fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6115 let (a0, a1) = self.split_i32x8(a);
6116 let (b0, b1) = self.split_i32x8(b);
6117 self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
6118 }
6119 #[inline(always)]
6120 fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6121 let (a0, _) = self.split_i32x8(a);
6122 let (b0, _) = self.split_i32x8(b);
6123 self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
6124 }
6125 #[inline(always)]
6126 fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6127 let (_, a1) = self.split_i32x8(a);
6128 let (_, b1) = self.split_i32x8(b);
6129 self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
6130 }
6131 #[inline(always)]
6132 fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6133 let (a0, a1) = self.split_i32x8(a);
6134 let (b0, b1) = self.split_i32x8(b);
6135 self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
6136 }
6137 #[inline(always)]
6138 fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6139 let (a0, a1) = self.split_i32x8(a);
6140 let (b0, b1) = self.split_i32x8(b);
6141 self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
6142 }
6143 #[inline(always)]
6144 fn interleave_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> (i32x8<Self>, i32x8<Self>) {
6145 let (a0, a1) = self.split_i32x8(a);
6146 let (b0, b1) = self.split_i32x8(b);
6147 let lo_lo = self.zip_low_i32x4(a0, b0);
6148 let lo_hi = self.zip_high_i32x4(a0, b0);
6149 let hi_lo = self.zip_low_i32x4(a1, b1);
6150 let hi_hi = self.zip_high_i32x4(a1, b1);
6151 (
6152 self.combine_i32x4(lo_lo, lo_hi),
6153 self.combine_i32x4(hi_lo, hi_hi),
6154 )
6155 }
6156 #[inline(always)]
6157 fn deinterleave_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> (i32x8<Self>, i32x8<Self>) {
6158 let (a0, a1) = self.split_i32x8(a);
6159 let (b0, b1) = self.split_i32x8(b);
6160 let lo_even = self.unzip_low_i32x4(a0, a1);
6161 let lo_odd = self.unzip_high_i32x4(a0, a1);
6162 let hi_even = self.unzip_low_i32x4(b0, b1);
6163 let hi_odd = self.unzip_high_i32x4(b0, b1);
6164 (
6165 self.combine_i32x4(lo_even, hi_even),
6166 self.combine_i32x4(lo_odd, hi_odd),
6167 )
6168 }
6169 #[inline(always)]
6170 fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
6171 let (a0, a1) = self.split_mask32x8(a);
6172 let (b0, b1) = self.split_i32x8(b);
6173 let (c0, c1) = self.split_i32x8(c);
6174 self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
6175 }
6176 #[inline(always)]
6177 fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6178 let (a0, a1) = self.split_i32x8(a);
6179 let (b0, b1) = self.split_i32x8(b);
6180 self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
6181 }
6182 #[inline(always)]
6183 fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6184 let (a0, a1) = self.split_i32x8(a);
6185 let (b0, b1) = self.split_i32x8(b);
6186 self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
6187 }
6188 #[inline(always)]
6189 fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
6190 let mut result = [0; 16usize];
6191 result[0..8usize].copy_from_slice(&a.val.0);
6192 result[8usize..16usize].copy_from_slice(&b.val.0);
6193 result.simd_into(self)
6194 }
6195 #[inline(always)]
6196 fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
6197 let mut b0 = [0; 4usize];
6198 let mut b1 = [0; 4usize];
6199 b0.copy_from_slice(&a.val.0[0..4usize]);
6200 b1.copy_from_slice(&a.val.0[4usize..8usize]);
6201 (b0.simd_into(self), b1.simd_into(self))
6202 }
6203 #[inline(always)]
6204 fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
6205 let (a0, a1) = self.split_i32x8(a);
6206 self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
6207 }
6208 #[inline(always)]
6209 fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
6210 let (a0, a1) = self.split_i32x8(a);
6211 self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
6212 }
6213 #[inline(always)]
6214 fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
6215 let (a0, a1) = self.split_i32x8(a);
6216 self.combine_u32x4(
6217 self.reinterpret_u32_i32x4(a0),
6218 self.reinterpret_u32_i32x4(a1),
6219 )
6220 }
6221 #[inline(always)]
6222 fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
6223 let (a0, a1) = self.split_i32x8(a);
6224 self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
6225 }
6226 #[inline(always)]
6227 fn splat_u32x8(self, val: u32) -> u32x8<Self> {
6228 let half = self.splat_u32x4(val);
6229 self.combine_u32x4(half, half)
6230 }
6231 #[inline(always)]
6232 fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8<Self> {
6233 u32x8 {
6234 val: crate::support::Aligned256(val),
6235 simd: self,
6236 }
6237 }
6238 #[inline(always)]
6239 fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8<Self> {
6240 u32x8 {
6241 val: crate::support::Aligned256(*val),
6242 simd: self,
6243 }
6244 }
6245 #[inline(always)]
6246 fn as_array_u32x8(self, a: u32x8<Self>) -> [u32; 8usize] {
6247 a.val.0
6248 }
6249 #[inline(always)]
6250 fn as_array_ref_u32x8(self, a: &u32x8<Self>) -> &[u32; 8usize] {
6251 &a.val.0
6252 }
6253 #[inline(always)]
6254 fn as_array_mut_u32x8(self, a: &mut u32x8<Self>) -> &mut [u32; 8usize] {
6255 &mut a.val.0
6256 }
6257 #[inline(always)]
6258 fn store_array_u32x8(self, a: u32x8<Self>, dest: &mut [u32; 8usize]) -> () {
6259 *dest = a.val.0;
6260 }
6261 #[inline(always)]
6262 fn cvt_from_bytes_u32x8(self, a: u8x32<Self>) -> u32x8<Self> {
6263 unsafe {
6264 u32x8 {
6265 val: core::mem::transmute(a.val),
6266 simd: self,
6267 }
6268 }
6269 }
6270 #[inline(always)]
6271 fn cvt_to_bytes_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
6272 unsafe {
6273 u8x32 {
6274 val: core::mem::transmute(a.val),
6275 simd: self,
6276 }
6277 }
6278 }
6279 #[inline(always)]
6280 fn slide_u32x8<const SHIFT: usize>(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6281 let mut dest = [Default::default(); 8usize];
6282 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6283 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6284 dest.simd_into(self)
6285 }
6286 #[inline(always)]
6287 fn slide_within_blocks_u32x8<const SHIFT: usize>(
6288 self,
6289 a: u32x8<Self>,
6290 b: u32x8<Self>,
6291 ) -> u32x8<Self> {
6292 let (a0, a1) = self.split_u32x8(a);
6293 let (b0, b1) = self.split_u32x8(b);
6294 self.combine_u32x4(
6295 self.slide_within_blocks_u32x4::<SHIFT>(a0, b0),
6296 self.slide_within_blocks_u32x4::<SHIFT>(a1, b1),
6297 )
6298 }
6299 #[inline(always)]
6300 fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6301 let (a0, a1) = self.split_u32x8(a);
6302 let (b0, b1) = self.split_u32x8(b);
6303 self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
6304 }
6305 #[inline(always)]
6306 fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6307 let (a0, a1) = self.split_u32x8(a);
6308 let (b0, b1) = self.split_u32x8(b);
6309 self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
6310 }
6311 #[inline(always)]
6312 fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6313 let (a0, a1) = self.split_u32x8(a);
6314 let (b0, b1) = self.split_u32x8(b);
6315 self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
6316 }
6317 #[inline(always)]
6318 fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6319 let (a0, a1) = self.split_u32x8(a);
6320 let (b0, b1) = self.split_u32x8(b);
6321 self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
6322 }
6323 #[inline(always)]
6324 fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6325 let (a0, a1) = self.split_u32x8(a);
6326 let (b0, b1) = self.split_u32x8(b);
6327 self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
6328 }
6329 #[inline(always)]
6330 fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6331 let (a0, a1) = self.split_u32x8(a);
6332 let (b0, b1) = self.split_u32x8(b);
6333 self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
6334 }
6335 #[inline(always)]
6336 fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
6337 let (a0, a1) = self.split_u32x8(a);
6338 self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
6339 }
6340 #[inline(always)]
6341 fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
6342 let (a0, a1) = self.split_u32x8(a);
6343 self.combine_u32x4(self.shl_u32x4(a0, shift), self.shl_u32x4(a1, shift))
6344 }
6345 #[inline(always)]
6346 fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6347 let (a0, a1) = self.split_u32x8(a);
6348 let (b0, b1) = self.split_u32x8(b);
6349 self.combine_u32x4(self.shlv_u32x4(a0, b0), self.shlv_u32x4(a1, b1))
6350 }
6351 #[inline(always)]
6352 fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
6353 let (a0, a1) = self.split_u32x8(a);
6354 self.combine_u32x4(self.shr_u32x4(a0, shift), self.shr_u32x4(a1, shift))
6355 }
6356 #[inline(always)]
6357 fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6358 let (a0, a1) = self.split_u32x8(a);
6359 let (b0, b1) = self.split_u32x8(b);
6360 self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
6361 }
6362 #[inline(always)]
6363 fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6364 let (a0, a1) = self.split_u32x8(a);
6365 let (b0, b1) = self.split_u32x8(b);
6366 self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
6367 }
6368 #[inline(always)]
6369 fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6370 let (a0, a1) = self.split_u32x8(a);
6371 let (b0, b1) = self.split_u32x8(b);
6372 self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
6373 }
6374 #[inline(always)]
6375 fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6376 let (a0, a1) = self.split_u32x8(a);
6377 let (b0, b1) = self.split_u32x8(b);
6378 self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
6379 }
6380 #[inline(always)]
6381 fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6382 let (a0, a1) = self.split_u32x8(a);
6383 let (b0, b1) = self.split_u32x8(b);
6384 self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
6385 }
6386 #[inline(always)]
6387 fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6388 let (a0, a1) = self.split_u32x8(a);
6389 let (b0, b1) = self.split_u32x8(b);
6390 self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
6391 }
6392 #[inline(always)]
6393 fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6394 let (a0, _) = self.split_u32x8(a);
6395 let (b0, _) = self.split_u32x8(b);
6396 self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
6397 }
6398 #[inline(always)]
6399 fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6400 let (_, a1) = self.split_u32x8(a);
6401 let (_, b1) = self.split_u32x8(b);
6402 self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
6403 }
6404 #[inline(always)]
6405 fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6406 let (a0, a1) = self.split_u32x8(a);
6407 let (b0, b1) = self.split_u32x8(b);
6408 self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
6409 }
6410 #[inline(always)]
6411 fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6412 let (a0, a1) = self.split_u32x8(a);
6413 let (b0, b1) = self.split_u32x8(b);
6414 self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
6415 }
6416 #[inline(always)]
6417 fn interleave_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> (u32x8<Self>, u32x8<Self>) {
6418 let (a0, a1) = self.split_u32x8(a);
6419 let (b0, b1) = self.split_u32x8(b);
6420 let lo_lo = self.zip_low_u32x4(a0, b0);
6421 let lo_hi = self.zip_high_u32x4(a0, b0);
6422 let hi_lo = self.zip_low_u32x4(a1, b1);
6423 let hi_hi = self.zip_high_u32x4(a1, b1);
6424 (
6425 self.combine_u32x4(lo_lo, lo_hi),
6426 self.combine_u32x4(hi_lo, hi_hi),
6427 )
6428 }
6429 #[inline(always)]
6430 fn deinterleave_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> (u32x8<Self>, u32x8<Self>) {
6431 let (a0, a1) = self.split_u32x8(a);
6432 let (b0, b1) = self.split_u32x8(b);
6433 let lo_even = self.unzip_low_u32x4(a0, a1);
6434 let lo_odd = self.unzip_high_u32x4(a0, a1);
6435 let hi_even = self.unzip_low_u32x4(b0, b1);
6436 let hi_odd = self.unzip_high_u32x4(b0, b1);
6437 (
6438 self.combine_u32x4(lo_even, hi_even),
6439 self.combine_u32x4(lo_odd, hi_odd),
6440 )
6441 }
6442 #[inline(always)]
6443 fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
6444 let (a0, a1) = self.split_mask32x8(a);
6445 let (b0, b1) = self.split_u32x8(b);
6446 let (c0, c1) = self.split_u32x8(c);
6447 self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
6448 }
6449 #[inline(always)]
6450 fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6451 let (a0, a1) = self.split_u32x8(a);
6452 let (b0, b1) = self.split_u32x8(b);
6453 self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
6454 }
6455 #[inline(always)]
6456 fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6457 let (a0, a1) = self.split_u32x8(a);
6458 let (b0, b1) = self.split_u32x8(b);
6459 self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
6460 }
6461 #[inline(always)]
6462 fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
6463 let mut result = [0; 16usize];
6464 result[0..8usize].copy_from_slice(&a.val.0);
6465 result[8usize..16usize].copy_from_slice(&b.val.0);
6466 result.simd_into(self)
6467 }
6468 #[inline(always)]
6469 fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
6470 let mut b0 = [0; 4usize];
6471 let mut b1 = [0; 4usize];
6472 b0.copy_from_slice(&a.val.0[0..4usize]);
6473 b1.copy_from_slice(&a.val.0[4usize..8usize]);
6474 (b0.simd_into(self), b1.simd_into(self))
6475 }
6476 #[inline(always)]
6477 fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
6478 let (a0, a1) = self.split_u32x8(a);
6479 self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
6480 }
6481 #[inline(always)]
6482 fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
6483 let (a0, a1) = self.split_u32x8(a);
6484 self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
6485 }
6486 #[inline(always)]
6487 fn splat_mask32x8(self, val: i32) -> mask32x8<Self> {
6488 let half = self.splat_mask32x4(val);
6489 self.combine_mask32x4(half, half)
6490 }
6491 #[inline(always)]
6492 fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8<Self> {
6493 mask32x8 {
6494 val: crate::support::Aligned256(val),
6495 simd: self,
6496 }
6497 }
6498 #[inline(always)]
6499 fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8<Self> {
6500 mask32x8 {
6501 val: crate::support::Aligned256(*val),
6502 simd: self,
6503 }
6504 }
6505 #[inline(always)]
6506 fn as_array_mask32x8(self, a: mask32x8<Self>) -> [i32; 8usize] {
6507 a.val.0
6508 }
6509 #[inline(always)]
6510 fn as_array_ref_mask32x8(self, a: &mask32x8<Self>) -> &[i32; 8usize] {
6511 &a.val.0
6512 }
6513 #[inline(always)]
6514 fn as_array_mut_mask32x8(self, a: &mut mask32x8<Self>) -> &mut [i32; 8usize] {
6515 &mut a.val.0
6516 }
6517 #[inline(always)]
6518 fn store_array_mask32x8(self, a: mask32x8<Self>, dest: &mut [i32; 8usize]) -> () {
6519 *dest = a.val.0;
6520 }
6521 #[inline(always)]
6522 fn cvt_from_bytes_mask32x8(self, a: u8x32<Self>) -> mask32x8<Self> {
6523 unsafe {
6524 mask32x8 {
6525 val: core::mem::transmute(a.val),
6526 simd: self,
6527 }
6528 }
6529 }
6530 #[inline(always)]
6531 fn cvt_to_bytes_mask32x8(self, a: mask32x8<Self>) -> u8x32<Self> {
6532 unsafe {
6533 u8x32 {
6534 val: core::mem::transmute(a.val),
6535 simd: self,
6536 }
6537 }
6538 }
6539 #[inline(always)]
6540 fn slide_mask32x8<const SHIFT: usize>(
6541 self,
6542 a: mask32x8<Self>,
6543 b: mask32x8<Self>,
6544 ) -> mask32x8<Self> {
6545 let mut dest = [Default::default(); 8usize];
6546 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6547 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6548 dest.simd_into(self)
6549 }
6550 #[inline(always)]
6551 fn slide_within_blocks_mask32x8<const SHIFT: usize>(
6552 self,
6553 a: mask32x8<Self>,
6554 b: mask32x8<Self>,
6555 ) -> mask32x8<Self> {
6556 let (a0, a1) = self.split_mask32x8(a);
6557 let (b0, b1) = self.split_mask32x8(b);
6558 self.combine_mask32x4(
6559 self.slide_within_blocks_mask32x4::<SHIFT>(a0, b0),
6560 self.slide_within_blocks_mask32x4::<SHIFT>(a1, b1),
6561 )
6562 }
6563 #[inline(always)]
6564 fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6565 let (a0, a1) = self.split_mask32x8(a);
6566 let (b0, b1) = self.split_mask32x8(b);
6567 self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
6568 }
6569 #[inline(always)]
6570 fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6571 let (a0, a1) = self.split_mask32x8(a);
6572 let (b0, b1) = self.split_mask32x8(b);
6573 self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
6574 }
6575 #[inline(always)]
6576 fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6577 let (a0, a1) = self.split_mask32x8(a);
6578 let (b0, b1) = self.split_mask32x8(b);
6579 self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
6580 }
6581 #[inline(always)]
6582 fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
6583 let (a0, a1) = self.split_mask32x8(a);
6584 self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
6585 }
6586 #[inline(always)]
6587 fn select_mask32x8(
6588 self,
6589 a: mask32x8<Self>,
6590 b: mask32x8<Self>,
6591 c: mask32x8<Self>,
6592 ) -> mask32x8<Self> {
6593 let (a0, a1) = self.split_mask32x8(a);
6594 let (b0, b1) = self.split_mask32x8(b);
6595 let (c0, c1) = self.split_mask32x8(c);
6596 self.combine_mask32x4(
6597 self.select_mask32x4(a0, b0, c0),
6598 self.select_mask32x4(a1, b1, c1),
6599 )
6600 }
6601 #[inline(always)]
6602 fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6603 let (a0, a1) = self.split_mask32x8(a);
6604 let (b0, b1) = self.split_mask32x8(b);
6605 self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
6606 }
6607 #[inline(always)]
6608 fn any_true_mask32x8(self, a: mask32x8<Self>) -> bool {
6609 let (a0, a1) = self.split_mask32x8(a);
6610 self.any_true_mask32x4(a0) || self.any_true_mask32x4(a1)
6611 }
6612 #[inline(always)]
6613 fn all_true_mask32x8(self, a: mask32x8<Self>) -> bool {
6614 let (a0, a1) = self.split_mask32x8(a);
6615 self.all_true_mask32x4(a0) && self.all_true_mask32x4(a1)
6616 }
6617 #[inline(always)]
6618 fn any_false_mask32x8(self, a: mask32x8<Self>) -> bool {
6619 let (a0, a1) = self.split_mask32x8(a);
6620 self.any_false_mask32x4(a0) || self.any_false_mask32x4(a1)
6621 }
6622 #[inline(always)]
6623 fn all_false_mask32x8(self, a: mask32x8<Self>) -> bool {
6624 let (a0, a1) = self.split_mask32x8(a);
6625 self.all_false_mask32x4(a0) && self.all_false_mask32x4(a1)
6626 }
6627 #[inline(always)]
6628 fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
6629 let mut result = [0; 16usize];
6630 result[0..8usize].copy_from_slice(&a.val.0);
6631 result[8usize..16usize].copy_from_slice(&b.val.0);
6632 result.simd_into(self)
6633 }
6634 #[inline(always)]
6635 fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
6636 let mut b0 = [0; 4usize];
6637 let mut b1 = [0; 4usize];
6638 b0.copy_from_slice(&a.val.0[0..4usize]);
6639 b1.copy_from_slice(&a.val.0[4usize..8usize]);
6640 (b0.simd_into(self), b1.simd_into(self))
6641 }
6642 #[inline(always)]
6643 fn splat_f64x4(self, val: f64) -> f64x4<Self> {
6644 let half = self.splat_f64x2(val);
6645 self.combine_f64x2(half, half)
6646 }
6647 #[inline(always)]
6648 fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4<Self> {
6649 f64x4 {
6650 val: crate::support::Aligned256(val),
6651 simd: self,
6652 }
6653 }
6654 #[inline(always)]
6655 fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4<Self> {
6656 f64x4 {
6657 val: crate::support::Aligned256(*val),
6658 simd: self,
6659 }
6660 }
6661 #[inline(always)]
6662 fn as_array_f64x4(self, a: f64x4<Self>) -> [f64; 4usize] {
6663 a.val.0
6664 }
6665 #[inline(always)]
6666 fn as_array_ref_f64x4(self, a: &f64x4<Self>) -> &[f64; 4usize] {
6667 &a.val.0
6668 }
6669 #[inline(always)]
6670 fn as_array_mut_f64x4(self, a: &mut f64x4<Self>) -> &mut [f64; 4usize] {
6671 &mut a.val.0
6672 }
6673 #[inline(always)]
6674 fn store_array_f64x4(self, a: f64x4<Self>, dest: &mut [f64; 4usize]) -> () {
6675 *dest = a.val.0;
6676 }
6677 #[inline(always)]
6678 fn cvt_from_bytes_f64x4(self, a: u8x32<Self>) -> f64x4<Self> {
6679 unsafe {
6680 f64x4 {
6681 val: core::mem::transmute(a.val),
6682 simd: self,
6683 }
6684 }
6685 }
6686 #[inline(always)]
6687 fn cvt_to_bytes_f64x4(self, a: f64x4<Self>) -> u8x32<Self> {
6688 unsafe {
6689 u8x32 {
6690 val: core::mem::transmute(a.val),
6691 simd: self,
6692 }
6693 }
6694 }
6695 #[inline(always)]
6696 fn slide_f64x4<const SHIFT: usize>(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6697 let mut dest = [Default::default(); 4usize];
6698 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6699 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6700 dest.simd_into(self)
6701 }
6702 #[inline(always)]
6703 fn slide_within_blocks_f64x4<const SHIFT: usize>(
6704 self,
6705 a: f64x4<Self>,
6706 b: f64x4<Self>,
6707 ) -> f64x4<Self> {
6708 let (a0, a1) = self.split_f64x4(a);
6709 let (b0, b1) = self.split_f64x4(b);
6710 self.combine_f64x2(
6711 self.slide_within_blocks_f64x2::<SHIFT>(a0, b0),
6712 self.slide_within_blocks_f64x2::<SHIFT>(a1, b1),
6713 )
6714 }
6715 #[inline(always)]
6716 fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6717 let (a0, a1) = self.split_f64x4(a);
6718 self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
6719 }
6720 #[inline(always)]
6721 fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6722 let (a0, a1) = self.split_f64x4(a);
6723 self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
6724 }
6725 #[inline(always)]
6726 fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6727 let (a0, a1) = self.split_f64x4(a);
6728 self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
6729 }
6730 #[inline(always)]
6731 fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6732 let (a0, a1) = self.split_f64x4(a);
6733 let (b0, b1) = self.split_f64x4(b);
6734 self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
6735 }
6736 #[inline(always)]
6737 fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6738 let (a0, a1) = self.split_f64x4(a);
6739 let (b0, b1) = self.split_f64x4(b);
6740 self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
6741 }
6742 #[inline(always)]
6743 fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6744 let (a0, a1) = self.split_f64x4(a);
6745 let (b0, b1) = self.split_f64x4(b);
6746 self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
6747 }
6748 #[inline(always)]
6749 fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6750 let (a0, a1) = self.split_f64x4(a);
6751 let (b0, b1) = self.split_f64x4(b);
6752 self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
6753 }
6754 #[inline(always)]
6755 fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6756 let (a0, a1) = self.split_f64x4(a);
6757 let (b0, b1) = self.split_f64x4(b);
6758 self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
6759 }
6760 #[inline(always)]
6761 fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6762 let (a0, a1) = self.split_f64x4(a);
6763 let (b0, b1) = self.split_f64x4(b);
6764 self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
6765 }
6766 #[inline(always)]
6767 fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6768 let (a0, a1) = self.split_f64x4(a);
6769 let (b0, b1) = self.split_f64x4(b);
6770 self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
6771 }
6772 #[inline(always)]
6773 fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6774 let (a0, a1) = self.split_f64x4(a);
6775 let (b0, b1) = self.split_f64x4(b);
6776 self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
6777 }
6778 #[inline(always)]
6779 fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6780 let (a0, a1) = self.split_f64x4(a);
6781 let (b0, b1) = self.split_f64x4(b);
6782 self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
6783 }
6784 #[inline(always)]
6785 fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6786 let (a0, a1) = self.split_f64x4(a);
6787 let (b0, b1) = self.split_f64x4(b);
6788 self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
6789 }
6790 #[inline(always)]
6791 fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6792 let (a0, _) = self.split_f64x4(a);
6793 let (b0, _) = self.split_f64x4(b);
6794 self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
6795 }
6796 #[inline(always)]
6797 fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6798 let (_, a1) = self.split_f64x4(a);
6799 let (_, b1) = self.split_f64x4(b);
6800 self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
6801 }
6802 #[inline(always)]
6803 fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6804 let (a0, a1) = self.split_f64x4(a);
6805 let (b0, b1) = self.split_f64x4(b);
6806 self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
6807 }
6808 #[inline(always)]
6809 fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6810 let (a0, a1) = self.split_f64x4(a);
6811 let (b0, b1) = self.split_f64x4(b);
6812 self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
6813 }
6814 #[inline(always)]
6815 fn interleave_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> (f64x4<Self>, f64x4<Self>) {
6816 let (a0, a1) = self.split_f64x4(a);
6817 let (b0, b1) = self.split_f64x4(b);
6818 let lo_lo = self.zip_low_f64x2(a0, b0);
6819 let lo_hi = self.zip_high_f64x2(a0, b0);
6820 let hi_lo = self.zip_low_f64x2(a1, b1);
6821 let hi_hi = self.zip_high_f64x2(a1, b1);
6822 (
6823 self.combine_f64x2(lo_lo, lo_hi),
6824 self.combine_f64x2(hi_lo, hi_hi),
6825 )
6826 }
6827 #[inline(always)]
6828 fn deinterleave_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> (f64x4<Self>, f64x4<Self>) {
6829 let (a0, a1) = self.split_f64x4(a);
6830 let (b0, b1) = self.split_f64x4(b);
6831 let lo_even = self.unzip_low_f64x2(a0, a1);
6832 let lo_odd = self.unzip_high_f64x2(a0, a1);
6833 let hi_even = self.unzip_low_f64x2(b0, b1);
6834 let hi_odd = self.unzip_high_f64x2(b0, b1);
6835 (
6836 self.combine_f64x2(lo_even, hi_even),
6837 self.combine_f64x2(lo_odd, hi_odd),
6838 )
6839 }
6840 #[inline(always)]
6841 fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6842 let (a0, a1) = self.split_f64x4(a);
6843 let (b0, b1) = self.split_f64x4(b);
6844 self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
6845 }
6846 #[inline(always)]
6847 fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6848 let (a0, a1) = self.split_f64x4(a);
6849 let (b0, b1) = self.split_f64x4(b);
6850 self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
6851 }
6852 #[inline(always)]
6853 fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6854 let (a0, a1) = self.split_f64x4(a);
6855 let (b0, b1) = self.split_f64x4(b);
6856 self.combine_f64x2(
6857 self.max_precise_f64x2(a0, b0),
6858 self.max_precise_f64x2(a1, b1),
6859 )
6860 }
6861 #[inline(always)]
6862 fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6863 let (a0, a1) = self.split_f64x4(a);
6864 let (b0, b1) = self.split_f64x4(b);
6865 self.combine_f64x2(
6866 self.min_precise_f64x2(a0, b0),
6867 self.min_precise_f64x2(a1, b1),
6868 )
6869 }
6870 #[inline(always)]
6871 fn mul_add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6872 let (a0, a1) = self.split_f64x4(a);
6873 let (b0, b1) = self.split_f64x4(b);
6874 let (c0, c1) = self.split_f64x4(c);
6875 self.combine_f64x2(
6876 self.mul_add_f64x2(a0, b0, c0),
6877 self.mul_add_f64x2(a1, b1, c1),
6878 )
6879 }
6880 #[inline(always)]
6881 fn mul_sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6882 let (a0, a1) = self.split_f64x4(a);
6883 let (b0, b1) = self.split_f64x4(b);
6884 let (c0, c1) = self.split_f64x4(c);
6885 self.combine_f64x2(
6886 self.mul_sub_f64x2(a0, b0, c0),
6887 self.mul_sub_f64x2(a1, b1, c1),
6888 )
6889 }
6890 #[inline(always)]
6891 fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6892 let (a0, a1) = self.split_f64x4(a);
6893 self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
6894 }
6895 #[inline(always)]
6896 fn ceil_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6897 let (a0, a1) = self.split_f64x4(a);
6898 self.combine_f64x2(self.ceil_f64x2(a0), self.ceil_f64x2(a1))
6899 }
6900 #[inline(always)]
6901 fn round_ties_even_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6902 let (a0, a1) = self.split_f64x4(a);
6903 self.combine_f64x2(
6904 self.round_ties_even_f64x2(a0),
6905 self.round_ties_even_f64x2(a1),
6906 )
6907 }
6908 #[inline(always)]
6909 fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6910 let (a0, a1) = self.split_f64x4(a);
6911 self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
6912 }
6913 #[inline(always)]
6914 fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6915 let (a0, a1) = self.split_f64x4(a);
6916 self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
6917 }
6918 #[inline(always)]
6919 fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6920 let (a0, a1) = self.split_mask64x4(a);
6921 let (b0, b1) = self.split_f64x4(b);
6922 let (c0, c1) = self.split_f64x4(c);
6923 self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
6924 }
6925 #[inline(always)]
6926 fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
6927 let mut result = [0.0; 8usize];
6928 result[0..4usize].copy_from_slice(&a.val.0);
6929 result[4usize..8usize].copy_from_slice(&b.val.0);
6930 result.simd_into(self)
6931 }
6932 #[inline(always)]
6933 fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
6934 let mut b0 = [0.0; 2usize];
6935 let mut b1 = [0.0; 2usize];
6936 b0.copy_from_slice(&a.val.0[0..2usize]);
6937 b1.copy_from_slice(&a.val.0[2usize..4usize]);
6938 (b0.simd_into(self), b1.simd_into(self))
6939 }
6940 #[inline(always)]
6941 fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
6942 let (a0, a1) = self.split_f64x4(a);
6943 self.combine_f32x4(
6944 self.reinterpret_f32_f64x2(a0),
6945 self.reinterpret_f32_f64x2(a1),
6946 )
6947 }
6948 #[inline(always)]
6949 fn splat_mask64x4(self, val: i64) -> mask64x4<Self> {
6950 let half = self.splat_mask64x2(val);
6951 self.combine_mask64x2(half, half)
6952 }
6953 #[inline(always)]
6954 fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4<Self> {
6955 mask64x4 {
6956 val: crate::support::Aligned256(val),
6957 simd: self,
6958 }
6959 }
6960 #[inline(always)]
6961 fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4<Self> {
6962 mask64x4 {
6963 val: crate::support::Aligned256(*val),
6964 simd: self,
6965 }
6966 }
6967 #[inline(always)]
6968 fn as_array_mask64x4(self, a: mask64x4<Self>) -> [i64; 4usize] {
6969 a.val.0
6970 }
6971 #[inline(always)]
6972 fn as_array_ref_mask64x4(self, a: &mask64x4<Self>) -> &[i64; 4usize] {
6973 &a.val.0
6974 }
6975 #[inline(always)]
6976 fn as_array_mut_mask64x4(self, a: &mut mask64x4<Self>) -> &mut [i64; 4usize] {
6977 &mut a.val.0
6978 }
6979 #[inline(always)]
6980 fn store_array_mask64x4(self, a: mask64x4<Self>, dest: &mut [i64; 4usize]) -> () {
6981 *dest = a.val.0;
6982 }
6983 #[inline(always)]
6984 fn cvt_from_bytes_mask64x4(self, a: u8x32<Self>) -> mask64x4<Self> {
6985 unsafe {
6986 mask64x4 {
6987 val: core::mem::transmute(a.val),
6988 simd: self,
6989 }
6990 }
6991 }
6992 #[inline(always)]
6993 fn cvt_to_bytes_mask64x4(self, a: mask64x4<Self>) -> u8x32<Self> {
6994 unsafe {
6995 u8x32 {
6996 val: core::mem::transmute(a.val),
6997 simd: self,
6998 }
6999 }
7000 }
7001 #[inline(always)]
7002 fn slide_mask64x4<const SHIFT: usize>(
7003 self,
7004 a: mask64x4<Self>,
7005 b: mask64x4<Self>,
7006 ) -> mask64x4<Self> {
7007 let mut dest = [Default::default(); 4usize];
7008 dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7009 dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7010 dest.simd_into(self)
7011 }
7012 #[inline(always)]
7013 fn slide_within_blocks_mask64x4<const SHIFT: usize>(
7014 self,
7015 a: mask64x4<Self>,
7016 b: mask64x4<Self>,
7017 ) -> mask64x4<Self> {
7018 let (a0, a1) = self.split_mask64x4(a);
7019 let (b0, b1) = self.split_mask64x4(b);
7020 self.combine_mask64x2(
7021 self.slide_within_blocks_mask64x2::<SHIFT>(a0, b0),
7022 self.slide_within_blocks_mask64x2::<SHIFT>(a1, b1),
7023 )
7024 }
7025 #[inline(always)]
7026 fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7027 let (a0, a1) = self.split_mask64x4(a);
7028 let (b0, b1) = self.split_mask64x4(b);
7029 self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
7030 }
7031 #[inline(always)]
7032 fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7033 let (a0, a1) = self.split_mask64x4(a);
7034 let (b0, b1) = self.split_mask64x4(b);
7035 self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
7036 }
7037 #[inline(always)]
7038 fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7039 let (a0, a1) = self.split_mask64x4(a);
7040 let (b0, b1) = self.split_mask64x4(b);
7041 self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
7042 }
7043 #[inline(always)]
7044 fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
7045 let (a0, a1) = self.split_mask64x4(a);
7046 self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
7047 }
7048 #[inline(always)]
7049 fn select_mask64x4(
7050 self,
7051 a: mask64x4<Self>,
7052 b: mask64x4<Self>,
7053 c: mask64x4<Self>,
7054 ) -> mask64x4<Self> {
7055 let (a0, a1) = self.split_mask64x4(a);
7056 let (b0, b1) = self.split_mask64x4(b);
7057 let (c0, c1) = self.split_mask64x4(c);
7058 self.combine_mask64x2(
7059 self.select_mask64x2(a0, b0, c0),
7060 self.select_mask64x2(a1, b1, c1),
7061 )
7062 }
7063 #[inline(always)]
7064 fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7065 let (a0, a1) = self.split_mask64x4(a);
7066 let (b0, b1) = self.split_mask64x4(b);
7067 self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
7068 }
7069 #[inline(always)]
7070 fn any_true_mask64x4(self, a: mask64x4<Self>) -> bool {
7071 let (a0, a1) = self.split_mask64x4(a);
7072 self.any_true_mask64x2(a0) || self.any_true_mask64x2(a1)
7073 }
7074 #[inline(always)]
7075 fn all_true_mask64x4(self, a: mask64x4<Self>) -> bool {
7076 let (a0, a1) = self.split_mask64x4(a);
7077 self.all_true_mask64x2(a0) && self.all_true_mask64x2(a1)
7078 }
7079 #[inline(always)]
7080 fn any_false_mask64x4(self, a: mask64x4<Self>) -> bool {
7081 let (a0, a1) = self.split_mask64x4(a);
7082 self.any_false_mask64x2(a0) || self.any_false_mask64x2(a1)
7083 }
7084 #[inline(always)]
7085 fn all_false_mask64x4(self, a: mask64x4<Self>) -> bool {
7086 let (a0, a1) = self.split_mask64x4(a);
7087 self.all_false_mask64x2(a0) && self.all_false_mask64x2(a1)
7088 }
7089 #[inline(always)]
7090 fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
7091 let mut result = [0; 8usize];
7092 result[0..4usize].copy_from_slice(&a.val.0);
7093 result[4usize..8usize].copy_from_slice(&b.val.0);
7094 result.simd_into(self)
7095 }
7096 #[inline(always)]
7097 fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
7098 let mut b0 = [0; 2usize];
7099 let mut b1 = [0; 2usize];
7100 b0.copy_from_slice(&a.val.0[0..2usize]);
7101 b1.copy_from_slice(&a.val.0[2usize..4usize]);
7102 (b0.simd_into(self), b1.simd_into(self))
7103 }
7104 #[inline(always)]
7105 fn splat_f32x16(self, val: f32) -> f32x16<Self> {
7106 let half = self.splat_f32x8(val);
7107 self.combine_f32x8(half, half)
7108 }
7109 #[inline(always)]
7110 fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16<Self> {
7111 f32x16 {
7112 val: crate::support::Aligned512(val),
7113 simd: self,
7114 }
7115 }
7116 #[inline(always)]
7117 fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16<Self> {
7118 f32x16 {
7119 val: crate::support::Aligned512(*val),
7120 simd: self,
7121 }
7122 }
7123 #[inline(always)]
7124 fn as_array_f32x16(self, a: f32x16<Self>) -> [f32; 16usize] {
7125 a.val.0
7126 }
7127 #[inline(always)]
7128 fn as_array_ref_f32x16(self, a: &f32x16<Self>) -> &[f32; 16usize] {
7129 &a.val.0
7130 }
7131 #[inline(always)]
7132 fn as_array_mut_f32x16(self, a: &mut f32x16<Self>) -> &mut [f32; 16usize] {
7133 &mut a.val.0
7134 }
7135 #[inline(always)]
7136 fn store_array_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
7137 *dest = a.val.0;
7138 }
7139 #[inline(always)]
7140 fn cvt_from_bytes_f32x16(self, a: u8x64<Self>) -> f32x16<Self> {
7141 unsafe {
7142 f32x16 {
7143 val: core::mem::transmute(a.val),
7144 simd: self,
7145 }
7146 }
7147 }
7148 #[inline(always)]
7149 fn cvt_to_bytes_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
7150 unsafe {
7151 u8x64 {
7152 val: core::mem::transmute(a.val),
7153 simd: self,
7154 }
7155 }
7156 }
7157 #[inline(always)]
7158 fn slide_f32x16<const SHIFT: usize>(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7159 let mut dest = [Default::default(); 16usize];
7160 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7161 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7162 dest.simd_into(self)
7163 }
7164 #[inline(always)]
7165 fn slide_within_blocks_f32x16<const SHIFT: usize>(
7166 self,
7167 a: f32x16<Self>,
7168 b: f32x16<Self>,
7169 ) -> f32x16<Self> {
7170 let (a0, a1) = self.split_f32x16(a);
7171 let (b0, b1) = self.split_f32x16(b);
7172 self.combine_f32x8(
7173 self.slide_within_blocks_f32x8::<SHIFT>(a0, b0),
7174 self.slide_within_blocks_f32x8::<SHIFT>(a1, b1),
7175 )
7176 }
7177 #[inline(always)]
7178 fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7179 let (a0, a1) = self.split_f32x16(a);
7180 self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
7181 }
7182 #[inline(always)]
7183 fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7184 let (a0, a1) = self.split_f32x16(a);
7185 self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
7186 }
7187 #[inline(always)]
7188 fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7189 let (a0, a1) = self.split_f32x16(a);
7190 self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
7191 }
7192 #[inline(always)]
7193 fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7194 let (a0, a1) = self.split_f32x16(a);
7195 let (b0, b1) = self.split_f32x16(b);
7196 self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
7197 }
7198 #[inline(always)]
7199 fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7200 let (a0, a1) = self.split_f32x16(a);
7201 let (b0, b1) = self.split_f32x16(b);
7202 self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
7203 }
7204 #[inline(always)]
7205 fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7206 let (a0, a1) = self.split_f32x16(a);
7207 let (b0, b1) = self.split_f32x16(b);
7208 self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
7209 }
7210 #[inline(always)]
7211 fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7212 let (a0, a1) = self.split_f32x16(a);
7213 let (b0, b1) = self.split_f32x16(b);
7214 self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
7215 }
7216 #[inline(always)]
7217 fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7218 let (a0, a1) = self.split_f32x16(a);
7219 let (b0, b1) = self.split_f32x16(b);
7220 self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
7221 }
7222 #[inline(always)]
7223 fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7224 let (a0, a1) = self.split_f32x16(a);
7225 let (b0, b1) = self.split_f32x16(b);
7226 self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
7227 }
7228 #[inline(always)]
7229 fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7230 let (a0, a1) = self.split_f32x16(a);
7231 let (b0, b1) = self.split_f32x16(b);
7232 self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
7233 }
7234 #[inline(always)]
7235 fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7236 let (a0, a1) = self.split_f32x16(a);
7237 let (b0, b1) = self.split_f32x16(b);
7238 self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
7239 }
7240 #[inline(always)]
7241 fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7242 let (a0, a1) = self.split_f32x16(a);
7243 let (b0, b1) = self.split_f32x16(b);
7244 self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
7245 }
7246 #[inline(always)]
7247 fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7248 let (a0, a1) = self.split_f32x16(a);
7249 let (b0, b1) = self.split_f32x16(b);
7250 self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
7251 }
7252 #[inline(always)]
7253 fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7254 let (a0, _) = self.split_f32x16(a);
7255 let (b0, _) = self.split_f32x16(b);
7256 self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
7257 }
7258 #[inline(always)]
7259 fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7260 let (_, a1) = self.split_f32x16(a);
7261 let (_, b1) = self.split_f32x16(b);
7262 self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
7263 }
7264 #[inline(always)]
7265 fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7266 let (a0, a1) = self.split_f32x16(a);
7267 let (b0, b1) = self.split_f32x16(b);
7268 self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
7269 }
7270 #[inline(always)]
7271 fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7272 let (a0, a1) = self.split_f32x16(a);
7273 let (b0, b1) = self.split_f32x16(b);
7274 self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
7275 }
7276 #[inline(always)]
7277 fn interleave_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> (f32x16<Self>, f32x16<Self>) {
7278 let (a0, a1) = self.split_f32x16(a);
7279 let (b0, b1) = self.split_f32x16(b);
7280 let lo_lo = self.zip_low_f32x8(a0, b0);
7281 let lo_hi = self.zip_high_f32x8(a0, b0);
7282 let hi_lo = self.zip_low_f32x8(a1, b1);
7283 let hi_hi = self.zip_high_f32x8(a1, b1);
7284 (
7285 self.combine_f32x8(lo_lo, lo_hi),
7286 self.combine_f32x8(hi_lo, hi_hi),
7287 )
7288 }
7289 #[inline(always)]
7290 fn deinterleave_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> (f32x16<Self>, f32x16<Self>) {
7291 let (a0, a1) = self.split_f32x16(a);
7292 let (b0, b1) = self.split_f32x16(b);
7293 let lo_even = self.unzip_low_f32x8(a0, a1);
7294 let lo_odd = self.unzip_high_f32x8(a0, a1);
7295 let hi_even = self.unzip_low_f32x8(b0, b1);
7296 let hi_odd = self.unzip_high_f32x8(b0, b1);
7297 (
7298 self.combine_f32x8(lo_even, hi_even),
7299 self.combine_f32x8(lo_odd, hi_odd),
7300 )
7301 }
7302 #[inline(always)]
7303 fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7304 let (a0, a1) = self.split_f32x16(a);
7305 let (b0, b1) = self.split_f32x16(b);
7306 self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
7307 }
7308 #[inline(always)]
7309 fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7310 let (a0, a1) = self.split_f32x16(a);
7311 let (b0, b1) = self.split_f32x16(b);
7312 self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
7313 }
7314 #[inline(always)]
7315 fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7316 let (a0, a1) = self.split_f32x16(a);
7317 let (b0, b1) = self.split_f32x16(b);
7318 self.combine_f32x8(
7319 self.max_precise_f32x8(a0, b0),
7320 self.max_precise_f32x8(a1, b1),
7321 )
7322 }
7323 #[inline(always)]
7324 fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7325 let (a0, a1) = self.split_f32x16(a);
7326 let (b0, b1) = self.split_f32x16(b);
7327 self.combine_f32x8(
7328 self.min_precise_f32x8(a0, b0),
7329 self.min_precise_f32x8(a1, b1),
7330 )
7331 }
7332 #[inline(always)]
7333 fn mul_add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7334 let (a0, a1) = self.split_f32x16(a);
7335 let (b0, b1) = self.split_f32x16(b);
7336 let (c0, c1) = self.split_f32x16(c);
7337 self.combine_f32x8(
7338 self.mul_add_f32x8(a0, b0, c0),
7339 self.mul_add_f32x8(a1, b1, c1),
7340 )
7341 }
7342 #[inline(always)]
7343 fn mul_sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7344 let (a0, a1) = self.split_f32x16(a);
7345 let (b0, b1) = self.split_f32x16(b);
7346 let (c0, c1) = self.split_f32x16(c);
7347 self.combine_f32x8(
7348 self.mul_sub_f32x8(a0, b0, c0),
7349 self.mul_sub_f32x8(a1, b1, c1),
7350 )
7351 }
7352 #[inline(always)]
7353 fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7354 let (a0, a1) = self.split_f32x16(a);
7355 self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
7356 }
7357 #[inline(always)]
7358 fn ceil_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7359 let (a0, a1) = self.split_f32x16(a);
7360 self.combine_f32x8(self.ceil_f32x8(a0), self.ceil_f32x8(a1))
7361 }
7362 #[inline(always)]
7363 fn round_ties_even_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7364 let (a0, a1) = self.split_f32x16(a);
7365 self.combine_f32x8(
7366 self.round_ties_even_f32x8(a0),
7367 self.round_ties_even_f32x8(a1),
7368 )
7369 }
7370 #[inline(always)]
7371 fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7372 let (a0, a1) = self.split_f32x16(a);
7373 self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
7374 }
7375 #[inline(always)]
7376 fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7377 let (a0, a1) = self.split_f32x16(a);
7378 self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
7379 }
7380 #[inline(always)]
7381 fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7382 let (a0, a1) = self.split_mask32x16(a);
7383 let (b0, b1) = self.split_f32x16(b);
7384 let (c0, c1) = self.split_f32x16(c);
7385 self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
7386 }
7387 #[inline(always)]
7388 fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
7389 let mut b0 = [0.0; 8usize];
7390 let mut b1 = [0.0; 8usize];
7391 b0.copy_from_slice(&a.val.0[0..8usize]);
7392 b1.copy_from_slice(&a.val.0[8usize..16usize]);
7393 (b0.simd_into(self), b1.simd_into(self))
7394 }
7395 #[inline(always)]
7396 fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
7397 let (a0, a1) = self.split_f32x16(a);
7398 self.combine_f64x4(
7399 self.reinterpret_f64_f32x8(a0),
7400 self.reinterpret_f64_f32x8(a1),
7401 )
7402 }
7403 #[inline(always)]
7404 fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7405 let (a0, a1) = self.split_f32x16(a);
7406 self.combine_i32x8(
7407 self.reinterpret_i32_f32x8(a0),
7408 self.reinterpret_i32_f32x8(a1),
7409 )
7410 }
7411 #[inline(always)]
7412 fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
7413 [
7414 src[0usize],
7415 src[4usize],
7416 src[8usize],
7417 src[12usize],
7418 src[1usize],
7419 src[5usize],
7420 src[9usize],
7421 src[13usize],
7422 src[2usize],
7423 src[6usize],
7424 src[10usize],
7425 src[14usize],
7426 src[3usize],
7427 src[7usize],
7428 src[11usize],
7429 src[15usize],
7430 ]
7431 .simd_into(self)
7432 }
7433 #[inline(always)]
7434 fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
7435 *dest = [
7436 a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
7437 a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
7438 a[11usize], a[15usize],
7439 ];
7440 }
7441 #[inline(always)]
7442 fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
7443 let (a0, a1) = self.split_f32x16(a);
7444 self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
7445 }
7446 #[inline(always)]
7447 fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7448 let (a0, a1) = self.split_f32x16(a);
7449 self.combine_u32x8(
7450 self.reinterpret_u32_f32x8(a0),
7451 self.reinterpret_u32_f32x8(a1),
7452 )
7453 }
7454 #[inline(always)]
7455 fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7456 let (a0, a1) = self.split_f32x16(a);
7457 self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
7458 }
7459 #[inline(always)]
7460 fn cvt_u32_precise_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7461 let (a0, a1) = self.split_f32x16(a);
7462 self.combine_u32x8(
7463 self.cvt_u32_precise_f32x8(a0),
7464 self.cvt_u32_precise_f32x8(a1),
7465 )
7466 }
7467 #[inline(always)]
7468 fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7469 let (a0, a1) = self.split_f32x16(a);
7470 self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
7471 }
7472 #[inline(always)]
7473 fn cvt_i32_precise_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7474 let (a0, a1) = self.split_f32x16(a);
7475 self.combine_i32x8(
7476 self.cvt_i32_precise_f32x8(a0),
7477 self.cvt_i32_precise_f32x8(a1),
7478 )
7479 }
7480 #[inline(always)]
7481 fn splat_i8x64(self, val: i8) -> i8x64<Self> {
7482 let half = self.splat_i8x32(val);
7483 self.combine_i8x32(half, half)
7484 }
7485 #[inline(always)]
7486 fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64<Self> {
7487 i8x64 {
7488 val: crate::support::Aligned512(val),
7489 simd: self,
7490 }
7491 }
7492 #[inline(always)]
7493 fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64<Self> {
7494 i8x64 {
7495 val: crate::support::Aligned512(*val),
7496 simd: self,
7497 }
7498 }
7499 #[inline(always)]
7500 fn as_array_i8x64(self, a: i8x64<Self>) -> [i8; 64usize] {
7501 a.val.0
7502 }
7503 #[inline(always)]
7504 fn as_array_ref_i8x64(self, a: &i8x64<Self>) -> &[i8; 64usize] {
7505 &a.val.0
7506 }
7507 #[inline(always)]
7508 fn as_array_mut_i8x64(self, a: &mut i8x64<Self>) -> &mut [i8; 64usize] {
7509 &mut a.val.0
7510 }
7511 #[inline(always)]
7512 fn store_array_i8x64(self, a: i8x64<Self>, dest: &mut [i8; 64usize]) -> () {
7513 *dest = a.val.0;
7514 }
7515 #[inline(always)]
7516 fn cvt_from_bytes_i8x64(self, a: u8x64<Self>) -> i8x64<Self> {
7517 unsafe {
7518 i8x64 {
7519 val: core::mem::transmute(a.val),
7520 simd: self,
7521 }
7522 }
7523 }
7524 #[inline(always)]
7525 fn cvt_to_bytes_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
7526 unsafe {
7527 u8x64 {
7528 val: core::mem::transmute(a.val),
7529 simd: self,
7530 }
7531 }
7532 }
7533 #[inline(always)]
7534 fn slide_i8x64<const SHIFT: usize>(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7535 let mut dest = [Default::default(); 64usize];
7536 dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7537 dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7538 dest.simd_into(self)
7539 }
7540 #[inline(always)]
7541 fn slide_within_blocks_i8x64<const SHIFT: usize>(
7542 self,
7543 a: i8x64<Self>,
7544 b: i8x64<Self>,
7545 ) -> i8x64<Self> {
7546 let (a0, a1) = self.split_i8x64(a);
7547 let (b0, b1) = self.split_i8x64(b);
7548 self.combine_i8x32(
7549 self.slide_within_blocks_i8x32::<SHIFT>(a0, b0),
7550 self.slide_within_blocks_i8x32::<SHIFT>(a1, b1),
7551 )
7552 }
7553 #[inline(always)]
7554 fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7555 let (a0, a1) = self.split_i8x64(a);
7556 let (b0, b1) = self.split_i8x64(b);
7557 self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
7558 }
7559 #[inline(always)]
7560 fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7561 let (a0, a1) = self.split_i8x64(a);
7562 let (b0, b1) = self.split_i8x64(b);
7563 self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
7564 }
7565 #[inline(always)]
7566 fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7567 let (a0, a1) = self.split_i8x64(a);
7568 let (b0, b1) = self.split_i8x64(b);
7569 self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
7570 }
7571 #[inline(always)]
7572 fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7573 let (a0, a1) = self.split_i8x64(a);
7574 let (b0, b1) = self.split_i8x64(b);
7575 self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
7576 }
7577 #[inline(always)]
7578 fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7579 let (a0, a1) = self.split_i8x64(a);
7580 let (b0, b1) = self.split_i8x64(b);
7581 self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
7582 }
7583 #[inline(always)]
7584 fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7585 let (a0, a1) = self.split_i8x64(a);
7586 let (b0, b1) = self.split_i8x64(b);
7587 self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
7588 }
7589 #[inline(always)]
7590 fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
7591 let (a0, a1) = self.split_i8x64(a);
7592 self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
7593 }
7594 #[inline(always)]
7595 fn shl_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
7596 let (a0, a1) = self.split_i8x64(a);
7597 self.combine_i8x32(self.shl_i8x32(a0, shift), self.shl_i8x32(a1, shift))
7598 }
7599 #[inline(always)]
7600 fn shlv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7601 let (a0, a1) = self.split_i8x64(a);
7602 let (b0, b1) = self.split_i8x64(b);
7603 self.combine_i8x32(self.shlv_i8x32(a0, b0), self.shlv_i8x32(a1, b1))
7604 }
7605 #[inline(always)]
7606 fn shr_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
7607 let (a0, a1) = self.split_i8x64(a);
7608 self.combine_i8x32(self.shr_i8x32(a0, shift), self.shr_i8x32(a1, shift))
7609 }
7610 #[inline(always)]
7611 fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7612 let (a0, a1) = self.split_i8x64(a);
7613 let (b0, b1) = self.split_i8x64(b);
7614 self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
7615 }
7616 #[inline(always)]
7617 fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7618 let (a0, a1) = self.split_i8x64(a);
7619 let (b0, b1) = self.split_i8x64(b);
7620 self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
7621 }
7622 #[inline(always)]
7623 fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7624 let (a0, a1) = self.split_i8x64(a);
7625 let (b0, b1) = self.split_i8x64(b);
7626 self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
7627 }
7628 #[inline(always)]
7629 fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7630 let (a0, a1) = self.split_i8x64(a);
7631 let (b0, b1) = self.split_i8x64(b);
7632 self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
7633 }
7634 #[inline(always)]
7635 fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7636 let (a0, a1) = self.split_i8x64(a);
7637 let (b0, b1) = self.split_i8x64(b);
7638 self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
7639 }
7640 #[inline(always)]
7641 fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7642 let (a0, a1) = self.split_i8x64(a);
7643 let (b0, b1) = self.split_i8x64(b);
7644 self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
7645 }
7646 #[inline(always)]
7647 fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7648 let (a0, _) = self.split_i8x64(a);
7649 let (b0, _) = self.split_i8x64(b);
7650 self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
7651 }
7652 #[inline(always)]
7653 fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7654 let (_, a1) = self.split_i8x64(a);
7655 let (_, b1) = self.split_i8x64(b);
7656 self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
7657 }
7658 #[inline(always)]
7659 fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7660 let (a0, a1) = self.split_i8x64(a);
7661 let (b0, b1) = self.split_i8x64(b);
7662 self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
7663 }
7664 #[inline(always)]
7665 fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7666 let (a0, a1) = self.split_i8x64(a);
7667 let (b0, b1) = self.split_i8x64(b);
7668 self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
7669 }
7670 #[inline(always)]
7671 fn interleave_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> (i8x64<Self>, i8x64<Self>) {
7672 let (a0, a1) = self.split_i8x64(a);
7673 let (b0, b1) = self.split_i8x64(b);
7674 let lo_lo = self.zip_low_i8x32(a0, b0);
7675 let lo_hi = self.zip_high_i8x32(a0, b0);
7676 let hi_lo = self.zip_low_i8x32(a1, b1);
7677 let hi_hi = self.zip_high_i8x32(a1, b1);
7678 (
7679 self.combine_i8x32(lo_lo, lo_hi),
7680 self.combine_i8x32(hi_lo, hi_hi),
7681 )
7682 }
7683 #[inline(always)]
7684 fn deinterleave_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> (i8x64<Self>, i8x64<Self>) {
7685 let (a0, a1) = self.split_i8x64(a);
7686 let (b0, b1) = self.split_i8x64(b);
7687 let lo_even = self.unzip_low_i8x32(a0, a1);
7688 let lo_odd = self.unzip_high_i8x32(a0, a1);
7689 let hi_even = self.unzip_low_i8x32(b0, b1);
7690 let hi_odd = self.unzip_high_i8x32(b0, b1);
7691 (
7692 self.combine_i8x32(lo_even, hi_even),
7693 self.combine_i8x32(lo_odd, hi_odd),
7694 )
7695 }
7696 #[inline(always)]
7697 fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
7698 let (a0, a1) = self.split_mask8x64(a);
7699 let (b0, b1) = self.split_i8x64(b);
7700 let (c0, c1) = self.split_i8x64(c);
7701 self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
7702 }
7703 #[inline(always)]
7704 fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7705 let (a0, a1) = self.split_i8x64(a);
7706 let (b0, b1) = self.split_i8x64(b);
7707 self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
7708 }
7709 #[inline(always)]
7710 fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7711 let (a0, a1) = self.split_i8x64(a);
7712 let (b0, b1) = self.split_i8x64(b);
7713 self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
7714 }
7715 #[inline(always)]
7716 fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
7717 let mut b0 = [0; 32usize];
7718 let mut b1 = [0; 32usize];
7719 b0.copy_from_slice(&a.val.0[0..32usize]);
7720 b1.copy_from_slice(&a.val.0[32usize..64usize]);
7721 (b0.simd_into(self), b1.simd_into(self))
7722 }
7723 #[inline(always)]
7724 fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
7725 let (a0, a1) = self.split_i8x64(a);
7726 self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
7727 }
7728 #[inline(always)]
7729 fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
7730 let (a0, a1) = self.split_i8x64(a);
7731 self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
7732 }
7733 #[inline(always)]
7734 fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
7735 let (a0, a1) = self.split_i8x64(a);
7736 self.combine_u32x8(
7737 self.reinterpret_u32_i8x32(a0),
7738 self.reinterpret_u32_i8x32(a1),
7739 )
7740 }
7741 #[inline(always)]
7742 fn splat_u8x64(self, val: u8) -> u8x64<Self> {
7743 let half = self.splat_u8x32(val);
7744 self.combine_u8x32(half, half)
7745 }
7746 #[inline(always)]
7747 fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64<Self> {
7748 u8x64 {
7749 val: crate::support::Aligned512(val),
7750 simd: self,
7751 }
7752 }
7753 #[inline(always)]
7754 fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64<Self> {
7755 u8x64 {
7756 val: crate::support::Aligned512(*val),
7757 simd: self,
7758 }
7759 }
7760 #[inline(always)]
7761 fn as_array_u8x64(self, a: u8x64<Self>) -> [u8; 64usize] {
7762 a.val.0
7763 }
7764 #[inline(always)]
7765 fn as_array_ref_u8x64(self, a: &u8x64<Self>) -> &[u8; 64usize] {
7766 &a.val.0
7767 }
7768 #[inline(always)]
7769 fn as_array_mut_u8x64(self, a: &mut u8x64<Self>) -> &mut [u8; 64usize] {
7770 &mut a.val.0
7771 }
7772 #[inline(always)]
7773 fn store_array_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
7774 *dest = a.val.0;
7775 }
7776 #[inline(always)]
7777 fn cvt_from_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7778 unsafe {
7779 u8x64 {
7780 val: core::mem::transmute(a.val),
7781 simd: self,
7782 }
7783 }
7784 }
7785 #[inline(always)]
7786 fn cvt_to_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7787 unsafe {
7788 u8x64 {
7789 val: core::mem::transmute(a.val),
7790 simd: self,
7791 }
7792 }
7793 }
7794 #[inline(always)]
7795 fn slide_u8x64<const SHIFT: usize>(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7796 let mut dest = [Default::default(); 64usize];
7797 dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7798 dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7799 dest.simd_into(self)
7800 }
7801 #[inline(always)]
7802 fn slide_within_blocks_u8x64<const SHIFT: usize>(
7803 self,
7804 a: u8x64<Self>,
7805 b: u8x64<Self>,
7806 ) -> u8x64<Self> {
7807 let (a0, a1) = self.split_u8x64(a);
7808 let (b0, b1) = self.split_u8x64(b);
7809 self.combine_u8x32(
7810 self.slide_within_blocks_u8x32::<SHIFT>(a0, b0),
7811 self.slide_within_blocks_u8x32::<SHIFT>(a1, b1),
7812 )
7813 }
7814 #[inline(always)]
7815 fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7816 let (a0, a1) = self.split_u8x64(a);
7817 let (b0, b1) = self.split_u8x64(b);
7818 self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
7819 }
7820 #[inline(always)]
7821 fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7822 let (a0, a1) = self.split_u8x64(a);
7823 let (b0, b1) = self.split_u8x64(b);
7824 self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
7825 }
7826 #[inline(always)]
7827 fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7828 let (a0, a1) = self.split_u8x64(a);
7829 let (b0, b1) = self.split_u8x64(b);
7830 self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
7831 }
7832 #[inline(always)]
7833 fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7834 let (a0, a1) = self.split_u8x64(a);
7835 let (b0, b1) = self.split_u8x64(b);
7836 self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
7837 }
7838 #[inline(always)]
7839 fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7840 let (a0, a1) = self.split_u8x64(a);
7841 let (b0, b1) = self.split_u8x64(b);
7842 self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
7843 }
7844 #[inline(always)]
7845 fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7846 let (a0, a1) = self.split_u8x64(a);
7847 let (b0, b1) = self.split_u8x64(b);
7848 self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
7849 }
7850 #[inline(always)]
7851 fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7852 let (a0, a1) = self.split_u8x64(a);
7853 self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
7854 }
7855 #[inline(always)]
7856 fn shl_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
7857 let (a0, a1) = self.split_u8x64(a);
7858 self.combine_u8x32(self.shl_u8x32(a0, shift), self.shl_u8x32(a1, shift))
7859 }
7860 #[inline(always)]
7861 fn shlv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7862 let (a0, a1) = self.split_u8x64(a);
7863 let (b0, b1) = self.split_u8x64(b);
7864 self.combine_u8x32(self.shlv_u8x32(a0, b0), self.shlv_u8x32(a1, b1))
7865 }
7866 #[inline(always)]
7867 fn shr_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
7868 let (a0, a1) = self.split_u8x64(a);
7869 self.combine_u8x32(self.shr_u8x32(a0, shift), self.shr_u8x32(a1, shift))
7870 }
7871 #[inline(always)]
7872 fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7873 let (a0, a1) = self.split_u8x64(a);
7874 let (b0, b1) = self.split_u8x64(b);
7875 self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
7876 }
7877 #[inline(always)]
7878 fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7879 let (a0, a1) = self.split_u8x64(a);
7880 let (b0, b1) = self.split_u8x64(b);
7881 self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
7882 }
7883 #[inline(always)]
7884 fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7885 let (a0, a1) = self.split_u8x64(a);
7886 let (b0, b1) = self.split_u8x64(b);
7887 self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
7888 }
7889 #[inline(always)]
7890 fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7891 let (a0, a1) = self.split_u8x64(a);
7892 let (b0, b1) = self.split_u8x64(b);
7893 self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
7894 }
7895 #[inline(always)]
7896 fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7897 let (a0, a1) = self.split_u8x64(a);
7898 let (b0, b1) = self.split_u8x64(b);
7899 self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
7900 }
7901 #[inline(always)]
7902 fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7903 let (a0, a1) = self.split_u8x64(a);
7904 let (b0, b1) = self.split_u8x64(b);
7905 self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
7906 }
7907 #[inline(always)]
7908 fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7909 let (a0, _) = self.split_u8x64(a);
7910 let (b0, _) = self.split_u8x64(b);
7911 self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
7912 }
7913 #[inline(always)]
7914 fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7915 let (_, a1) = self.split_u8x64(a);
7916 let (_, b1) = self.split_u8x64(b);
7917 self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
7918 }
7919 #[inline(always)]
7920 fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7921 let (a0, a1) = self.split_u8x64(a);
7922 let (b0, b1) = self.split_u8x64(b);
7923 self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
7924 }
7925 #[inline(always)]
7926 fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7927 let (a0, a1) = self.split_u8x64(a);
7928 let (b0, b1) = self.split_u8x64(b);
7929 self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
7930 }
7931 #[inline(always)]
7932 fn interleave_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> (u8x64<Self>, u8x64<Self>) {
7933 let (a0, a1) = self.split_u8x64(a);
7934 let (b0, b1) = self.split_u8x64(b);
7935 let lo_lo = self.zip_low_u8x32(a0, b0);
7936 let lo_hi = self.zip_high_u8x32(a0, b0);
7937 let hi_lo = self.zip_low_u8x32(a1, b1);
7938 let hi_hi = self.zip_high_u8x32(a1, b1);
7939 (
7940 self.combine_u8x32(lo_lo, lo_hi),
7941 self.combine_u8x32(hi_lo, hi_hi),
7942 )
7943 }
7944 #[inline(always)]
7945 fn deinterleave_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> (u8x64<Self>, u8x64<Self>) {
7946 let (a0, a1) = self.split_u8x64(a);
7947 let (b0, b1) = self.split_u8x64(b);
7948 let lo_even = self.unzip_low_u8x32(a0, a1);
7949 let lo_odd = self.unzip_high_u8x32(a0, a1);
7950 let hi_even = self.unzip_low_u8x32(b0, b1);
7951 let hi_odd = self.unzip_high_u8x32(b0, b1);
7952 (
7953 self.combine_u8x32(lo_even, hi_even),
7954 self.combine_u8x32(lo_odd, hi_odd),
7955 )
7956 }
7957 #[inline(always)]
7958 fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
7959 let (a0, a1) = self.split_mask8x64(a);
7960 let (b0, b1) = self.split_u8x64(b);
7961 let (c0, c1) = self.split_u8x64(c);
7962 self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
7963 }
7964 #[inline(always)]
7965 fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7966 let (a0, a1) = self.split_u8x64(a);
7967 let (b0, b1) = self.split_u8x64(b);
7968 self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
7969 }
7970 #[inline(always)]
7971 fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7972 let (a0, a1) = self.split_u8x64(a);
7973 let (b0, b1) = self.split_u8x64(b);
7974 self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
7975 }
7976 #[inline(always)]
7977 fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
7978 let mut b0 = [0; 32usize];
7979 let mut b1 = [0; 32usize];
7980 b0.copy_from_slice(&a.val.0[0..32usize]);
7981 b1.copy_from_slice(&a.val.0[32usize..64usize]);
7982 (b0.simd_into(self), b1.simd_into(self))
7983 }
7984 #[inline(always)]
7985 fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
7986 [
7987 src[0usize],
7988 src[4usize],
7989 src[8usize],
7990 src[12usize],
7991 src[16usize],
7992 src[20usize],
7993 src[24usize],
7994 src[28usize],
7995 src[32usize],
7996 src[36usize],
7997 src[40usize],
7998 src[44usize],
7999 src[48usize],
8000 src[52usize],
8001 src[56usize],
8002 src[60usize],
8003 src[1usize],
8004 src[5usize],
8005 src[9usize],
8006 src[13usize],
8007 src[17usize],
8008 src[21usize],
8009 src[25usize],
8010 src[29usize],
8011 src[33usize],
8012 src[37usize],
8013 src[41usize],
8014 src[45usize],
8015 src[49usize],
8016 src[53usize],
8017 src[57usize],
8018 src[61usize],
8019 src[2usize],
8020 src[6usize],
8021 src[10usize],
8022 src[14usize],
8023 src[18usize],
8024 src[22usize],
8025 src[26usize],
8026 src[30usize],
8027 src[34usize],
8028 src[38usize],
8029 src[42usize],
8030 src[46usize],
8031 src[50usize],
8032 src[54usize],
8033 src[58usize],
8034 src[62usize],
8035 src[3usize],
8036 src[7usize],
8037 src[11usize],
8038 src[15usize],
8039 src[19usize],
8040 src[23usize],
8041 src[27usize],
8042 src[31usize],
8043 src[35usize],
8044 src[39usize],
8045 src[43usize],
8046 src[47usize],
8047 src[51usize],
8048 src[55usize],
8049 src[59usize],
8050 src[63usize],
8051 ]
8052 .simd_into(self)
8053 }
8054 #[inline(always)]
8055 fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
8056 *dest = [
8057 a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
8058 a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
8059 a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
8060 a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
8061 a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
8062 a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
8063 a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
8064 a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
8065 a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
8066 a[63usize],
8067 ];
8068 }
8069 #[inline(always)]
8070 fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
8071 let (a0, a1) = self.split_u8x64(a);
8072 self.combine_u32x8(
8073 self.reinterpret_u32_u8x32(a0),
8074 self.reinterpret_u32_u8x32(a1),
8075 )
8076 }
8077 #[inline(always)]
8078 fn splat_mask8x64(self, val: i8) -> mask8x64<Self> {
8079 let half = self.splat_mask8x32(val);
8080 self.combine_mask8x32(half, half)
8081 }
8082 #[inline(always)]
8083 fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64<Self> {
8084 mask8x64 {
8085 val: crate::support::Aligned512(val),
8086 simd: self,
8087 }
8088 }
8089 #[inline(always)]
8090 fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64<Self> {
8091 mask8x64 {
8092 val: crate::support::Aligned512(*val),
8093 simd: self,
8094 }
8095 }
8096 #[inline(always)]
8097 fn as_array_mask8x64(self, a: mask8x64<Self>) -> [i8; 64usize] {
8098 a.val.0
8099 }
8100 #[inline(always)]
8101 fn as_array_ref_mask8x64(self, a: &mask8x64<Self>) -> &[i8; 64usize] {
8102 &a.val.0
8103 }
8104 #[inline(always)]
8105 fn as_array_mut_mask8x64(self, a: &mut mask8x64<Self>) -> &mut [i8; 64usize] {
8106 &mut a.val.0
8107 }
8108 #[inline(always)]
8109 fn store_array_mask8x64(self, a: mask8x64<Self>, dest: &mut [i8; 64usize]) -> () {
8110 *dest = a.val.0;
8111 }
8112 #[inline(always)]
8113 fn cvt_from_bytes_mask8x64(self, a: u8x64<Self>) -> mask8x64<Self> {
8114 unsafe {
8115 mask8x64 {
8116 val: core::mem::transmute(a.val),
8117 simd: self,
8118 }
8119 }
8120 }
8121 #[inline(always)]
8122 fn cvt_to_bytes_mask8x64(self, a: mask8x64<Self>) -> u8x64<Self> {
8123 unsafe {
8124 u8x64 {
8125 val: core::mem::transmute(a.val),
8126 simd: self,
8127 }
8128 }
8129 }
8130 #[inline(always)]
8131 fn slide_mask8x64<const SHIFT: usize>(
8132 self,
8133 a: mask8x64<Self>,
8134 b: mask8x64<Self>,
8135 ) -> mask8x64<Self> {
8136 let mut dest = [Default::default(); 64usize];
8137 dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8138 dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8139 dest.simd_into(self)
8140 }
8141 #[inline(always)]
8142 fn slide_within_blocks_mask8x64<const SHIFT: usize>(
8143 self,
8144 a: mask8x64<Self>,
8145 b: mask8x64<Self>,
8146 ) -> mask8x64<Self> {
8147 let (a0, a1) = self.split_mask8x64(a);
8148 let (b0, b1) = self.split_mask8x64(b);
8149 self.combine_mask8x32(
8150 self.slide_within_blocks_mask8x32::<SHIFT>(a0, b0),
8151 self.slide_within_blocks_mask8x32::<SHIFT>(a1, b1),
8152 )
8153 }
8154 #[inline(always)]
8155 fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8156 let (a0, a1) = self.split_mask8x64(a);
8157 let (b0, b1) = self.split_mask8x64(b);
8158 self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
8159 }
8160 #[inline(always)]
8161 fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8162 let (a0, a1) = self.split_mask8x64(a);
8163 let (b0, b1) = self.split_mask8x64(b);
8164 self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
8165 }
8166 #[inline(always)]
8167 fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8168 let (a0, a1) = self.split_mask8x64(a);
8169 let (b0, b1) = self.split_mask8x64(b);
8170 self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
8171 }
8172 #[inline(always)]
8173 fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
8174 let (a0, a1) = self.split_mask8x64(a);
8175 self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
8176 }
8177 #[inline(always)]
8178 fn select_mask8x64(
8179 self,
8180 a: mask8x64<Self>,
8181 b: mask8x64<Self>,
8182 c: mask8x64<Self>,
8183 ) -> mask8x64<Self> {
8184 let (a0, a1) = self.split_mask8x64(a);
8185 let (b0, b1) = self.split_mask8x64(b);
8186 let (c0, c1) = self.split_mask8x64(c);
8187 self.combine_mask8x32(
8188 self.select_mask8x32(a0, b0, c0),
8189 self.select_mask8x32(a1, b1, c1),
8190 )
8191 }
8192 #[inline(always)]
8193 fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8194 let (a0, a1) = self.split_mask8x64(a);
8195 let (b0, b1) = self.split_mask8x64(b);
8196 self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
8197 }
8198 #[inline(always)]
8199 fn any_true_mask8x64(self, a: mask8x64<Self>) -> bool {
8200 let (a0, a1) = self.split_mask8x64(a);
8201 self.any_true_mask8x32(a0) || self.any_true_mask8x32(a1)
8202 }
8203 #[inline(always)]
8204 fn all_true_mask8x64(self, a: mask8x64<Self>) -> bool {
8205 let (a0, a1) = self.split_mask8x64(a);
8206 self.all_true_mask8x32(a0) && self.all_true_mask8x32(a1)
8207 }
8208 #[inline(always)]
8209 fn any_false_mask8x64(self, a: mask8x64<Self>) -> bool {
8210 let (a0, a1) = self.split_mask8x64(a);
8211 self.any_false_mask8x32(a0) || self.any_false_mask8x32(a1)
8212 }
8213 #[inline(always)]
8214 fn all_false_mask8x64(self, a: mask8x64<Self>) -> bool {
8215 let (a0, a1) = self.split_mask8x64(a);
8216 self.all_false_mask8x32(a0) && self.all_false_mask8x32(a1)
8217 }
8218 #[inline(always)]
8219 fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
8220 let mut b0 = [0; 32usize];
8221 let mut b1 = [0; 32usize];
8222 b0.copy_from_slice(&a.val.0[0..32usize]);
8223 b1.copy_from_slice(&a.val.0[32usize..64usize]);
8224 (b0.simd_into(self), b1.simd_into(self))
8225 }
8226 #[inline(always)]
8227 fn splat_i16x32(self, val: i16) -> i16x32<Self> {
8228 let half = self.splat_i16x16(val);
8229 self.combine_i16x16(half, half)
8230 }
8231 #[inline(always)]
8232 fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32<Self> {
8233 i16x32 {
8234 val: crate::support::Aligned512(val),
8235 simd: self,
8236 }
8237 }
8238 #[inline(always)]
8239 fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32<Self> {
8240 i16x32 {
8241 val: crate::support::Aligned512(*val),
8242 simd: self,
8243 }
8244 }
8245 #[inline(always)]
8246 fn as_array_i16x32(self, a: i16x32<Self>) -> [i16; 32usize] {
8247 a.val.0
8248 }
8249 #[inline(always)]
8250 fn as_array_ref_i16x32(self, a: &i16x32<Self>) -> &[i16; 32usize] {
8251 &a.val.0
8252 }
8253 #[inline(always)]
8254 fn as_array_mut_i16x32(self, a: &mut i16x32<Self>) -> &mut [i16; 32usize] {
8255 &mut a.val.0
8256 }
8257 #[inline(always)]
8258 fn store_array_i16x32(self, a: i16x32<Self>, dest: &mut [i16; 32usize]) -> () {
8259 *dest = a.val.0;
8260 }
8261 #[inline(always)]
8262 fn cvt_from_bytes_i16x32(self, a: u8x64<Self>) -> i16x32<Self> {
8263 unsafe {
8264 i16x32 {
8265 val: core::mem::transmute(a.val),
8266 simd: self,
8267 }
8268 }
8269 }
8270 #[inline(always)]
8271 fn cvt_to_bytes_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
8272 unsafe {
8273 u8x64 {
8274 val: core::mem::transmute(a.val),
8275 simd: self,
8276 }
8277 }
8278 }
8279 #[inline(always)]
8280 fn slide_i16x32<const SHIFT: usize>(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8281 let mut dest = [Default::default(); 32usize];
8282 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8283 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8284 dest.simd_into(self)
8285 }
8286 #[inline(always)]
8287 fn slide_within_blocks_i16x32<const SHIFT: usize>(
8288 self,
8289 a: i16x32<Self>,
8290 b: i16x32<Self>,
8291 ) -> i16x32<Self> {
8292 let (a0, a1) = self.split_i16x32(a);
8293 let (b0, b1) = self.split_i16x32(b);
8294 self.combine_i16x16(
8295 self.slide_within_blocks_i16x16::<SHIFT>(a0, b0),
8296 self.slide_within_blocks_i16x16::<SHIFT>(a1, b1),
8297 )
8298 }
8299 #[inline(always)]
8300 fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8301 let (a0, a1) = self.split_i16x32(a);
8302 let (b0, b1) = self.split_i16x32(b);
8303 self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
8304 }
8305 #[inline(always)]
8306 fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8307 let (a0, a1) = self.split_i16x32(a);
8308 let (b0, b1) = self.split_i16x32(b);
8309 self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
8310 }
8311 #[inline(always)]
8312 fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8313 let (a0, a1) = self.split_i16x32(a);
8314 let (b0, b1) = self.split_i16x32(b);
8315 self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
8316 }
8317 #[inline(always)]
8318 fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8319 let (a0, a1) = self.split_i16x32(a);
8320 let (b0, b1) = self.split_i16x32(b);
8321 self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
8322 }
8323 #[inline(always)]
8324 fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8325 let (a0, a1) = self.split_i16x32(a);
8326 let (b0, b1) = self.split_i16x32(b);
8327 self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
8328 }
8329 #[inline(always)]
8330 fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8331 let (a0, a1) = self.split_i16x32(a);
8332 let (b0, b1) = self.split_i16x32(b);
8333 self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
8334 }
8335 #[inline(always)]
8336 fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
8337 let (a0, a1) = self.split_i16x32(a);
8338 self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
8339 }
8340 #[inline(always)]
8341 fn shl_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
8342 let (a0, a1) = self.split_i16x32(a);
8343 self.combine_i16x16(self.shl_i16x16(a0, shift), self.shl_i16x16(a1, shift))
8344 }
8345 #[inline(always)]
8346 fn shlv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8347 let (a0, a1) = self.split_i16x32(a);
8348 let (b0, b1) = self.split_i16x32(b);
8349 self.combine_i16x16(self.shlv_i16x16(a0, b0), self.shlv_i16x16(a1, b1))
8350 }
8351 #[inline(always)]
8352 fn shr_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
8353 let (a0, a1) = self.split_i16x32(a);
8354 self.combine_i16x16(self.shr_i16x16(a0, shift), self.shr_i16x16(a1, shift))
8355 }
8356 #[inline(always)]
8357 fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8358 let (a0, a1) = self.split_i16x32(a);
8359 let (b0, b1) = self.split_i16x32(b);
8360 self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
8361 }
8362 #[inline(always)]
8363 fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8364 let (a0, a1) = self.split_i16x32(a);
8365 let (b0, b1) = self.split_i16x32(b);
8366 self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
8367 }
8368 #[inline(always)]
8369 fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8370 let (a0, a1) = self.split_i16x32(a);
8371 let (b0, b1) = self.split_i16x32(b);
8372 self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
8373 }
8374 #[inline(always)]
8375 fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8376 let (a0, a1) = self.split_i16x32(a);
8377 let (b0, b1) = self.split_i16x32(b);
8378 self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
8379 }
8380 #[inline(always)]
8381 fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8382 let (a0, a1) = self.split_i16x32(a);
8383 let (b0, b1) = self.split_i16x32(b);
8384 self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
8385 }
8386 #[inline(always)]
8387 fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8388 let (a0, a1) = self.split_i16x32(a);
8389 let (b0, b1) = self.split_i16x32(b);
8390 self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
8391 }
8392 #[inline(always)]
8393 fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8394 let (a0, _) = self.split_i16x32(a);
8395 let (b0, _) = self.split_i16x32(b);
8396 self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
8397 }
8398 #[inline(always)]
8399 fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8400 let (_, a1) = self.split_i16x32(a);
8401 let (_, b1) = self.split_i16x32(b);
8402 self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
8403 }
8404 #[inline(always)]
8405 fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8406 let (a0, a1) = self.split_i16x32(a);
8407 let (b0, b1) = self.split_i16x32(b);
8408 self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
8409 }
8410 #[inline(always)]
8411 fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8412 let (a0, a1) = self.split_i16x32(a);
8413 let (b0, b1) = self.split_i16x32(b);
8414 self.combine_i16x16(
8415 self.unzip_high_i16x16(a0, a1),
8416 self.unzip_high_i16x16(b0, b1),
8417 )
8418 }
8419 #[inline(always)]
8420 fn interleave_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> (i16x32<Self>, i16x32<Self>) {
8421 let (a0, a1) = self.split_i16x32(a);
8422 let (b0, b1) = self.split_i16x32(b);
8423 let lo_lo = self.zip_low_i16x16(a0, b0);
8424 let lo_hi = self.zip_high_i16x16(a0, b0);
8425 let hi_lo = self.zip_low_i16x16(a1, b1);
8426 let hi_hi = self.zip_high_i16x16(a1, b1);
8427 (
8428 self.combine_i16x16(lo_lo, lo_hi),
8429 self.combine_i16x16(hi_lo, hi_hi),
8430 )
8431 }
8432 #[inline(always)]
8433 fn deinterleave_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> (i16x32<Self>, i16x32<Self>) {
8434 let (a0, a1) = self.split_i16x32(a);
8435 let (b0, b1) = self.split_i16x32(b);
8436 let lo_even = self.unzip_low_i16x16(a0, a1);
8437 let lo_odd = self.unzip_high_i16x16(a0, a1);
8438 let hi_even = self.unzip_low_i16x16(b0, b1);
8439 let hi_odd = self.unzip_high_i16x16(b0, b1);
8440 (
8441 self.combine_i16x16(lo_even, hi_even),
8442 self.combine_i16x16(lo_odd, hi_odd),
8443 )
8444 }
8445 #[inline(always)]
8446 fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
8447 let (a0, a1) = self.split_mask16x32(a);
8448 let (b0, b1) = self.split_i16x32(b);
8449 let (c0, c1) = self.split_i16x32(c);
8450 self.combine_i16x16(
8451 self.select_i16x16(a0, b0, c0),
8452 self.select_i16x16(a1, b1, c1),
8453 )
8454 }
8455 #[inline(always)]
8456 fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8457 let (a0, a1) = self.split_i16x32(a);
8458 let (b0, b1) = self.split_i16x32(b);
8459 self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
8460 }
8461 #[inline(always)]
8462 fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8463 let (a0, a1) = self.split_i16x32(a);
8464 let (b0, b1) = self.split_i16x32(b);
8465 self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
8466 }
8467 #[inline(always)]
8468 fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
8469 let mut b0 = [0; 16usize];
8470 let mut b1 = [0; 16usize];
8471 b0.copy_from_slice(&a.val.0[0..16usize]);
8472 b1.copy_from_slice(&a.val.0[16usize..32usize]);
8473 (b0.simd_into(self), b1.simd_into(self))
8474 }
8475 #[inline(always)]
8476 fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
8477 let (a0, a1) = self.split_i16x32(a);
8478 self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
8479 }
8480 #[inline(always)]
8481 fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
8482 let (a0, a1) = self.split_i16x32(a);
8483 self.combine_u8x32(
8484 self.reinterpret_u8_i16x16(a0),
8485 self.reinterpret_u8_i16x16(a1),
8486 )
8487 }
8488 #[inline(always)]
8489 fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
8490 let (a0, a1) = self.split_i16x32(a);
8491 self.combine_u32x8(
8492 self.reinterpret_u32_i16x16(a0),
8493 self.reinterpret_u32_i16x16(a1),
8494 )
8495 }
8496 #[inline(always)]
8497 fn splat_u16x32(self, val: u16) -> u16x32<Self> {
8498 let half = self.splat_u16x16(val);
8499 self.combine_u16x16(half, half)
8500 }
8501 #[inline(always)]
8502 fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32<Self> {
8503 u16x32 {
8504 val: crate::support::Aligned512(val),
8505 simd: self,
8506 }
8507 }
8508 #[inline(always)]
8509 fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32<Self> {
8510 u16x32 {
8511 val: crate::support::Aligned512(*val),
8512 simd: self,
8513 }
8514 }
8515 #[inline(always)]
8516 fn as_array_u16x32(self, a: u16x32<Self>) -> [u16; 32usize] {
8517 a.val.0
8518 }
8519 #[inline(always)]
8520 fn as_array_ref_u16x32(self, a: &u16x32<Self>) -> &[u16; 32usize] {
8521 &a.val.0
8522 }
8523 #[inline(always)]
8524 fn as_array_mut_u16x32(self, a: &mut u16x32<Self>) -> &mut [u16; 32usize] {
8525 &mut a.val.0
8526 }
8527 #[inline(always)]
8528 fn store_array_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
8529 *dest = a.val.0;
8530 }
8531 #[inline(always)]
8532 fn cvt_from_bytes_u16x32(self, a: u8x64<Self>) -> u16x32<Self> {
8533 unsafe {
8534 u16x32 {
8535 val: core::mem::transmute(a.val),
8536 simd: self,
8537 }
8538 }
8539 }
8540 #[inline(always)]
8541 fn cvt_to_bytes_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
8542 unsafe {
8543 u8x64 {
8544 val: core::mem::transmute(a.val),
8545 simd: self,
8546 }
8547 }
8548 }
8549 #[inline(always)]
8550 fn slide_u16x32<const SHIFT: usize>(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8551 let mut dest = [Default::default(); 32usize];
8552 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8553 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8554 dest.simd_into(self)
8555 }
8556 #[inline(always)]
8557 fn slide_within_blocks_u16x32<const SHIFT: usize>(
8558 self,
8559 a: u16x32<Self>,
8560 b: u16x32<Self>,
8561 ) -> u16x32<Self> {
8562 let (a0, a1) = self.split_u16x32(a);
8563 let (b0, b1) = self.split_u16x32(b);
8564 self.combine_u16x16(
8565 self.slide_within_blocks_u16x16::<SHIFT>(a0, b0),
8566 self.slide_within_blocks_u16x16::<SHIFT>(a1, b1),
8567 )
8568 }
8569 #[inline(always)]
8570 fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8571 let (a0, a1) = self.split_u16x32(a);
8572 let (b0, b1) = self.split_u16x32(b);
8573 self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
8574 }
8575 #[inline(always)]
8576 fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8577 let (a0, a1) = self.split_u16x32(a);
8578 let (b0, b1) = self.split_u16x32(b);
8579 self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
8580 }
8581 #[inline(always)]
8582 fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8583 let (a0, a1) = self.split_u16x32(a);
8584 let (b0, b1) = self.split_u16x32(b);
8585 self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
8586 }
8587 #[inline(always)]
8588 fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8589 let (a0, a1) = self.split_u16x32(a);
8590 let (b0, b1) = self.split_u16x32(b);
8591 self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
8592 }
8593 #[inline(always)]
8594 fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8595 let (a0, a1) = self.split_u16x32(a);
8596 let (b0, b1) = self.split_u16x32(b);
8597 self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
8598 }
8599 #[inline(always)]
8600 fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8601 let (a0, a1) = self.split_u16x32(a);
8602 let (b0, b1) = self.split_u16x32(b);
8603 self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
8604 }
8605 #[inline(always)]
8606 fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
8607 let (a0, a1) = self.split_u16x32(a);
8608 self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
8609 }
8610 #[inline(always)]
8611 fn shl_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
8612 let (a0, a1) = self.split_u16x32(a);
8613 self.combine_u16x16(self.shl_u16x16(a0, shift), self.shl_u16x16(a1, shift))
8614 }
8615 #[inline(always)]
8616 fn shlv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8617 let (a0, a1) = self.split_u16x32(a);
8618 let (b0, b1) = self.split_u16x32(b);
8619 self.combine_u16x16(self.shlv_u16x16(a0, b0), self.shlv_u16x16(a1, b1))
8620 }
8621 #[inline(always)]
8622 fn shr_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
8623 let (a0, a1) = self.split_u16x32(a);
8624 self.combine_u16x16(self.shr_u16x16(a0, shift), self.shr_u16x16(a1, shift))
8625 }
8626 #[inline(always)]
8627 fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8628 let (a0, a1) = self.split_u16x32(a);
8629 let (b0, b1) = self.split_u16x32(b);
8630 self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
8631 }
8632 #[inline(always)]
8633 fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8634 let (a0, a1) = self.split_u16x32(a);
8635 let (b0, b1) = self.split_u16x32(b);
8636 self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
8637 }
8638 #[inline(always)]
8639 fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8640 let (a0, a1) = self.split_u16x32(a);
8641 let (b0, b1) = self.split_u16x32(b);
8642 self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
8643 }
8644 #[inline(always)]
8645 fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8646 let (a0, a1) = self.split_u16x32(a);
8647 let (b0, b1) = self.split_u16x32(b);
8648 self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
8649 }
8650 #[inline(always)]
8651 fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8652 let (a0, a1) = self.split_u16x32(a);
8653 let (b0, b1) = self.split_u16x32(b);
8654 self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
8655 }
8656 #[inline(always)]
8657 fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8658 let (a0, a1) = self.split_u16x32(a);
8659 let (b0, b1) = self.split_u16x32(b);
8660 self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
8661 }
8662 #[inline(always)]
8663 fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8664 let (a0, _) = self.split_u16x32(a);
8665 let (b0, _) = self.split_u16x32(b);
8666 self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
8667 }
8668 #[inline(always)]
8669 fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8670 let (_, a1) = self.split_u16x32(a);
8671 let (_, b1) = self.split_u16x32(b);
8672 self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
8673 }
8674 #[inline(always)]
8675 fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8676 let (a0, a1) = self.split_u16x32(a);
8677 let (b0, b1) = self.split_u16x32(b);
8678 self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
8679 }
8680 #[inline(always)]
8681 fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8682 let (a0, a1) = self.split_u16x32(a);
8683 let (b0, b1) = self.split_u16x32(b);
8684 self.combine_u16x16(
8685 self.unzip_high_u16x16(a0, a1),
8686 self.unzip_high_u16x16(b0, b1),
8687 )
8688 }
8689 #[inline(always)]
8690 fn interleave_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> (u16x32<Self>, u16x32<Self>) {
8691 let (a0, a1) = self.split_u16x32(a);
8692 let (b0, b1) = self.split_u16x32(b);
8693 let lo_lo = self.zip_low_u16x16(a0, b0);
8694 let lo_hi = self.zip_high_u16x16(a0, b0);
8695 let hi_lo = self.zip_low_u16x16(a1, b1);
8696 let hi_hi = self.zip_high_u16x16(a1, b1);
8697 (
8698 self.combine_u16x16(lo_lo, lo_hi),
8699 self.combine_u16x16(hi_lo, hi_hi),
8700 )
8701 }
8702 #[inline(always)]
8703 fn deinterleave_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> (u16x32<Self>, u16x32<Self>) {
8704 let (a0, a1) = self.split_u16x32(a);
8705 let (b0, b1) = self.split_u16x32(b);
8706 let lo_even = self.unzip_low_u16x16(a0, a1);
8707 let lo_odd = self.unzip_high_u16x16(a0, a1);
8708 let hi_even = self.unzip_low_u16x16(b0, b1);
8709 let hi_odd = self.unzip_high_u16x16(b0, b1);
8710 (
8711 self.combine_u16x16(lo_even, hi_even),
8712 self.combine_u16x16(lo_odd, hi_odd),
8713 )
8714 }
8715 #[inline(always)]
8716 fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
8717 let (a0, a1) = self.split_mask16x32(a);
8718 let (b0, b1) = self.split_u16x32(b);
8719 let (c0, c1) = self.split_u16x32(c);
8720 self.combine_u16x16(
8721 self.select_u16x16(a0, b0, c0),
8722 self.select_u16x16(a1, b1, c1),
8723 )
8724 }
8725 #[inline(always)]
8726 fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8727 let (a0, a1) = self.split_u16x32(a);
8728 let (b0, b1) = self.split_u16x32(b);
8729 self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
8730 }
8731 #[inline(always)]
8732 fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8733 let (a0, a1) = self.split_u16x32(a);
8734 let (b0, b1) = self.split_u16x32(b);
8735 self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
8736 }
8737 #[inline(always)]
8738 fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
8739 let mut b0 = [0; 16usize];
8740 let mut b1 = [0; 16usize];
8741 b0.copy_from_slice(&a.val.0[0..16usize]);
8742 b1.copy_from_slice(&a.val.0[16usize..32usize]);
8743 (b0.simd_into(self), b1.simd_into(self))
8744 }
8745 #[inline(always)]
8746 fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
8747 [
8748 src[0usize],
8749 src[4usize],
8750 src[8usize],
8751 src[12usize],
8752 src[16usize],
8753 src[20usize],
8754 src[24usize],
8755 src[28usize],
8756 src[1usize],
8757 src[5usize],
8758 src[9usize],
8759 src[13usize],
8760 src[17usize],
8761 src[21usize],
8762 src[25usize],
8763 src[29usize],
8764 src[2usize],
8765 src[6usize],
8766 src[10usize],
8767 src[14usize],
8768 src[18usize],
8769 src[22usize],
8770 src[26usize],
8771 src[30usize],
8772 src[3usize],
8773 src[7usize],
8774 src[11usize],
8775 src[15usize],
8776 src[19usize],
8777 src[23usize],
8778 src[27usize],
8779 src[31usize],
8780 ]
8781 .simd_into(self)
8782 }
8783 #[inline(always)]
8784 fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
8785 *dest = [
8786 a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
8787 a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
8788 a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
8789 a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
8790 a[7usize], a[15usize], a[23usize], a[31usize],
8791 ];
8792 }
8793 #[inline(always)]
8794 fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
8795 let (a0, a1) = self.split_u16x32(a);
8796 self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
8797 }
8798 #[inline(always)]
8799 fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
8800 let (a0, a1) = self.split_u16x32(a);
8801 self.combine_u8x32(
8802 self.reinterpret_u8_u16x16(a0),
8803 self.reinterpret_u8_u16x16(a1),
8804 )
8805 }
8806 #[inline(always)]
8807 fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
8808 let (a0, a1) = self.split_u16x32(a);
8809 self.combine_u32x8(
8810 self.reinterpret_u32_u16x16(a0),
8811 self.reinterpret_u32_u16x16(a1),
8812 )
8813 }
8814 #[inline(always)]
8815 fn splat_mask16x32(self, val: i16) -> mask16x32<Self> {
8816 let half = self.splat_mask16x16(val);
8817 self.combine_mask16x16(half, half)
8818 }
8819 #[inline(always)]
8820 fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32<Self> {
8821 mask16x32 {
8822 val: crate::support::Aligned512(val),
8823 simd: self,
8824 }
8825 }
8826 #[inline(always)]
8827 fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32<Self> {
8828 mask16x32 {
8829 val: crate::support::Aligned512(*val),
8830 simd: self,
8831 }
8832 }
8833 #[inline(always)]
8834 fn as_array_mask16x32(self, a: mask16x32<Self>) -> [i16; 32usize] {
8835 a.val.0
8836 }
8837 #[inline(always)]
8838 fn as_array_ref_mask16x32(self, a: &mask16x32<Self>) -> &[i16; 32usize] {
8839 &a.val.0
8840 }
8841 #[inline(always)]
8842 fn as_array_mut_mask16x32(self, a: &mut mask16x32<Self>) -> &mut [i16; 32usize] {
8843 &mut a.val.0
8844 }
8845 #[inline(always)]
8846 fn store_array_mask16x32(self, a: mask16x32<Self>, dest: &mut [i16; 32usize]) -> () {
8847 *dest = a.val.0;
8848 }
8849 #[inline(always)]
8850 fn cvt_from_bytes_mask16x32(self, a: u8x64<Self>) -> mask16x32<Self> {
8851 unsafe {
8852 mask16x32 {
8853 val: core::mem::transmute(a.val),
8854 simd: self,
8855 }
8856 }
8857 }
8858 #[inline(always)]
8859 fn cvt_to_bytes_mask16x32(self, a: mask16x32<Self>) -> u8x64<Self> {
8860 unsafe {
8861 u8x64 {
8862 val: core::mem::transmute(a.val),
8863 simd: self,
8864 }
8865 }
8866 }
8867 #[inline(always)]
8868 fn slide_mask16x32<const SHIFT: usize>(
8869 self,
8870 a: mask16x32<Self>,
8871 b: mask16x32<Self>,
8872 ) -> mask16x32<Self> {
8873 let mut dest = [Default::default(); 32usize];
8874 dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8875 dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8876 dest.simd_into(self)
8877 }
8878 #[inline(always)]
8879 fn slide_within_blocks_mask16x32<const SHIFT: usize>(
8880 self,
8881 a: mask16x32<Self>,
8882 b: mask16x32<Self>,
8883 ) -> mask16x32<Self> {
8884 let (a0, a1) = self.split_mask16x32(a);
8885 let (b0, b1) = self.split_mask16x32(b);
8886 self.combine_mask16x16(
8887 self.slide_within_blocks_mask16x16::<SHIFT>(a0, b0),
8888 self.slide_within_blocks_mask16x16::<SHIFT>(a1, b1),
8889 )
8890 }
8891 #[inline(always)]
8892 fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8893 let (a0, a1) = self.split_mask16x32(a);
8894 let (b0, b1) = self.split_mask16x32(b);
8895 self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
8896 }
8897 #[inline(always)]
8898 fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8899 let (a0, a1) = self.split_mask16x32(a);
8900 let (b0, b1) = self.split_mask16x32(b);
8901 self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
8902 }
8903 #[inline(always)]
8904 fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8905 let (a0, a1) = self.split_mask16x32(a);
8906 let (b0, b1) = self.split_mask16x32(b);
8907 self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
8908 }
8909 #[inline(always)]
8910 fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
8911 let (a0, a1) = self.split_mask16x32(a);
8912 self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
8913 }
8914 #[inline(always)]
8915 fn select_mask16x32(
8916 self,
8917 a: mask16x32<Self>,
8918 b: mask16x32<Self>,
8919 c: mask16x32<Self>,
8920 ) -> mask16x32<Self> {
8921 let (a0, a1) = self.split_mask16x32(a);
8922 let (b0, b1) = self.split_mask16x32(b);
8923 let (c0, c1) = self.split_mask16x32(c);
8924 self.combine_mask16x16(
8925 self.select_mask16x16(a0, b0, c0),
8926 self.select_mask16x16(a1, b1, c1),
8927 )
8928 }
8929 #[inline(always)]
8930 fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8931 let (a0, a1) = self.split_mask16x32(a);
8932 let (b0, b1) = self.split_mask16x32(b);
8933 self.combine_mask16x16(
8934 self.simd_eq_mask16x16(a0, b0),
8935 self.simd_eq_mask16x16(a1, b1),
8936 )
8937 }
8938 #[inline(always)]
8939 fn any_true_mask16x32(self, a: mask16x32<Self>) -> bool {
8940 let (a0, a1) = self.split_mask16x32(a);
8941 self.any_true_mask16x16(a0) || self.any_true_mask16x16(a1)
8942 }
8943 #[inline(always)]
8944 fn all_true_mask16x32(self, a: mask16x32<Self>) -> bool {
8945 let (a0, a1) = self.split_mask16x32(a);
8946 self.all_true_mask16x16(a0) && self.all_true_mask16x16(a1)
8947 }
8948 #[inline(always)]
8949 fn any_false_mask16x32(self, a: mask16x32<Self>) -> bool {
8950 let (a0, a1) = self.split_mask16x32(a);
8951 self.any_false_mask16x16(a0) || self.any_false_mask16x16(a1)
8952 }
8953 #[inline(always)]
8954 fn all_false_mask16x32(self, a: mask16x32<Self>) -> bool {
8955 let (a0, a1) = self.split_mask16x32(a);
8956 self.all_false_mask16x16(a0) && self.all_false_mask16x16(a1)
8957 }
8958 #[inline(always)]
8959 fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
8960 let mut b0 = [0; 16usize];
8961 let mut b1 = [0; 16usize];
8962 b0.copy_from_slice(&a.val.0[0..16usize]);
8963 b1.copy_from_slice(&a.val.0[16usize..32usize]);
8964 (b0.simd_into(self), b1.simd_into(self))
8965 }
8966 #[inline(always)]
8967 fn splat_i32x16(self, val: i32) -> i32x16<Self> {
8968 let half = self.splat_i32x8(val);
8969 self.combine_i32x8(half, half)
8970 }
8971 #[inline(always)]
8972 fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16<Self> {
8973 i32x16 {
8974 val: crate::support::Aligned512(val),
8975 simd: self,
8976 }
8977 }
8978 #[inline(always)]
8979 fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16<Self> {
8980 i32x16 {
8981 val: crate::support::Aligned512(*val),
8982 simd: self,
8983 }
8984 }
8985 #[inline(always)]
8986 fn as_array_i32x16(self, a: i32x16<Self>) -> [i32; 16usize] {
8987 a.val.0
8988 }
8989 #[inline(always)]
8990 fn as_array_ref_i32x16(self, a: &i32x16<Self>) -> &[i32; 16usize] {
8991 &a.val.0
8992 }
8993 #[inline(always)]
8994 fn as_array_mut_i32x16(self, a: &mut i32x16<Self>) -> &mut [i32; 16usize] {
8995 &mut a.val.0
8996 }
8997 #[inline(always)]
8998 fn store_array_i32x16(self, a: i32x16<Self>, dest: &mut [i32; 16usize]) -> () {
8999 *dest = a.val.0;
9000 }
9001 #[inline(always)]
9002 fn cvt_from_bytes_i32x16(self, a: u8x64<Self>) -> i32x16<Self> {
9003 unsafe {
9004 i32x16 {
9005 val: core::mem::transmute(a.val),
9006 simd: self,
9007 }
9008 }
9009 }
9010 #[inline(always)]
9011 fn cvt_to_bytes_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
9012 unsafe {
9013 u8x64 {
9014 val: core::mem::transmute(a.val),
9015 simd: self,
9016 }
9017 }
9018 }
9019 #[inline(always)]
9020 fn slide_i32x16<const SHIFT: usize>(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9021 let mut dest = [Default::default(); 16usize];
9022 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9023 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9024 dest.simd_into(self)
9025 }
9026 #[inline(always)]
9027 fn slide_within_blocks_i32x16<const SHIFT: usize>(
9028 self,
9029 a: i32x16<Self>,
9030 b: i32x16<Self>,
9031 ) -> i32x16<Self> {
9032 let (a0, a1) = self.split_i32x16(a);
9033 let (b0, b1) = self.split_i32x16(b);
9034 self.combine_i32x8(
9035 self.slide_within_blocks_i32x8::<SHIFT>(a0, b0),
9036 self.slide_within_blocks_i32x8::<SHIFT>(a1, b1),
9037 )
9038 }
9039 #[inline(always)]
9040 fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9041 let (a0, a1) = self.split_i32x16(a);
9042 let (b0, b1) = self.split_i32x16(b);
9043 self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
9044 }
9045 #[inline(always)]
9046 fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9047 let (a0, a1) = self.split_i32x16(a);
9048 let (b0, b1) = self.split_i32x16(b);
9049 self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
9050 }
9051 #[inline(always)]
9052 fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9053 let (a0, a1) = self.split_i32x16(a);
9054 let (b0, b1) = self.split_i32x16(b);
9055 self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
9056 }
9057 #[inline(always)]
9058 fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9059 let (a0, a1) = self.split_i32x16(a);
9060 let (b0, b1) = self.split_i32x16(b);
9061 self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
9062 }
9063 #[inline(always)]
9064 fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9065 let (a0, a1) = self.split_i32x16(a);
9066 let (b0, b1) = self.split_i32x16(b);
9067 self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
9068 }
9069 #[inline(always)]
9070 fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9071 let (a0, a1) = self.split_i32x16(a);
9072 let (b0, b1) = self.split_i32x16(b);
9073 self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
9074 }
9075 #[inline(always)]
9076 fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
9077 let (a0, a1) = self.split_i32x16(a);
9078 self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
9079 }
9080 #[inline(always)]
9081 fn shl_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
9082 let (a0, a1) = self.split_i32x16(a);
9083 self.combine_i32x8(self.shl_i32x8(a0, shift), self.shl_i32x8(a1, shift))
9084 }
9085 #[inline(always)]
9086 fn shlv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9087 let (a0, a1) = self.split_i32x16(a);
9088 let (b0, b1) = self.split_i32x16(b);
9089 self.combine_i32x8(self.shlv_i32x8(a0, b0), self.shlv_i32x8(a1, b1))
9090 }
9091 #[inline(always)]
9092 fn shr_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
9093 let (a0, a1) = self.split_i32x16(a);
9094 self.combine_i32x8(self.shr_i32x8(a0, shift), self.shr_i32x8(a1, shift))
9095 }
9096 #[inline(always)]
9097 fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9098 let (a0, a1) = self.split_i32x16(a);
9099 let (b0, b1) = self.split_i32x16(b);
9100 self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
9101 }
9102 #[inline(always)]
9103 fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9104 let (a0, a1) = self.split_i32x16(a);
9105 let (b0, b1) = self.split_i32x16(b);
9106 self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
9107 }
9108 #[inline(always)]
9109 fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9110 let (a0, a1) = self.split_i32x16(a);
9111 let (b0, b1) = self.split_i32x16(b);
9112 self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
9113 }
9114 #[inline(always)]
9115 fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9116 let (a0, a1) = self.split_i32x16(a);
9117 let (b0, b1) = self.split_i32x16(b);
9118 self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
9119 }
9120 #[inline(always)]
9121 fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9122 let (a0, a1) = self.split_i32x16(a);
9123 let (b0, b1) = self.split_i32x16(b);
9124 self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
9125 }
9126 #[inline(always)]
9127 fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9128 let (a0, a1) = self.split_i32x16(a);
9129 let (b0, b1) = self.split_i32x16(b);
9130 self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
9131 }
9132 #[inline(always)]
9133 fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9134 let (a0, _) = self.split_i32x16(a);
9135 let (b0, _) = self.split_i32x16(b);
9136 self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
9137 }
9138 #[inline(always)]
9139 fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9140 let (_, a1) = self.split_i32x16(a);
9141 let (_, b1) = self.split_i32x16(b);
9142 self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
9143 }
9144 #[inline(always)]
9145 fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9146 let (a0, a1) = self.split_i32x16(a);
9147 let (b0, b1) = self.split_i32x16(b);
9148 self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
9149 }
9150 #[inline(always)]
9151 fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9152 let (a0, a1) = self.split_i32x16(a);
9153 let (b0, b1) = self.split_i32x16(b);
9154 self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
9155 }
9156 #[inline(always)]
9157 fn interleave_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> (i32x16<Self>, i32x16<Self>) {
9158 let (a0, a1) = self.split_i32x16(a);
9159 let (b0, b1) = self.split_i32x16(b);
9160 let lo_lo = self.zip_low_i32x8(a0, b0);
9161 let lo_hi = self.zip_high_i32x8(a0, b0);
9162 let hi_lo = self.zip_low_i32x8(a1, b1);
9163 let hi_hi = self.zip_high_i32x8(a1, b1);
9164 (
9165 self.combine_i32x8(lo_lo, lo_hi),
9166 self.combine_i32x8(hi_lo, hi_hi),
9167 )
9168 }
9169 #[inline(always)]
9170 fn deinterleave_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> (i32x16<Self>, i32x16<Self>) {
9171 let (a0, a1) = self.split_i32x16(a);
9172 let (b0, b1) = self.split_i32x16(b);
9173 let lo_even = self.unzip_low_i32x8(a0, a1);
9174 let lo_odd = self.unzip_high_i32x8(a0, a1);
9175 let hi_even = self.unzip_low_i32x8(b0, b1);
9176 let hi_odd = self.unzip_high_i32x8(b0, b1);
9177 (
9178 self.combine_i32x8(lo_even, hi_even),
9179 self.combine_i32x8(lo_odd, hi_odd),
9180 )
9181 }
9182 #[inline(always)]
9183 fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
9184 let (a0, a1) = self.split_mask32x16(a);
9185 let (b0, b1) = self.split_i32x16(b);
9186 let (c0, c1) = self.split_i32x16(c);
9187 self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
9188 }
9189 #[inline(always)]
9190 fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9191 let (a0, a1) = self.split_i32x16(a);
9192 let (b0, b1) = self.split_i32x16(b);
9193 self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
9194 }
9195 #[inline(always)]
9196 fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9197 let (a0, a1) = self.split_i32x16(a);
9198 let (b0, b1) = self.split_i32x16(b);
9199 self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
9200 }
9201 #[inline(always)]
9202 fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
9203 let mut b0 = [0; 8usize];
9204 let mut b1 = [0; 8usize];
9205 b0.copy_from_slice(&a.val.0[0..8usize]);
9206 b1.copy_from_slice(&a.val.0[8usize..16usize]);
9207 (b0.simd_into(self), b1.simd_into(self))
9208 }
9209 #[inline(always)]
9210 fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
9211 let (a0, a1) = self.split_i32x16(a);
9212 self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
9213 }
9214 #[inline(always)]
9215 fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
9216 let (a0, a1) = self.split_i32x16(a);
9217 self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
9218 }
9219 #[inline(always)]
9220 fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
9221 let (a0, a1) = self.split_i32x16(a);
9222 self.combine_u32x8(
9223 self.reinterpret_u32_i32x8(a0),
9224 self.reinterpret_u32_i32x8(a1),
9225 )
9226 }
9227 #[inline(always)]
9228 fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
9229 let (a0, a1) = self.split_i32x16(a);
9230 self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
9231 }
9232 #[inline(always)]
9233 fn splat_u32x16(self, val: u32) -> u32x16<Self> {
9234 let half = self.splat_u32x8(val);
9235 self.combine_u32x8(half, half)
9236 }
9237 #[inline(always)]
9238 fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16<Self> {
9239 u32x16 {
9240 val: crate::support::Aligned512(val),
9241 simd: self,
9242 }
9243 }
9244 #[inline(always)]
9245 fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16<Self> {
9246 u32x16 {
9247 val: crate::support::Aligned512(*val),
9248 simd: self,
9249 }
9250 }
9251 #[inline(always)]
9252 fn as_array_u32x16(self, a: u32x16<Self>) -> [u32; 16usize] {
9253 a.val.0
9254 }
9255 #[inline(always)]
9256 fn as_array_ref_u32x16(self, a: &u32x16<Self>) -> &[u32; 16usize] {
9257 &a.val.0
9258 }
9259 #[inline(always)]
9260 fn as_array_mut_u32x16(self, a: &mut u32x16<Self>) -> &mut [u32; 16usize] {
9261 &mut a.val.0
9262 }
9263 #[inline(always)]
9264 fn store_array_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
9265 *dest = a.val.0;
9266 }
9267 #[inline(always)]
9268 fn cvt_from_bytes_u32x16(self, a: u8x64<Self>) -> u32x16<Self> {
9269 unsafe {
9270 u32x16 {
9271 val: core::mem::transmute(a.val),
9272 simd: self,
9273 }
9274 }
9275 }
9276 #[inline(always)]
9277 fn cvt_to_bytes_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
9278 unsafe {
9279 u8x64 {
9280 val: core::mem::transmute(a.val),
9281 simd: self,
9282 }
9283 }
9284 }
9285 #[inline(always)]
9286 fn slide_u32x16<const SHIFT: usize>(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9287 let mut dest = [Default::default(); 16usize];
9288 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9289 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9290 dest.simd_into(self)
9291 }
9292 #[inline(always)]
9293 fn slide_within_blocks_u32x16<const SHIFT: usize>(
9294 self,
9295 a: u32x16<Self>,
9296 b: u32x16<Self>,
9297 ) -> u32x16<Self> {
9298 let (a0, a1) = self.split_u32x16(a);
9299 let (b0, b1) = self.split_u32x16(b);
9300 self.combine_u32x8(
9301 self.slide_within_blocks_u32x8::<SHIFT>(a0, b0),
9302 self.slide_within_blocks_u32x8::<SHIFT>(a1, b1),
9303 )
9304 }
9305 #[inline(always)]
9306 fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9307 let (a0, a1) = self.split_u32x16(a);
9308 let (b0, b1) = self.split_u32x16(b);
9309 self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
9310 }
9311 #[inline(always)]
9312 fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9313 let (a0, a1) = self.split_u32x16(a);
9314 let (b0, b1) = self.split_u32x16(b);
9315 self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
9316 }
9317 #[inline(always)]
9318 fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9319 let (a0, a1) = self.split_u32x16(a);
9320 let (b0, b1) = self.split_u32x16(b);
9321 self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
9322 }
9323 #[inline(always)]
9324 fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9325 let (a0, a1) = self.split_u32x16(a);
9326 let (b0, b1) = self.split_u32x16(b);
9327 self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
9328 }
9329 #[inline(always)]
9330 fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9331 let (a0, a1) = self.split_u32x16(a);
9332 let (b0, b1) = self.split_u32x16(b);
9333 self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
9334 }
9335 #[inline(always)]
9336 fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9337 let (a0, a1) = self.split_u32x16(a);
9338 let (b0, b1) = self.split_u32x16(b);
9339 self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
9340 }
9341 #[inline(always)]
9342 fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
9343 let (a0, a1) = self.split_u32x16(a);
9344 self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
9345 }
9346 #[inline(always)]
9347 fn shl_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
9348 let (a0, a1) = self.split_u32x16(a);
9349 self.combine_u32x8(self.shl_u32x8(a0, shift), self.shl_u32x8(a1, shift))
9350 }
9351 #[inline(always)]
9352 fn shlv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9353 let (a0, a1) = self.split_u32x16(a);
9354 let (b0, b1) = self.split_u32x16(b);
9355 self.combine_u32x8(self.shlv_u32x8(a0, b0), self.shlv_u32x8(a1, b1))
9356 }
9357 #[inline(always)]
9358 fn shr_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
9359 let (a0, a1) = self.split_u32x16(a);
9360 self.combine_u32x8(self.shr_u32x8(a0, shift), self.shr_u32x8(a1, shift))
9361 }
9362 #[inline(always)]
9363 fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9364 let (a0, a1) = self.split_u32x16(a);
9365 let (b0, b1) = self.split_u32x16(b);
9366 self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
9367 }
9368 #[inline(always)]
9369 fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9370 let (a0, a1) = self.split_u32x16(a);
9371 let (b0, b1) = self.split_u32x16(b);
9372 self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
9373 }
9374 #[inline(always)]
9375 fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9376 let (a0, a1) = self.split_u32x16(a);
9377 let (b0, b1) = self.split_u32x16(b);
9378 self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
9379 }
9380 #[inline(always)]
9381 fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9382 let (a0, a1) = self.split_u32x16(a);
9383 let (b0, b1) = self.split_u32x16(b);
9384 self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
9385 }
9386 #[inline(always)]
9387 fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9388 let (a0, a1) = self.split_u32x16(a);
9389 let (b0, b1) = self.split_u32x16(b);
9390 self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
9391 }
9392 #[inline(always)]
9393 fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9394 let (a0, a1) = self.split_u32x16(a);
9395 let (b0, b1) = self.split_u32x16(b);
9396 self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
9397 }
9398 #[inline(always)]
9399 fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9400 let (a0, _) = self.split_u32x16(a);
9401 let (b0, _) = self.split_u32x16(b);
9402 self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
9403 }
9404 #[inline(always)]
9405 fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9406 let (_, a1) = self.split_u32x16(a);
9407 let (_, b1) = self.split_u32x16(b);
9408 self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
9409 }
9410 #[inline(always)]
9411 fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9412 let (a0, a1) = self.split_u32x16(a);
9413 let (b0, b1) = self.split_u32x16(b);
9414 self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
9415 }
9416 #[inline(always)]
9417 fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9418 let (a0, a1) = self.split_u32x16(a);
9419 let (b0, b1) = self.split_u32x16(b);
9420 self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
9421 }
9422 #[inline(always)]
9423 fn interleave_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> (u32x16<Self>, u32x16<Self>) {
9424 let (a0, a1) = self.split_u32x16(a);
9425 let (b0, b1) = self.split_u32x16(b);
9426 let lo_lo = self.zip_low_u32x8(a0, b0);
9427 let lo_hi = self.zip_high_u32x8(a0, b0);
9428 let hi_lo = self.zip_low_u32x8(a1, b1);
9429 let hi_hi = self.zip_high_u32x8(a1, b1);
9430 (
9431 self.combine_u32x8(lo_lo, lo_hi),
9432 self.combine_u32x8(hi_lo, hi_hi),
9433 )
9434 }
9435 #[inline(always)]
9436 fn deinterleave_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> (u32x16<Self>, u32x16<Self>) {
9437 let (a0, a1) = self.split_u32x16(a);
9438 let (b0, b1) = self.split_u32x16(b);
9439 let lo_even = self.unzip_low_u32x8(a0, a1);
9440 let lo_odd = self.unzip_high_u32x8(a0, a1);
9441 let hi_even = self.unzip_low_u32x8(b0, b1);
9442 let hi_odd = self.unzip_high_u32x8(b0, b1);
9443 (
9444 self.combine_u32x8(lo_even, hi_even),
9445 self.combine_u32x8(lo_odd, hi_odd),
9446 )
9447 }
9448 #[inline(always)]
9449 fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
9450 let (a0, a1) = self.split_mask32x16(a);
9451 let (b0, b1) = self.split_u32x16(b);
9452 let (c0, c1) = self.split_u32x16(c);
9453 self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
9454 }
9455 #[inline(always)]
9456 fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9457 let (a0, a1) = self.split_u32x16(a);
9458 let (b0, b1) = self.split_u32x16(b);
9459 self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
9460 }
9461 #[inline(always)]
9462 fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9463 let (a0, a1) = self.split_u32x16(a);
9464 let (b0, b1) = self.split_u32x16(b);
9465 self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
9466 }
9467 #[inline(always)]
9468 fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
9469 let mut b0 = [0; 8usize];
9470 let mut b1 = [0; 8usize];
9471 b0.copy_from_slice(&a.val.0[0..8usize]);
9472 b1.copy_from_slice(&a.val.0[8usize..16usize]);
9473 (b0.simd_into(self), b1.simd_into(self))
9474 }
9475 #[inline(always)]
9476 fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
9477 [
9478 src[0usize],
9479 src[4usize],
9480 src[8usize],
9481 src[12usize],
9482 src[1usize],
9483 src[5usize],
9484 src[9usize],
9485 src[13usize],
9486 src[2usize],
9487 src[6usize],
9488 src[10usize],
9489 src[14usize],
9490 src[3usize],
9491 src[7usize],
9492 src[11usize],
9493 src[15usize],
9494 ]
9495 .simd_into(self)
9496 }
9497 #[inline(always)]
9498 fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
9499 *dest = [
9500 a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
9501 a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
9502 a[11usize], a[15usize],
9503 ];
9504 }
9505 #[inline(always)]
9506 fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
9507 let (a0, a1) = self.split_u32x16(a);
9508 self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
9509 }
9510 #[inline(always)]
9511 fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
9512 let (a0, a1) = self.split_u32x16(a);
9513 self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
9514 }
9515 #[inline(always)]
9516 fn splat_mask32x16(self, val: i32) -> mask32x16<Self> {
9517 let half = self.splat_mask32x8(val);
9518 self.combine_mask32x8(half, half)
9519 }
9520 #[inline(always)]
9521 fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16<Self> {
9522 mask32x16 {
9523 val: crate::support::Aligned512(val),
9524 simd: self,
9525 }
9526 }
9527 #[inline(always)]
9528 fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16<Self> {
9529 mask32x16 {
9530 val: crate::support::Aligned512(*val),
9531 simd: self,
9532 }
9533 }
9534 #[inline(always)]
9535 fn as_array_mask32x16(self, a: mask32x16<Self>) -> [i32; 16usize] {
9536 a.val.0
9537 }
9538 #[inline(always)]
9539 fn as_array_ref_mask32x16(self, a: &mask32x16<Self>) -> &[i32; 16usize] {
9540 &a.val.0
9541 }
9542 #[inline(always)]
9543 fn as_array_mut_mask32x16(self, a: &mut mask32x16<Self>) -> &mut [i32; 16usize] {
9544 &mut a.val.0
9545 }
9546 #[inline(always)]
9547 fn store_array_mask32x16(self, a: mask32x16<Self>, dest: &mut [i32; 16usize]) -> () {
9548 *dest = a.val.0;
9549 }
9550 #[inline(always)]
9551 fn cvt_from_bytes_mask32x16(self, a: u8x64<Self>) -> mask32x16<Self> {
9552 unsafe {
9553 mask32x16 {
9554 val: core::mem::transmute(a.val),
9555 simd: self,
9556 }
9557 }
9558 }
9559 #[inline(always)]
9560 fn cvt_to_bytes_mask32x16(self, a: mask32x16<Self>) -> u8x64<Self> {
9561 unsafe {
9562 u8x64 {
9563 val: core::mem::transmute(a.val),
9564 simd: self,
9565 }
9566 }
9567 }
9568 #[inline(always)]
9569 fn slide_mask32x16<const SHIFT: usize>(
9570 self,
9571 a: mask32x16<Self>,
9572 b: mask32x16<Self>,
9573 ) -> mask32x16<Self> {
9574 let mut dest = [Default::default(); 16usize];
9575 dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9576 dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9577 dest.simd_into(self)
9578 }
9579 #[inline(always)]
9580 fn slide_within_blocks_mask32x16<const SHIFT: usize>(
9581 self,
9582 a: mask32x16<Self>,
9583 b: mask32x16<Self>,
9584 ) -> mask32x16<Self> {
9585 let (a0, a1) = self.split_mask32x16(a);
9586 let (b0, b1) = self.split_mask32x16(b);
9587 self.combine_mask32x8(
9588 self.slide_within_blocks_mask32x8::<SHIFT>(a0, b0),
9589 self.slide_within_blocks_mask32x8::<SHIFT>(a1, b1),
9590 )
9591 }
9592 #[inline(always)]
9593 fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9594 let (a0, a1) = self.split_mask32x16(a);
9595 let (b0, b1) = self.split_mask32x16(b);
9596 self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
9597 }
9598 #[inline(always)]
9599 fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9600 let (a0, a1) = self.split_mask32x16(a);
9601 let (b0, b1) = self.split_mask32x16(b);
9602 self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
9603 }
9604 #[inline(always)]
9605 fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9606 let (a0, a1) = self.split_mask32x16(a);
9607 let (b0, b1) = self.split_mask32x16(b);
9608 self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
9609 }
9610 #[inline(always)]
9611 fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
9612 let (a0, a1) = self.split_mask32x16(a);
9613 self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
9614 }
9615 #[inline(always)]
9616 fn select_mask32x16(
9617 self,
9618 a: mask32x16<Self>,
9619 b: mask32x16<Self>,
9620 c: mask32x16<Self>,
9621 ) -> mask32x16<Self> {
9622 let (a0, a1) = self.split_mask32x16(a);
9623 let (b0, b1) = self.split_mask32x16(b);
9624 let (c0, c1) = self.split_mask32x16(c);
9625 self.combine_mask32x8(
9626 self.select_mask32x8(a0, b0, c0),
9627 self.select_mask32x8(a1, b1, c1),
9628 )
9629 }
9630 #[inline(always)]
9631 fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9632 let (a0, a1) = self.split_mask32x16(a);
9633 let (b0, b1) = self.split_mask32x16(b);
9634 self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
9635 }
9636 #[inline(always)]
9637 fn any_true_mask32x16(self, a: mask32x16<Self>) -> bool {
9638 let (a0, a1) = self.split_mask32x16(a);
9639 self.any_true_mask32x8(a0) || self.any_true_mask32x8(a1)
9640 }
9641 #[inline(always)]
9642 fn all_true_mask32x16(self, a: mask32x16<Self>) -> bool {
9643 let (a0, a1) = self.split_mask32x16(a);
9644 self.all_true_mask32x8(a0) && self.all_true_mask32x8(a1)
9645 }
9646 #[inline(always)]
9647 fn any_false_mask32x16(self, a: mask32x16<Self>) -> bool {
9648 let (a0, a1) = self.split_mask32x16(a);
9649 self.any_false_mask32x8(a0) || self.any_false_mask32x8(a1)
9650 }
9651 #[inline(always)]
9652 fn all_false_mask32x16(self, a: mask32x16<Self>) -> bool {
9653 let (a0, a1) = self.split_mask32x16(a);
9654 self.all_false_mask32x8(a0) && self.all_false_mask32x8(a1)
9655 }
9656 #[inline(always)]
9657 fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
9658 let mut b0 = [0; 8usize];
9659 let mut b1 = [0; 8usize];
9660 b0.copy_from_slice(&a.val.0[0..8usize]);
9661 b1.copy_from_slice(&a.val.0[8usize..16usize]);
9662 (b0.simd_into(self), b1.simd_into(self))
9663 }
9664 #[inline(always)]
9665 fn splat_f64x8(self, val: f64) -> f64x8<Self> {
9666 let half = self.splat_f64x4(val);
9667 self.combine_f64x4(half, half)
9668 }
9669 #[inline(always)]
9670 fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8<Self> {
9671 f64x8 {
9672 val: crate::support::Aligned512(val),
9673 simd: self,
9674 }
9675 }
9676 #[inline(always)]
9677 fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8<Self> {
9678 f64x8 {
9679 val: crate::support::Aligned512(*val),
9680 simd: self,
9681 }
9682 }
9683 #[inline(always)]
9684 fn as_array_f64x8(self, a: f64x8<Self>) -> [f64; 8usize] {
9685 a.val.0
9686 }
9687 #[inline(always)]
9688 fn as_array_ref_f64x8(self, a: &f64x8<Self>) -> &[f64; 8usize] {
9689 &a.val.0
9690 }
9691 #[inline(always)]
9692 fn as_array_mut_f64x8(self, a: &mut f64x8<Self>) -> &mut [f64; 8usize] {
9693 &mut a.val.0
9694 }
9695 #[inline(always)]
9696 fn store_array_f64x8(self, a: f64x8<Self>, dest: &mut [f64; 8usize]) -> () {
9697 *dest = a.val.0;
9698 }
9699 #[inline(always)]
9700 fn cvt_from_bytes_f64x8(self, a: u8x64<Self>) -> f64x8<Self> {
9701 unsafe {
9702 f64x8 {
9703 val: core::mem::transmute(a.val),
9704 simd: self,
9705 }
9706 }
9707 }
9708 #[inline(always)]
9709 fn cvt_to_bytes_f64x8(self, a: f64x8<Self>) -> u8x64<Self> {
9710 unsafe {
9711 u8x64 {
9712 val: core::mem::transmute(a.val),
9713 simd: self,
9714 }
9715 }
9716 }
9717 #[inline(always)]
9718 fn slide_f64x8<const SHIFT: usize>(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9719 let mut dest = [Default::default(); 8usize];
9720 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9721 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9722 dest.simd_into(self)
9723 }
9724 #[inline(always)]
9725 fn slide_within_blocks_f64x8<const SHIFT: usize>(
9726 self,
9727 a: f64x8<Self>,
9728 b: f64x8<Self>,
9729 ) -> f64x8<Self> {
9730 let (a0, a1) = self.split_f64x8(a);
9731 let (b0, b1) = self.split_f64x8(b);
9732 self.combine_f64x4(
9733 self.slide_within_blocks_f64x4::<SHIFT>(a0, b0),
9734 self.slide_within_blocks_f64x4::<SHIFT>(a1, b1),
9735 )
9736 }
9737 #[inline(always)]
9738 fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9739 let (a0, a1) = self.split_f64x8(a);
9740 self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
9741 }
9742 #[inline(always)]
9743 fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9744 let (a0, a1) = self.split_f64x8(a);
9745 self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
9746 }
9747 #[inline(always)]
9748 fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9749 let (a0, a1) = self.split_f64x8(a);
9750 self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
9751 }
9752 #[inline(always)]
9753 fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9754 let (a0, a1) = self.split_f64x8(a);
9755 let (b0, b1) = self.split_f64x8(b);
9756 self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
9757 }
9758 #[inline(always)]
9759 fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9760 let (a0, a1) = self.split_f64x8(a);
9761 let (b0, b1) = self.split_f64x8(b);
9762 self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
9763 }
9764 #[inline(always)]
9765 fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9766 let (a0, a1) = self.split_f64x8(a);
9767 let (b0, b1) = self.split_f64x8(b);
9768 self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
9769 }
9770 #[inline(always)]
9771 fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9772 let (a0, a1) = self.split_f64x8(a);
9773 let (b0, b1) = self.split_f64x8(b);
9774 self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
9775 }
9776 #[inline(always)]
9777 fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9778 let (a0, a1) = self.split_f64x8(a);
9779 let (b0, b1) = self.split_f64x8(b);
9780 self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
9781 }
9782 #[inline(always)]
9783 fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9784 let (a0, a1) = self.split_f64x8(a);
9785 let (b0, b1) = self.split_f64x8(b);
9786 self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
9787 }
9788 #[inline(always)]
9789 fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9790 let (a0, a1) = self.split_f64x8(a);
9791 let (b0, b1) = self.split_f64x8(b);
9792 self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
9793 }
9794 #[inline(always)]
9795 fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9796 let (a0, a1) = self.split_f64x8(a);
9797 let (b0, b1) = self.split_f64x8(b);
9798 self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
9799 }
9800 #[inline(always)]
9801 fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9802 let (a0, a1) = self.split_f64x8(a);
9803 let (b0, b1) = self.split_f64x8(b);
9804 self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
9805 }
9806 #[inline(always)]
9807 fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9808 let (a0, a1) = self.split_f64x8(a);
9809 let (b0, b1) = self.split_f64x8(b);
9810 self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
9811 }
9812 #[inline(always)]
9813 fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9814 let (a0, _) = self.split_f64x8(a);
9815 let (b0, _) = self.split_f64x8(b);
9816 self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
9817 }
9818 #[inline(always)]
9819 fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9820 let (_, a1) = self.split_f64x8(a);
9821 let (_, b1) = self.split_f64x8(b);
9822 self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
9823 }
9824 #[inline(always)]
9825 fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9826 let (a0, a1) = self.split_f64x8(a);
9827 let (b0, b1) = self.split_f64x8(b);
9828 self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
9829 }
9830 #[inline(always)]
9831 fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9832 let (a0, a1) = self.split_f64x8(a);
9833 let (b0, b1) = self.split_f64x8(b);
9834 self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
9835 }
9836 #[inline(always)]
9837 fn interleave_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> (f64x8<Self>, f64x8<Self>) {
9838 let (a0, a1) = self.split_f64x8(a);
9839 let (b0, b1) = self.split_f64x8(b);
9840 let lo_lo = self.zip_low_f64x4(a0, b0);
9841 let lo_hi = self.zip_high_f64x4(a0, b0);
9842 let hi_lo = self.zip_low_f64x4(a1, b1);
9843 let hi_hi = self.zip_high_f64x4(a1, b1);
9844 (
9845 self.combine_f64x4(lo_lo, lo_hi),
9846 self.combine_f64x4(hi_lo, hi_hi),
9847 )
9848 }
9849 #[inline(always)]
9850 fn deinterleave_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> (f64x8<Self>, f64x8<Self>) {
9851 let (a0, a1) = self.split_f64x8(a);
9852 let (b0, b1) = self.split_f64x8(b);
9853 let lo_even = self.unzip_low_f64x4(a0, a1);
9854 let lo_odd = self.unzip_high_f64x4(a0, a1);
9855 let hi_even = self.unzip_low_f64x4(b0, b1);
9856 let hi_odd = self.unzip_high_f64x4(b0, b1);
9857 (
9858 self.combine_f64x4(lo_even, hi_even),
9859 self.combine_f64x4(lo_odd, hi_odd),
9860 )
9861 }
9862 #[inline(always)]
9863 fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9864 let (a0, a1) = self.split_f64x8(a);
9865 let (b0, b1) = self.split_f64x8(b);
9866 self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
9867 }
9868 #[inline(always)]
9869 fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9870 let (a0, a1) = self.split_f64x8(a);
9871 let (b0, b1) = self.split_f64x8(b);
9872 self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
9873 }
9874 #[inline(always)]
9875 fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9876 let (a0, a1) = self.split_f64x8(a);
9877 let (b0, b1) = self.split_f64x8(b);
9878 self.combine_f64x4(
9879 self.max_precise_f64x4(a0, b0),
9880 self.max_precise_f64x4(a1, b1),
9881 )
9882 }
9883 #[inline(always)]
9884 fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9885 let (a0, a1) = self.split_f64x8(a);
9886 let (b0, b1) = self.split_f64x8(b);
9887 self.combine_f64x4(
9888 self.min_precise_f64x4(a0, b0),
9889 self.min_precise_f64x4(a1, b1),
9890 )
9891 }
9892 #[inline(always)]
9893 fn mul_add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9894 let (a0, a1) = self.split_f64x8(a);
9895 let (b0, b1) = self.split_f64x8(b);
9896 let (c0, c1) = self.split_f64x8(c);
9897 self.combine_f64x4(
9898 self.mul_add_f64x4(a0, b0, c0),
9899 self.mul_add_f64x4(a1, b1, c1),
9900 )
9901 }
9902 #[inline(always)]
9903 fn mul_sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9904 let (a0, a1) = self.split_f64x8(a);
9905 let (b0, b1) = self.split_f64x8(b);
9906 let (c0, c1) = self.split_f64x8(c);
9907 self.combine_f64x4(
9908 self.mul_sub_f64x4(a0, b0, c0),
9909 self.mul_sub_f64x4(a1, b1, c1),
9910 )
9911 }
9912 #[inline(always)]
9913 fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9914 let (a0, a1) = self.split_f64x8(a);
9915 self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
9916 }
9917 #[inline(always)]
9918 fn ceil_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9919 let (a0, a1) = self.split_f64x8(a);
9920 self.combine_f64x4(self.ceil_f64x4(a0), self.ceil_f64x4(a1))
9921 }
9922 #[inline(always)]
9923 fn round_ties_even_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9924 let (a0, a1) = self.split_f64x8(a);
9925 self.combine_f64x4(
9926 self.round_ties_even_f64x4(a0),
9927 self.round_ties_even_f64x4(a1),
9928 )
9929 }
9930 #[inline(always)]
9931 fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9932 let (a0, a1) = self.split_f64x8(a);
9933 self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
9934 }
9935 #[inline(always)]
9936 fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9937 let (a0, a1) = self.split_f64x8(a);
9938 self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
9939 }
9940 #[inline(always)]
9941 fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9942 let (a0, a1) = self.split_mask64x8(a);
9943 let (b0, b1) = self.split_f64x8(b);
9944 let (c0, c1) = self.split_f64x8(c);
9945 self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
9946 }
9947 #[inline(always)]
9948 fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
9949 let mut b0 = [0.0; 4usize];
9950 let mut b1 = [0.0; 4usize];
9951 b0.copy_from_slice(&a.val.0[0..4usize]);
9952 b1.copy_from_slice(&a.val.0[4usize..8usize]);
9953 (b0.simd_into(self), b1.simd_into(self))
9954 }
9955 #[inline(always)]
9956 fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
9957 let (a0, a1) = self.split_f64x8(a);
9958 self.combine_f32x8(
9959 self.reinterpret_f32_f64x4(a0),
9960 self.reinterpret_f32_f64x4(a1),
9961 )
9962 }
9963 #[inline(always)]
9964 fn splat_mask64x8(self, val: i64) -> mask64x8<Self> {
9965 let half = self.splat_mask64x4(val);
9966 self.combine_mask64x4(half, half)
9967 }
9968 #[inline(always)]
9969 fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8<Self> {
9970 mask64x8 {
9971 val: crate::support::Aligned512(val),
9972 simd: self,
9973 }
9974 }
9975 #[inline(always)]
9976 fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8<Self> {
9977 mask64x8 {
9978 val: crate::support::Aligned512(*val),
9979 simd: self,
9980 }
9981 }
9982 #[inline(always)]
9983 fn as_array_mask64x8(self, a: mask64x8<Self>) -> [i64; 8usize] {
9984 a.val.0
9985 }
9986 #[inline(always)]
9987 fn as_array_ref_mask64x8(self, a: &mask64x8<Self>) -> &[i64; 8usize] {
9988 &a.val.0
9989 }
9990 #[inline(always)]
9991 fn as_array_mut_mask64x8(self, a: &mut mask64x8<Self>) -> &mut [i64; 8usize] {
9992 &mut a.val.0
9993 }
9994 #[inline(always)]
9995 fn store_array_mask64x8(self, a: mask64x8<Self>, dest: &mut [i64; 8usize]) -> () {
9996 *dest = a.val.0;
9997 }
9998 #[inline(always)]
9999 fn cvt_from_bytes_mask64x8(self, a: u8x64<Self>) -> mask64x8<Self> {
10000 unsafe {
10001 mask64x8 {
10002 val: core::mem::transmute(a.val),
10003 simd: self,
10004 }
10005 }
10006 }
10007 #[inline(always)]
10008 fn cvt_to_bytes_mask64x8(self, a: mask64x8<Self>) -> u8x64<Self> {
10009 unsafe {
10010 u8x64 {
10011 val: core::mem::transmute(a.val),
10012 simd: self,
10013 }
10014 }
10015 }
10016 #[inline(always)]
10017 fn slide_mask64x8<const SHIFT: usize>(
10018 self,
10019 a: mask64x8<Self>,
10020 b: mask64x8<Self>,
10021 ) -> mask64x8<Self> {
10022 let mut dest = [Default::default(); 8usize];
10023 dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
10024 dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
10025 dest.simd_into(self)
10026 }
10027 #[inline(always)]
10028 fn slide_within_blocks_mask64x8<const SHIFT: usize>(
10029 self,
10030 a: mask64x8<Self>,
10031 b: mask64x8<Self>,
10032 ) -> mask64x8<Self> {
10033 let (a0, a1) = self.split_mask64x8(a);
10034 let (b0, b1) = self.split_mask64x8(b);
10035 self.combine_mask64x4(
10036 self.slide_within_blocks_mask64x4::<SHIFT>(a0, b0),
10037 self.slide_within_blocks_mask64x4::<SHIFT>(a1, b1),
10038 )
10039 }
10040 #[inline(always)]
10041 fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10042 let (a0, a1) = self.split_mask64x8(a);
10043 let (b0, b1) = self.split_mask64x8(b);
10044 self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
10045 }
10046 #[inline(always)]
10047 fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10048 let (a0, a1) = self.split_mask64x8(a);
10049 let (b0, b1) = self.split_mask64x8(b);
10050 self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
10051 }
10052 #[inline(always)]
10053 fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10054 let (a0, a1) = self.split_mask64x8(a);
10055 let (b0, b1) = self.split_mask64x8(b);
10056 self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
10057 }
10058 #[inline(always)]
10059 fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
10060 let (a0, a1) = self.split_mask64x8(a);
10061 self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
10062 }
10063 #[inline(always)]
10064 fn select_mask64x8(
10065 self,
10066 a: mask64x8<Self>,
10067 b: mask64x8<Self>,
10068 c: mask64x8<Self>,
10069 ) -> mask64x8<Self> {
10070 let (a0, a1) = self.split_mask64x8(a);
10071 let (b0, b1) = self.split_mask64x8(b);
10072 let (c0, c1) = self.split_mask64x8(c);
10073 self.combine_mask64x4(
10074 self.select_mask64x4(a0, b0, c0),
10075 self.select_mask64x4(a1, b1, c1),
10076 )
10077 }
10078 #[inline(always)]
10079 fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10080 let (a0, a1) = self.split_mask64x8(a);
10081 let (b0, b1) = self.split_mask64x8(b);
10082 self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
10083 }
10084 #[inline(always)]
10085 fn any_true_mask64x8(self, a: mask64x8<Self>) -> bool {
10086 let (a0, a1) = self.split_mask64x8(a);
10087 self.any_true_mask64x4(a0) || self.any_true_mask64x4(a1)
10088 }
10089 #[inline(always)]
10090 fn all_true_mask64x8(self, a: mask64x8<Self>) -> bool {
10091 let (a0, a1) = self.split_mask64x8(a);
10092 self.all_true_mask64x4(a0) && self.all_true_mask64x4(a1)
10093 }
10094 #[inline(always)]
10095 fn any_false_mask64x8(self, a: mask64x8<Self>) -> bool {
10096 let (a0, a1) = self.split_mask64x8(a);
10097 self.any_false_mask64x4(a0) || self.any_false_mask64x4(a1)
10098 }
10099 #[inline(always)]
10100 fn all_false_mask64x8(self, a: mask64x8<Self>) -> bool {
10101 let (a0, a1) = self.split_mask64x8(a);
10102 self.all_false_mask64x4(a0) && self.all_false_mask64x4(a1)
10103 }
10104 #[inline(always)]
10105 fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
10106 let mut b0 = [0; 4usize];
10107 let mut b1 = [0; 4usize];
10108 b0.copy_from_slice(&a.val.0[0..4usize]);
10109 b1.copy_from_slice(&a.val.0[4usize..8usize]);
10110 (b0.simd_into(self), b1.simd_into(self))
10111 }
10112}