Skip to main content

fearless_simd/generated/
fallback.rs

1// Copyright 2025 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4// This file is autogenerated by fearless_simd_gen
5
6use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
7use crate::{
8    f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
9    i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
10    mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
11    u32x4, u32x8, u32x16,
12};
13use core::ops::*;
/// Float helpers used by this file when the `libm` feature is enabled and `std` is not.
///
/// The implementations in this file forward to the `libm` crate.
#[cfg(all(not(feature = "std"), feature = "libm"))]
trait FloatExt {
    /// Square root of `self`.
    fn sqrt(self) -> Self;
    /// Largest integral value not greater than `self`.
    fn floor(self) -> Self;
    /// Smallest integral value not less than `self`.
    fn ceil(self) -> Self;
    /// Integral part of `self`, with the fractional part dropped.
    fn trunc(self) -> Self;
    /// Fractional part of `self` (`self` minus its truncated value).
    fn fract(self) -> Self;
    /// Nearest integral value; as the name states, ties are meant to go to the even value.
    fn round_ties_even(self) -> Self;
}
/// `f32` implementation of [`FloatExt`], backed by the single-precision `libm` routines.
#[cfg(all(not(feature = "std"), feature = "libm"))]
impl FloatExt for f32 {
    /// Forwards to `libm::sqrtf`.
    #[inline(always)]
    fn sqrt(self) -> Self {
        libm::sqrtf(self)
    }
    /// Forwards to `libm::floorf`.
    #[inline(always)]
    fn floor(self) -> Self {
        libm::floorf(self)
    }
    /// Forwards to `libm::ceilf`.
    #[inline(always)]
    fn ceil(self) -> Self {
        libm::ceilf(self)
    }
    /// Forwards to `libm::truncf`.
    #[inline(always)]
    fn trunc(self) -> Self {
        libm::truncf(self)
    }
    /// Computes the fractional part as `self` minus its truncated value.
    #[inline(always)]
    fn fract(self) -> Self {
        let whole = self.trunc();
        self - whole
    }
    /// Forwards to `libm::rintf`.
    #[inline(always)]
    fn round_ties_even(self) -> Self {
        libm::rintf(self)
    }
}
/// `f64` implementation of [`FloatExt`], backed by the double-precision `libm` routines.
#[cfg(all(not(feature = "std"), feature = "libm"))]
impl FloatExt for f64 {
    /// Forwards to `libm::sqrt`.
    #[inline(always)]
    fn sqrt(self) -> Self {
        libm::sqrt(self)
    }
    /// Forwards to `libm::floor`.
    #[inline(always)]
    fn floor(self) -> Self {
        libm::floor(self)
    }
    /// Forwards to `libm::ceil`.
    #[inline(always)]
    fn ceil(self) -> Self {
        libm::ceil(self)
    }
    /// Forwards to `libm::trunc`.
    #[inline(always)]
    fn trunc(self) -> Self {
        libm::trunc(self)
    }
    /// Computes the fractional part as `self` minus its truncated value.
    #[inline(always)]
    fn fract(self) -> Self {
        let whole = self.trunc();
        self - whole
    }
    /// Forwards to `libm::rint`.
    #[inline(always)]
    fn round_ties_even(self) -> Self {
        libm::rint(self)
    }
}
77#[doc = "The SIMD token for the \"fallback\" level."]
78#[derive(Clone, Copy, Debug)]
79pub struct Fallback {
80    pub fallback: crate::core_arch::fallback::Fallback,
81}
82impl Fallback {
83    #[inline]
84    pub const fn new() -> Self {
85        Self {
86            fallback: crate::core_arch::fallback::Fallback::new(),
87        }
88    }
89}
// Marker impl with no items. NOTE(review): presumably `Seal` is the crate's sealing
// trait that gates which types may implement the SIMD traits — confirm in `crate::seal`.
impl Seal for Fallback {}
91impl ArchTypes for Fallback {
92    type f32x4 = crate::support::Aligned128<[f32; 4usize]>;
93    type i8x16 = crate::support::Aligned128<[i8; 16usize]>;
94    type u8x16 = crate::support::Aligned128<[u8; 16usize]>;
95    type mask8x16 = crate::support::Aligned128<[i8; 16usize]>;
96    type i16x8 = crate::support::Aligned128<[i16; 8usize]>;
97    type u16x8 = crate::support::Aligned128<[u16; 8usize]>;
98    type mask16x8 = crate::support::Aligned128<[i16; 8usize]>;
99    type i32x4 = crate::support::Aligned128<[i32; 4usize]>;
100    type u32x4 = crate::support::Aligned128<[u32; 4usize]>;
101    type mask32x4 = crate::support::Aligned128<[i32; 4usize]>;
102    type f64x2 = crate::support::Aligned128<[f64; 2usize]>;
103    type mask64x2 = crate::support::Aligned128<[i64; 2usize]>;
104    type f32x8 = crate::support::Aligned256<[f32; 8usize]>;
105    type i8x32 = crate::support::Aligned256<[i8; 32usize]>;
106    type u8x32 = crate::support::Aligned256<[u8; 32usize]>;
107    type mask8x32 = crate::support::Aligned256<[i8; 32usize]>;
108    type i16x16 = crate::support::Aligned256<[i16; 16usize]>;
109    type u16x16 = crate::support::Aligned256<[u16; 16usize]>;
110    type mask16x16 = crate::support::Aligned256<[i16; 16usize]>;
111    type i32x8 = crate::support::Aligned256<[i32; 8usize]>;
112    type u32x8 = crate::support::Aligned256<[u32; 8usize]>;
113    type mask32x8 = crate::support::Aligned256<[i32; 8usize]>;
114    type f64x4 = crate::support::Aligned256<[f64; 4usize]>;
115    type mask64x4 = crate::support::Aligned256<[i64; 4usize]>;
116    type f32x16 = crate::support::Aligned512<[f32; 16usize]>;
117    type i8x64 = crate::support::Aligned512<[i8; 64usize]>;
118    type u8x64 = crate::support::Aligned512<[u8; 64usize]>;
119    type mask8x64 = crate::support::Aligned512<[i8; 64usize]>;
120    type i16x32 = crate::support::Aligned512<[i16; 32usize]>;
121    type u16x32 = crate::support::Aligned512<[u16; 32usize]>;
122    type mask16x32 = crate::support::Aligned512<[i16; 32usize]>;
123    type i32x16 = crate::support::Aligned512<[i32; 16usize]>;
124    type u32x16 = crate::support::Aligned512<[u32; 16usize]>;
125    type mask32x16 = crate::support::Aligned512<[i32; 16usize]>;
126    type f64x8 = crate::support::Aligned512<[f64; 8usize]>;
127    type mask64x8 = crate::support::Aligned512<[i64; 8usize]>;
128}
129impl Simd for Fallback {
130    type f32s = f32x4<Self>;
131    type f64s = f64x2<Self>;
132    type u8s = u8x16<Self>;
133    type i8s = i8x16<Self>;
134    type u16s = u16x8<Self>;
135    type i16s = i16x8<Self>;
136    type u32s = u32x4<Self>;
137    type i32s = i32x4<Self>;
138    type mask8s = mask8x16<Self>;
139    type mask16s = mask16x8<Self>;
140    type mask32s = mask32x4<Self>;
141    type mask64s = mask64x2<Self>;
142    #[inline(always)]
143    fn level(self) -> Level {
144        #[cfg(feature = "force_support_fallback")]
145        return Level::Fallback(self);
146        #[cfg(not(feature = "force_support_fallback"))]
147        Level::baseline()
148    }
149    #[inline]
150    fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
151        fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R {
152            f()
153        }
154        vectorize_inner(f)
155    }
156    #[inline(always)]
157    fn splat_f32x4(self, val: f32) -> f32x4<Self> {
158        [val; 4usize].simd_into(self)
159    }
160    #[inline(always)]
161    fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4<Self> {
162        f32x4 {
163            val: crate::support::Aligned128(val),
164            simd: self,
165        }
166    }
167    #[inline(always)]
168    fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4<Self> {
169        f32x4 {
170            val: crate::support::Aligned128(*val),
171            simd: self,
172        }
173    }
174    #[inline(always)]
175    fn as_array_f32x4(self, a: f32x4<Self>) -> [f32; 4usize] {
176        a.val.0
177    }
178    #[inline(always)]
179    fn as_array_ref_f32x4(self, a: &f32x4<Self>) -> &[f32; 4usize] {
180        &a.val.0
181    }
182    #[inline(always)]
183    fn as_array_mut_f32x4(self, a: &mut f32x4<Self>) -> &mut [f32; 4usize] {
184        &mut a.val.0
185    }
186    #[inline(always)]
187    fn store_array_f32x4(self, a: f32x4<Self>, dest: &mut [f32; 4usize]) -> () {
188        *dest = a.val.0;
189    }
    /// Reinterprets the storage of the byte vector `a` as an `f32x4` without
    /// changing any bits.
    #[inline(always)]
    fn cvt_from_bytes_f32x4(self, a: u8x16<Self>) -> f32x4<Self> {
        // SAFETY: `transmute` only compiles when source and destination have the same
        // size, and every bit pattern is a valid `f32`.
        // NOTE(review): also assumes `Aligned128` has a layout under which the lane
        // bytes line up between element types — confirm in `crate::support`.
        unsafe {
            f32x4 {
                val: core::mem::transmute(a.val),
                simd: self,
            }
        }
    }
    /// Reinterprets the storage of `a` as a `u8x16` without changing any bits.
    #[inline(always)]
    fn cvt_to_bytes_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
        // SAFETY: `transmute` only compiles when source and destination have the same
        // size, and every bit pattern is a valid `u8`.
        // NOTE(review): also assumes `Aligned128` has a layout under which the lane
        // bytes line up between element types — confirm in `crate::support`.
        unsafe {
            u8x16 {
                val: core::mem::transmute(a.val),
                simd: self,
            }
        }
    }
208    #[inline(always)]
209    fn slide_f32x4<const SHIFT: usize>(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
210        let mut dest = [Default::default(); 4usize];
211        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
212        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
213        dest.simd_into(self)
214    }
215    #[inline(always)]
216    fn slide_within_blocks_f32x4<const SHIFT: usize>(
217        self,
218        a: f32x4<Self>,
219        b: f32x4<Self>,
220    ) -> f32x4<Self> {
221        self.slide_f32x4::<SHIFT>(a, b)
222    }
223    #[inline(always)]
224    fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
225        [
226            f32::abs(a[0usize]),
227            f32::abs(a[1usize]),
228            f32::abs(a[2usize]),
229            f32::abs(a[3usize]),
230        ]
231        .simd_into(self)
232    }
233    #[inline(always)]
234    fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
235        [
236            f32::neg(a[0usize]),
237            f32::neg(a[1usize]),
238            f32::neg(a[2usize]),
239            f32::neg(a[3usize]),
240        ]
241        .simd_into(self)
242    }
243    #[inline(always)]
244    fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
245        [
246            f32::sqrt(a[0usize]),
247            f32::sqrt(a[1usize]),
248            f32::sqrt(a[2usize]),
249            f32::sqrt(a[3usize]),
250        ]
251        .simd_into(self)
252    }
253    #[inline(always)]
254    fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
255        [
256            f32::add(a[0usize], &b[0usize]),
257            f32::add(a[1usize], &b[1usize]),
258            f32::add(a[2usize], &b[2usize]),
259            f32::add(a[3usize], &b[3usize]),
260        ]
261        .simd_into(self)
262    }
263    #[inline(always)]
264    fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
265        [
266            f32::sub(a[0usize], &b[0usize]),
267            f32::sub(a[1usize], &b[1usize]),
268            f32::sub(a[2usize], &b[2usize]),
269            f32::sub(a[3usize], &b[3usize]),
270        ]
271        .simd_into(self)
272    }
273    #[inline(always)]
274    fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
275        [
276            f32::mul(a[0usize], &b[0usize]),
277            f32::mul(a[1usize], &b[1usize]),
278            f32::mul(a[2usize], &b[2usize]),
279            f32::mul(a[3usize], &b[3usize]),
280        ]
281        .simd_into(self)
282    }
283    #[inline(always)]
284    fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
285        [
286            f32::div(a[0usize], &b[0usize]),
287            f32::div(a[1usize], &b[1usize]),
288            f32::div(a[2usize], &b[2usize]),
289            f32::div(a[3usize], &b[3usize]),
290        ]
291        .simd_into(self)
292    }
293    #[inline(always)]
294    fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
295        [
296            f32::copysign(a[0usize], b[0usize]),
297            f32::copysign(a[1usize], b[1usize]),
298            f32::copysign(a[2usize], b[2usize]),
299            f32::copysign(a[3usize], b[3usize]),
300        ]
301        .simd_into(self)
302    }
303    #[inline(always)]
304    fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
305        [
306            -(f32::eq(&a[0usize], &b[0usize]) as i32),
307            -(f32::eq(&a[1usize], &b[1usize]) as i32),
308            -(f32::eq(&a[2usize], &b[2usize]) as i32),
309            -(f32::eq(&a[3usize], &b[3usize]) as i32),
310        ]
311        .simd_into(self)
312    }
313    #[inline(always)]
314    fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
315        [
316            -(f32::lt(&a[0usize], &b[0usize]) as i32),
317            -(f32::lt(&a[1usize], &b[1usize]) as i32),
318            -(f32::lt(&a[2usize], &b[2usize]) as i32),
319            -(f32::lt(&a[3usize], &b[3usize]) as i32),
320        ]
321        .simd_into(self)
322    }
323    #[inline(always)]
324    fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
325        [
326            -(f32::le(&a[0usize], &b[0usize]) as i32),
327            -(f32::le(&a[1usize], &b[1usize]) as i32),
328            -(f32::le(&a[2usize], &b[2usize]) as i32),
329            -(f32::le(&a[3usize], &b[3usize]) as i32),
330        ]
331        .simd_into(self)
332    }
333    #[inline(always)]
334    fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
335        [
336            -(f32::ge(&a[0usize], &b[0usize]) as i32),
337            -(f32::ge(&a[1usize], &b[1usize]) as i32),
338            -(f32::ge(&a[2usize], &b[2usize]) as i32),
339            -(f32::ge(&a[3usize], &b[3usize]) as i32),
340        ]
341        .simd_into(self)
342    }
343    #[inline(always)]
344    fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
345        [
346            -(f32::gt(&a[0usize], &b[0usize]) as i32),
347            -(f32::gt(&a[1usize], &b[1usize]) as i32),
348            -(f32::gt(&a[2usize], &b[2usize]) as i32),
349            -(f32::gt(&a[3usize], &b[3usize]) as i32),
350        ]
351        .simd_into(self)
352    }
353    #[inline(always)]
354    fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
355        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
356    }
357    #[inline(always)]
358    fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
359        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
360    }
361    #[inline(always)]
362    fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
363        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
364    }
365    #[inline(always)]
366    fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
367        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
368    }
369    #[inline(always)]
370    fn interleave_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> (f32x4<Self>, f32x4<Self>) {
371        (self.zip_low_f32x4(a, b), self.zip_high_f32x4(a, b))
372    }
373    #[inline(always)]
374    fn deinterleave_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> (f32x4<Self>, f32x4<Self>) {
375        (self.unzip_low_f32x4(a, b), self.unzip_high_f32x4(a, b))
376    }
377    #[inline(always)]
378    fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
379        [
380            f32::max(a[0usize], b[0usize]),
381            f32::max(a[1usize], b[1usize]),
382            f32::max(a[2usize], b[2usize]),
383            f32::max(a[3usize], b[3usize]),
384        ]
385        .simd_into(self)
386    }
387    #[inline(always)]
388    fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
389        [
390            f32::min(a[0usize], b[0usize]),
391            f32::min(a[1usize], b[1usize]),
392            f32::min(a[2usize], b[2usize]),
393            f32::min(a[3usize], b[3usize]),
394        ]
395        .simd_into(self)
396    }
397    #[inline(always)]
398    fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
399        [
400            f32::max(a[0usize], b[0usize]),
401            f32::max(a[1usize], b[1usize]),
402            f32::max(a[2usize], b[2usize]),
403            f32::max(a[3usize], b[3usize]),
404        ]
405        .simd_into(self)
406    }
407    #[inline(always)]
408    fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
409        [
410            f32::min(a[0usize], b[0usize]),
411            f32::min(a[1usize], b[1usize]),
412            f32::min(a[2usize], b[2usize]),
413            f32::min(a[3usize], b[3usize]),
414        ]
415        .simd_into(self)
416    }
417    #[inline(always)]
418    fn mul_add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
419        a.mul(b).add(c)
420    }
421    #[inline(always)]
422    fn mul_sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
423        a.mul(b).sub(c)
424    }
425    #[inline(always)]
426    fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
427        [
428            f32::floor(a[0usize]),
429            f32::floor(a[1usize]),
430            f32::floor(a[2usize]),
431            f32::floor(a[3usize]),
432        ]
433        .simd_into(self)
434    }
435    #[inline(always)]
436    fn ceil_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
437        [
438            f32::ceil(a[0usize]),
439            f32::ceil(a[1usize]),
440            f32::ceil(a[2usize]),
441            f32::ceil(a[3usize]),
442        ]
443        .simd_into(self)
444    }
445    #[inline(always)]
446    fn round_ties_even_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
447        [
448            f32::round_ties_even(a[0usize]),
449            f32::round_ties_even(a[1usize]),
450            f32::round_ties_even(a[2usize]),
451            f32::round_ties_even(a[3usize]),
452        ]
453        .simd_into(self)
454    }
455    #[inline(always)]
456    fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
457        [
458            f32::fract(a[0usize]),
459            f32::fract(a[1usize]),
460            f32::fract(a[2usize]),
461            f32::fract(a[3usize]),
462        ]
463        .simd_into(self)
464    }
465    #[inline(always)]
466    fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
467        [
468            f32::trunc(a[0usize]),
469            f32::trunc(a[1usize]),
470            f32::trunc(a[2usize]),
471            f32::trunc(a[3usize]),
472        ]
473        .simd_into(self)
474    }
475    #[inline(always)]
476    fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
477        [
478            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
479            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
480            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
481            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
482        ]
483        .simd_into(self)
484    }
485    #[inline(always)]
486    fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
487        let mut result = [0.0; 8usize];
488        result[0..4usize].copy_from_slice(&a.val.0);
489        result[4usize..8usize].copy_from_slice(&b.val.0);
490        result.simd_into(self)
491    }
492    #[inline(always)]
493    fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
494        a.bitcast()
495    }
496    #[inline(always)]
497    fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
498        a.bitcast()
499    }
500    #[inline(always)]
501    fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
502        a.bitcast()
503    }
504    #[inline(always)]
505    fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
506        a.bitcast()
507    }
508    #[inline(always)]
509    fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
510        [
511            a[0usize] as u32,
512            a[1usize] as u32,
513            a[2usize] as u32,
514            a[3usize] as u32,
515        ]
516        .simd_into(self)
517    }
518    #[inline(always)]
519    fn cvt_u32_precise_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
520        self.cvt_u32_f32x4(a)
521    }
522    #[inline(always)]
523    fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
524        [
525            a[0usize] as i32,
526            a[1usize] as i32,
527            a[2usize] as i32,
528            a[3usize] as i32,
529        ]
530        .simd_into(self)
531    }
532    #[inline(always)]
533    fn cvt_i32_precise_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
534        self.cvt_i32_f32x4(a)
535    }
536    #[inline(always)]
537    fn splat_i8x16(self, val: i8) -> i8x16<Self> {
538        [val; 16usize].simd_into(self)
539    }
540    #[inline(always)]
541    fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16<Self> {
542        i8x16 {
543            val: crate::support::Aligned128(val),
544            simd: self,
545        }
546    }
547    #[inline(always)]
548    fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16<Self> {
549        i8x16 {
550            val: crate::support::Aligned128(*val),
551            simd: self,
552        }
553    }
554    #[inline(always)]
555    fn as_array_i8x16(self, a: i8x16<Self>) -> [i8; 16usize] {
556        a.val.0
557    }
558    #[inline(always)]
559    fn as_array_ref_i8x16(self, a: &i8x16<Self>) -> &[i8; 16usize] {
560        &a.val.0
561    }
562    #[inline(always)]
563    fn as_array_mut_i8x16(self, a: &mut i8x16<Self>) -> &mut [i8; 16usize] {
564        &mut a.val.0
565    }
566    #[inline(always)]
567    fn store_array_i8x16(self, a: i8x16<Self>, dest: &mut [i8; 16usize]) -> () {
568        *dest = a.val.0;
569    }
    /// Reinterprets the storage of the byte vector `a` as an `i8x16` without
    /// changing any bits.
    #[inline(always)]
    fn cvt_from_bytes_i8x16(self, a: u8x16<Self>) -> i8x16<Self> {
        // SAFETY: `transmute` only compiles when source and destination have the same
        // size, and every bit pattern is a valid `i8`.
        // NOTE(review): also assumes `Aligned128` has a layout under which the lane
        // bytes line up between element types — confirm in `crate::support`.
        unsafe {
            i8x16 {
                val: core::mem::transmute(a.val),
                simd: self,
            }
        }
    }
    /// Reinterprets the storage of `a` as a `u8x16` without changing any bits.
    #[inline(always)]
    fn cvt_to_bytes_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
        // SAFETY: `transmute` only compiles when source and destination have the same
        // size, and every bit pattern is a valid `u8`.
        // NOTE(review): also assumes `Aligned128` has a layout under which the lane
        // bytes line up between element types — confirm in `crate::support`.
        unsafe {
            u8x16 {
                val: core::mem::transmute(a.val),
                simd: self,
            }
        }
    }
588    #[inline(always)]
589    fn slide_i8x16<const SHIFT: usize>(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
590        let mut dest = [Default::default(); 16usize];
591        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
592        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
593        dest.simd_into(self)
594    }
595    #[inline(always)]
596    fn slide_within_blocks_i8x16<const SHIFT: usize>(
597        self,
598        a: i8x16<Self>,
599        b: i8x16<Self>,
600    ) -> i8x16<Self> {
601        self.slide_i8x16::<SHIFT>(a, b)
602    }
603    #[inline(always)]
604    fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
605        [
606            i8::wrapping_add(a[0usize], b[0usize]),
607            i8::wrapping_add(a[1usize], b[1usize]),
608            i8::wrapping_add(a[2usize], b[2usize]),
609            i8::wrapping_add(a[3usize], b[3usize]),
610            i8::wrapping_add(a[4usize], b[4usize]),
611            i8::wrapping_add(a[5usize], b[5usize]),
612            i8::wrapping_add(a[6usize], b[6usize]),
613            i8::wrapping_add(a[7usize], b[7usize]),
614            i8::wrapping_add(a[8usize], b[8usize]),
615            i8::wrapping_add(a[9usize], b[9usize]),
616            i8::wrapping_add(a[10usize], b[10usize]),
617            i8::wrapping_add(a[11usize], b[11usize]),
618            i8::wrapping_add(a[12usize], b[12usize]),
619            i8::wrapping_add(a[13usize], b[13usize]),
620            i8::wrapping_add(a[14usize], b[14usize]),
621            i8::wrapping_add(a[15usize], b[15usize]),
622        ]
623        .simd_into(self)
624    }
625    #[inline(always)]
626    fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
627        [
628            i8::wrapping_sub(a[0usize], b[0usize]),
629            i8::wrapping_sub(a[1usize], b[1usize]),
630            i8::wrapping_sub(a[2usize], b[2usize]),
631            i8::wrapping_sub(a[3usize], b[3usize]),
632            i8::wrapping_sub(a[4usize], b[4usize]),
633            i8::wrapping_sub(a[5usize], b[5usize]),
634            i8::wrapping_sub(a[6usize], b[6usize]),
635            i8::wrapping_sub(a[7usize], b[7usize]),
636            i8::wrapping_sub(a[8usize], b[8usize]),
637            i8::wrapping_sub(a[9usize], b[9usize]),
638            i8::wrapping_sub(a[10usize], b[10usize]),
639            i8::wrapping_sub(a[11usize], b[11usize]),
640            i8::wrapping_sub(a[12usize], b[12usize]),
641            i8::wrapping_sub(a[13usize], b[13usize]),
642            i8::wrapping_sub(a[14usize], b[14usize]),
643            i8::wrapping_sub(a[15usize], b[15usize]),
644        ]
645        .simd_into(self)
646    }
647    #[inline(always)]
648    fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
649        [
650            i8::wrapping_mul(a[0usize], b[0usize]),
651            i8::wrapping_mul(a[1usize], b[1usize]),
652            i8::wrapping_mul(a[2usize], b[2usize]),
653            i8::wrapping_mul(a[3usize], b[3usize]),
654            i8::wrapping_mul(a[4usize], b[4usize]),
655            i8::wrapping_mul(a[5usize], b[5usize]),
656            i8::wrapping_mul(a[6usize], b[6usize]),
657            i8::wrapping_mul(a[7usize], b[7usize]),
658            i8::wrapping_mul(a[8usize], b[8usize]),
659            i8::wrapping_mul(a[9usize], b[9usize]),
660            i8::wrapping_mul(a[10usize], b[10usize]),
661            i8::wrapping_mul(a[11usize], b[11usize]),
662            i8::wrapping_mul(a[12usize], b[12usize]),
663            i8::wrapping_mul(a[13usize], b[13usize]),
664            i8::wrapping_mul(a[14usize], b[14usize]),
665            i8::wrapping_mul(a[15usize], b[15usize]),
666        ]
667        .simd_into(self)
668    }
669    #[inline(always)]
670    fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
671        [
672            i8::bitand(a[0usize], &b[0usize]),
673            i8::bitand(a[1usize], &b[1usize]),
674            i8::bitand(a[2usize], &b[2usize]),
675            i8::bitand(a[3usize], &b[3usize]),
676            i8::bitand(a[4usize], &b[4usize]),
677            i8::bitand(a[5usize], &b[5usize]),
678            i8::bitand(a[6usize], &b[6usize]),
679            i8::bitand(a[7usize], &b[7usize]),
680            i8::bitand(a[8usize], &b[8usize]),
681            i8::bitand(a[9usize], &b[9usize]),
682            i8::bitand(a[10usize], &b[10usize]),
683            i8::bitand(a[11usize], &b[11usize]),
684            i8::bitand(a[12usize], &b[12usize]),
685            i8::bitand(a[13usize], &b[13usize]),
686            i8::bitand(a[14usize], &b[14usize]),
687            i8::bitand(a[15usize], &b[15usize]),
688        ]
689        .simd_into(self)
690    }
691    #[inline(always)]
692    fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
693        [
694            i8::bitor(a[0usize], &b[0usize]),
695            i8::bitor(a[1usize], &b[1usize]),
696            i8::bitor(a[2usize], &b[2usize]),
697            i8::bitor(a[3usize], &b[3usize]),
698            i8::bitor(a[4usize], &b[4usize]),
699            i8::bitor(a[5usize], &b[5usize]),
700            i8::bitor(a[6usize], &b[6usize]),
701            i8::bitor(a[7usize], &b[7usize]),
702            i8::bitor(a[8usize], &b[8usize]),
703            i8::bitor(a[9usize], &b[9usize]),
704            i8::bitor(a[10usize], &b[10usize]),
705            i8::bitor(a[11usize], &b[11usize]),
706            i8::bitor(a[12usize], &b[12usize]),
707            i8::bitor(a[13usize], &b[13usize]),
708            i8::bitor(a[14usize], &b[14usize]),
709            i8::bitor(a[15usize], &b[15usize]),
710        ]
711        .simd_into(self)
712    }
713    #[inline(always)]
714    fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
715        [
716            i8::bitxor(a[0usize], &b[0usize]),
717            i8::bitxor(a[1usize], &b[1usize]),
718            i8::bitxor(a[2usize], &b[2usize]),
719            i8::bitxor(a[3usize], &b[3usize]),
720            i8::bitxor(a[4usize], &b[4usize]),
721            i8::bitxor(a[5usize], &b[5usize]),
722            i8::bitxor(a[6usize], &b[6usize]),
723            i8::bitxor(a[7usize], &b[7usize]),
724            i8::bitxor(a[8usize], &b[8usize]),
725            i8::bitxor(a[9usize], &b[9usize]),
726            i8::bitxor(a[10usize], &b[10usize]),
727            i8::bitxor(a[11usize], &b[11usize]),
728            i8::bitxor(a[12usize], &b[12usize]),
729            i8::bitxor(a[13usize], &b[13usize]),
730            i8::bitxor(a[14usize], &b[14usize]),
731            i8::bitxor(a[15usize], &b[15usize]),
732        ]
733        .simd_into(self)
734    }
735    #[inline(always)]
736    fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
737        [
738            i8::not(a[0usize]),
739            i8::not(a[1usize]),
740            i8::not(a[2usize]),
741            i8::not(a[3usize]),
742            i8::not(a[4usize]),
743            i8::not(a[5usize]),
744            i8::not(a[6usize]),
745            i8::not(a[7usize]),
746            i8::not(a[8usize]),
747            i8::not(a[9usize]),
748            i8::not(a[10usize]),
749            i8::not(a[11usize]),
750            i8::not(a[12usize]),
751            i8::not(a[13usize]),
752            i8::not(a[14usize]),
753            i8::not(a[15usize]),
754        ]
755        .simd_into(self)
756    }
757    #[inline(always)]
758    fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
759        [
760            i8::shl(a[0usize], shift),
761            i8::shl(a[1usize], shift),
762            i8::shl(a[2usize], shift),
763            i8::shl(a[3usize], shift),
764            i8::shl(a[4usize], shift),
765            i8::shl(a[5usize], shift),
766            i8::shl(a[6usize], shift),
767            i8::shl(a[7usize], shift),
768            i8::shl(a[8usize], shift),
769            i8::shl(a[9usize], shift),
770            i8::shl(a[10usize], shift),
771            i8::shl(a[11usize], shift),
772            i8::shl(a[12usize], shift),
773            i8::shl(a[13usize], shift),
774            i8::shl(a[14usize], shift),
775            i8::shl(a[15usize], shift),
776        ]
777        .simd_into(self)
778    }
779    #[inline(always)]
780    fn shlv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
781        [
782            i8::shl(a[0usize], &b[0usize]),
783            i8::shl(a[1usize], &b[1usize]),
784            i8::shl(a[2usize], &b[2usize]),
785            i8::shl(a[3usize], &b[3usize]),
786            i8::shl(a[4usize], &b[4usize]),
787            i8::shl(a[5usize], &b[5usize]),
788            i8::shl(a[6usize], &b[6usize]),
789            i8::shl(a[7usize], &b[7usize]),
790            i8::shl(a[8usize], &b[8usize]),
791            i8::shl(a[9usize], &b[9usize]),
792            i8::shl(a[10usize], &b[10usize]),
793            i8::shl(a[11usize], &b[11usize]),
794            i8::shl(a[12usize], &b[12usize]),
795            i8::shl(a[13usize], &b[13usize]),
796            i8::shl(a[14usize], &b[14usize]),
797            i8::shl(a[15usize], &b[15usize]),
798        ]
799        .simd_into(self)
800    }
801    #[inline(always)]
802    fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
803        [
804            i8::shr(a[0usize], shift),
805            i8::shr(a[1usize], shift),
806            i8::shr(a[2usize], shift),
807            i8::shr(a[3usize], shift),
808            i8::shr(a[4usize], shift),
809            i8::shr(a[5usize], shift),
810            i8::shr(a[6usize], shift),
811            i8::shr(a[7usize], shift),
812            i8::shr(a[8usize], shift),
813            i8::shr(a[9usize], shift),
814            i8::shr(a[10usize], shift),
815            i8::shr(a[11usize], shift),
816            i8::shr(a[12usize], shift),
817            i8::shr(a[13usize], shift),
818            i8::shr(a[14usize], shift),
819            i8::shr(a[15usize], shift),
820        ]
821        .simd_into(self)
822    }
823    #[inline(always)]
824    fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
825        [
826            i8::shr(a[0usize], &b[0usize]),
827            i8::shr(a[1usize], &b[1usize]),
828            i8::shr(a[2usize], &b[2usize]),
829            i8::shr(a[3usize], &b[3usize]),
830            i8::shr(a[4usize], &b[4usize]),
831            i8::shr(a[5usize], &b[5usize]),
832            i8::shr(a[6usize], &b[6usize]),
833            i8::shr(a[7usize], &b[7usize]),
834            i8::shr(a[8usize], &b[8usize]),
835            i8::shr(a[9usize], &b[9usize]),
836            i8::shr(a[10usize], &b[10usize]),
837            i8::shr(a[11usize], &b[11usize]),
838            i8::shr(a[12usize], &b[12usize]),
839            i8::shr(a[13usize], &b[13usize]),
840            i8::shr(a[14usize], &b[14usize]),
841            i8::shr(a[15usize], &b[15usize]),
842        ]
843        .simd_into(self)
844    }
845    #[inline(always)]
846    fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
847        [
848            -(i8::eq(&a[0usize], &b[0usize]) as i8),
849            -(i8::eq(&a[1usize], &b[1usize]) as i8),
850            -(i8::eq(&a[2usize], &b[2usize]) as i8),
851            -(i8::eq(&a[3usize], &b[3usize]) as i8),
852            -(i8::eq(&a[4usize], &b[4usize]) as i8),
853            -(i8::eq(&a[5usize], &b[5usize]) as i8),
854            -(i8::eq(&a[6usize], &b[6usize]) as i8),
855            -(i8::eq(&a[7usize], &b[7usize]) as i8),
856            -(i8::eq(&a[8usize], &b[8usize]) as i8),
857            -(i8::eq(&a[9usize], &b[9usize]) as i8),
858            -(i8::eq(&a[10usize], &b[10usize]) as i8),
859            -(i8::eq(&a[11usize], &b[11usize]) as i8),
860            -(i8::eq(&a[12usize], &b[12usize]) as i8),
861            -(i8::eq(&a[13usize], &b[13usize]) as i8),
862            -(i8::eq(&a[14usize], &b[14usize]) as i8),
863            -(i8::eq(&a[15usize], &b[15usize]) as i8),
864        ]
865        .simd_into(self)
866    }
867    #[inline(always)]
868    fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
869        [
870            -(i8::lt(&a[0usize], &b[0usize]) as i8),
871            -(i8::lt(&a[1usize], &b[1usize]) as i8),
872            -(i8::lt(&a[2usize], &b[2usize]) as i8),
873            -(i8::lt(&a[3usize], &b[3usize]) as i8),
874            -(i8::lt(&a[4usize], &b[4usize]) as i8),
875            -(i8::lt(&a[5usize], &b[5usize]) as i8),
876            -(i8::lt(&a[6usize], &b[6usize]) as i8),
877            -(i8::lt(&a[7usize], &b[7usize]) as i8),
878            -(i8::lt(&a[8usize], &b[8usize]) as i8),
879            -(i8::lt(&a[9usize], &b[9usize]) as i8),
880            -(i8::lt(&a[10usize], &b[10usize]) as i8),
881            -(i8::lt(&a[11usize], &b[11usize]) as i8),
882            -(i8::lt(&a[12usize], &b[12usize]) as i8),
883            -(i8::lt(&a[13usize], &b[13usize]) as i8),
884            -(i8::lt(&a[14usize], &b[14usize]) as i8),
885            -(i8::lt(&a[15usize], &b[15usize]) as i8),
886        ]
887        .simd_into(self)
888    }
889    #[inline(always)]
890    fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
891        [
892            -(i8::le(&a[0usize], &b[0usize]) as i8),
893            -(i8::le(&a[1usize], &b[1usize]) as i8),
894            -(i8::le(&a[2usize], &b[2usize]) as i8),
895            -(i8::le(&a[3usize], &b[3usize]) as i8),
896            -(i8::le(&a[4usize], &b[4usize]) as i8),
897            -(i8::le(&a[5usize], &b[5usize]) as i8),
898            -(i8::le(&a[6usize], &b[6usize]) as i8),
899            -(i8::le(&a[7usize], &b[7usize]) as i8),
900            -(i8::le(&a[8usize], &b[8usize]) as i8),
901            -(i8::le(&a[9usize], &b[9usize]) as i8),
902            -(i8::le(&a[10usize], &b[10usize]) as i8),
903            -(i8::le(&a[11usize], &b[11usize]) as i8),
904            -(i8::le(&a[12usize], &b[12usize]) as i8),
905            -(i8::le(&a[13usize], &b[13usize]) as i8),
906            -(i8::le(&a[14usize], &b[14usize]) as i8),
907            -(i8::le(&a[15usize], &b[15usize]) as i8),
908        ]
909        .simd_into(self)
910    }
911    #[inline(always)]
912    fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
913        [
914            -(i8::ge(&a[0usize], &b[0usize]) as i8),
915            -(i8::ge(&a[1usize], &b[1usize]) as i8),
916            -(i8::ge(&a[2usize], &b[2usize]) as i8),
917            -(i8::ge(&a[3usize], &b[3usize]) as i8),
918            -(i8::ge(&a[4usize], &b[4usize]) as i8),
919            -(i8::ge(&a[5usize], &b[5usize]) as i8),
920            -(i8::ge(&a[6usize], &b[6usize]) as i8),
921            -(i8::ge(&a[7usize], &b[7usize]) as i8),
922            -(i8::ge(&a[8usize], &b[8usize]) as i8),
923            -(i8::ge(&a[9usize], &b[9usize]) as i8),
924            -(i8::ge(&a[10usize], &b[10usize]) as i8),
925            -(i8::ge(&a[11usize], &b[11usize]) as i8),
926            -(i8::ge(&a[12usize], &b[12usize]) as i8),
927            -(i8::ge(&a[13usize], &b[13usize]) as i8),
928            -(i8::ge(&a[14usize], &b[14usize]) as i8),
929            -(i8::ge(&a[15usize], &b[15usize]) as i8),
930        ]
931        .simd_into(self)
932    }
933    #[inline(always)]
934    fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
935        [
936            -(i8::gt(&a[0usize], &b[0usize]) as i8),
937            -(i8::gt(&a[1usize], &b[1usize]) as i8),
938            -(i8::gt(&a[2usize], &b[2usize]) as i8),
939            -(i8::gt(&a[3usize], &b[3usize]) as i8),
940            -(i8::gt(&a[4usize], &b[4usize]) as i8),
941            -(i8::gt(&a[5usize], &b[5usize]) as i8),
942            -(i8::gt(&a[6usize], &b[6usize]) as i8),
943            -(i8::gt(&a[7usize], &b[7usize]) as i8),
944            -(i8::gt(&a[8usize], &b[8usize]) as i8),
945            -(i8::gt(&a[9usize], &b[9usize]) as i8),
946            -(i8::gt(&a[10usize], &b[10usize]) as i8),
947            -(i8::gt(&a[11usize], &b[11usize]) as i8),
948            -(i8::gt(&a[12usize], &b[12usize]) as i8),
949            -(i8::gt(&a[13usize], &b[13usize]) as i8),
950            -(i8::gt(&a[14usize], &b[14usize]) as i8),
951            -(i8::gt(&a[15usize], &b[15usize]) as i8),
952        ]
953        .simd_into(self)
954    }
955    #[inline(always)]
956    fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
957        [
958            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
959            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
960        ]
961        .simd_into(self)
962    }
963    #[inline(always)]
964    fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
965        [
966            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
967            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
968            a[15usize], b[15usize],
969        ]
970        .simd_into(self)
971    }
972    #[inline(always)]
973    fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
974        [
975            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
976            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
977            b[12usize], b[14usize],
978        ]
979        .simd_into(self)
980    }
981    #[inline(always)]
982    fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
983        [
984            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
985            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
986            b[13usize], b[15usize],
987        ]
988        .simd_into(self)
989    }
990    #[inline(always)]
991    fn interleave_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> (i8x16<Self>, i8x16<Self>) {
992        (self.zip_low_i8x16(a, b), self.zip_high_i8x16(a, b))
993    }
994    #[inline(always)]
995    fn deinterleave_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> (i8x16<Self>, i8x16<Self>) {
996        (self.unzip_low_i8x16(a, b), self.unzip_high_i8x16(a, b))
997    }
998    #[inline(always)]
999    fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
1000        [
1001            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1002            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1003            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1004            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1005            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1006            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1007            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1008            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1009            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1010            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1011            if a[10usize] != 0 {
1012                b[10usize]
1013            } else {
1014                c[10usize]
1015            },
1016            if a[11usize] != 0 {
1017                b[11usize]
1018            } else {
1019                c[11usize]
1020            },
1021            if a[12usize] != 0 {
1022                b[12usize]
1023            } else {
1024                c[12usize]
1025            },
1026            if a[13usize] != 0 {
1027                b[13usize]
1028            } else {
1029                c[13usize]
1030            },
1031            if a[14usize] != 0 {
1032                b[14usize]
1033            } else {
1034                c[14usize]
1035            },
1036            if a[15usize] != 0 {
1037                b[15usize]
1038            } else {
1039                c[15usize]
1040            },
1041        ]
1042        .simd_into(self)
1043    }
1044    #[inline(always)]
1045    fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
1046        [
1047            i8::min(a[0usize], b[0usize]),
1048            i8::min(a[1usize], b[1usize]),
1049            i8::min(a[2usize], b[2usize]),
1050            i8::min(a[3usize], b[3usize]),
1051            i8::min(a[4usize], b[4usize]),
1052            i8::min(a[5usize], b[5usize]),
1053            i8::min(a[6usize], b[6usize]),
1054            i8::min(a[7usize], b[7usize]),
1055            i8::min(a[8usize], b[8usize]),
1056            i8::min(a[9usize], b[9usize]),
1057            i8::min(a[10usize], b[10usize]),
1058            i8::min(a[11usize], b[11usize]),
1059            i8::min(a[12usize], b[12usize]),
1060            i8::min(a[13usize], b[13usize]),
1061            i8::min(a[14usize], b[14usize]),
1062            i8::min(a[15usize], b[15usize]),
1063        ]
1064        .simd_into(self)
1065    }
1066    #[inline(always)]
1067    fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
1068        [
1069            i8::max(a[0usize], b[0usize]),
1070            i8::max(a[1usize], b[1usize]),
1071            i8::max(a[2usize], b[2usize]),
1072            i8::max(a[3usize], b[3usize]),
1073            i8::max(a[4usize], b[4usize]),
1074            i8::max(a[5usize], b[5usize]),
1075            i8::max(a[6usize], b[6usize]),
1076            i8::max(a[7usize], b[7usize]),
1077            i8::max(a[8usize], b[8usize]),
1078            i8::max(a[9usize], b[9usize]),
1079            i8::max(a[10usize], b[10usize]),
1080            i8::max(a[11usize], b[11usize]),
1081            i8::max(a[12usize], b[12usize]),
1082            i8::max(a[13usize], b[13usize]),
1083            i8::max(a[14usize], b[14usize]),
1084            i8::max(a[15usize], b[15usize]),
1085        ]
1086        .simd_into(self)
1087    }
1088    #[inline(always)]
1089    fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
1090        let mut result = [0; 32usize];
1091        result[0..16usize].copy_from_slice(&a.val.0);
1092        result[16usize..32usize].copy_from_slice(&b.val.0);
1093        result.simd_into(self)
1094    }
1095    #[inline(always)]
1096    fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
1097        [
1098            i8::neg(a[0usize]),
1099            i8::neg(a[1usize]),
1100            i8::neg(a[2usize]),
1101            i8::neg(a[3usize]),
1102            i8::neg(a[4usize]),
1103            i8::neg(a[5usize]),
1104            i8::neg(a[6usize]),
1105            i8::neg(a[7usize]),
1106            i8::neg(a[8usize]),
1107            i8::neg(a[9usize]),
1108            i8::neg(a[10usize]),
1109            i8::neg(a[11usize]),
1110            i8::neg(a[12usize]),
1111            i8::neg(a[13usize]),
1112            i8::neg(a[14usize]),
1113            i8::neg(a[15usize]),
1114        ]
1115        .simd_into(self)
1116    }
1117    #[inline(always)]
1118    fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
1119        a.bitcast()
1120    }
1121    #[inline(always)]
1122    fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
1123        a.bitcast()
1124    }
1125    #[inline(always)]
1126    fn splat_u8x16(self, val: u8) -> u8x16<Self> {
1127        [val; 16usize].simd_into(self)
1128    }
1129    #[inline(always)]
1130    fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16<Self> {
1131        u8x16 {
1132            val: crate::support::Aligned128(val),
1133            simd: self,
1134        }
1135    }
1136    #[inline(always)]
1137    fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16<Self> {
1138        u8x16 {
1139            val: crate::support::Aligned128(*val),
1140            simd: self,
1141        }
1142    }
1143    #[inline(always)]
1144    fn as_array_u8x16(self, a: u8x16<Self>) -> [u8; 16usize] {
1145        a.val.0
1146    }
1147    #[inline(always)]
1148    fn as_array_ref_u8x16(self, a: &u8x16<Self>) -> &[u8; 16usize] {
1149        &a.val.0
1150    }
1151    #[inline(always)]
1152    fn as_array_mut_u8x16(self, a: &mut u8x16<Self>) -> &mut [u8; 16usize] {
1153        &mut a.val.0
1154    }
1155    #[inline(always)]
1156    fn store_array_u8x16(self, a: u8x16<Self>, dest: &mut [u8; 16usize]) -> () {
1157        *dest = a.val.0;
1158    }
1159    #[inline(always)]
1160    fn cvt_from_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1161        unsafe {
1162            u8x16 {
1163                val: core::mem::transmute(a.val),
1164                simd: self,
1165            }
1166        }
1167    }
1168    #[inline(always)]
1169    fn cvt_to_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1170        unsafe {
1171            u8x16 {
1172                val: core::mem::transmute(a.val),
1173                simd: self,
1174            }
1175        }
1176    }
1177    #[inline(always)]
1178    fn slide_u8x16<const SHIFT: usize>(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1179        let mut dest = [Default::default(); 16usize];
1180        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
1181        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
1182        dest.simd_into(self)
1183    }
1184    #[inline(always)]
1185    fn slide_within_blocks_u8x16<const SHIFT: usize>(
1186        self,
1187        a: u8x16<Self>,
1188        b: u8x16<Self>,
1189    ) -> u8x16<Self> {
1190        self.slide_u8x16::<SHIFT>(a, b)
1191    }
1192    #[inline(always)]
1193    fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1194        [
1195            u8::wrapping_add(a[0usize], b[0usize]),
1196            u8::wrapping_add(a[1usize], b[1usize]),
1197            u8::wrapping_add(a[2usize], b[2usize]),
1198            u8::wrapping_add(a[3usize], b[3usize]),
1199            u8::wrapping_add(a[4usize], b[4usize]),
1200            u8::wrapping_add(a[5usize], b[5usize]),
1201            u8::wrapping_add(a[6usize], b[6usize]),
1202            u8::wrapping_add(a[7usize], b[7usize]),
1203            u8::wrapping_add(a[8usize], b[8usize]),
1204            u8::wrapping_add(a[9usize], b[9usize]),
1205            u8::wrapping_add(a[10usize], b[10usize]),
1206            u8::wrapping_add(a[11usize], b[11usize]),
1207            u8::wrapping_add(a[12usize], b[12usize]),
1208            u8::wrapping_add(a[13usize], b[13usize]),
1209            u8::wrapping_add(a[14usize], b[14usize]),
1210            u8::wrapping_add(a[15usize], b[15usize]),
1211        ]
1212        .simd_into(self)
1213    }
1214    #[inline(always)]
1215    fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1216        [
1217            u8::wrapping_sub(a[0usize], b[0usize]),
1218            u8::wrapping_sub(a[1usize], b[1usize]),
1219            u8::wrapping_sub(a[2usize], b[2usize]),
1220            u8::wrapping_sub(a[3usize], b[3usize]),
1221            u8::wrapping_sub(a[4usize], b[4usize]),
1222            u8::wrapping_sub(a[5usize], b[5usize]),
1223            u8::wrapping_sub(a[6usize], b[6usize]),
1224            u8::wrapping_sub(a[7usize], b[7usize]),
1225            u8::wrapping_sub(a[8usize], b[8usize]),
1226            u8::wrapping_sub(a[9usize], b[9usize]),
1227            u8::wrapping_sub(a[10usize], b[10usize]),
1228            u8::wrapping_sub(a[11usize], b[11usize]),
1229            u8::wrapping_sub(a[12usize], b[12usize]),
1230            u8::wrapping_sub(a[13usize], b[13usize]),
1231            u8::wrapping_sub(a[14usize], b[14usize]),
1232            u8::wrapping_sub(a[15usize], b[15usize]),
1233        ]
1234        .simd_into(self)
1235    }
1236    #[inline(always)]
1237    fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1238        [
1239            u8::wrapping_mul(a[0usize], b[0usize]),
1240            u8::wrapping_mul(a[1usize], b[1usize]),
1241            u8::wrapping_mul(a[2usize], b[2usize]),
1242            u8::wrapping_mul(a[3usize], b[3usize]),
1243            u8::wrapping_mul(a[4usize], b[4usize]),
1244            u8::wrapping_mul(a[5usize], b[5usize]),
1245            u8::wrapping_mul(a[6usize], b[6usize]),
1246            u8::wrapping_mul(a[7usize], b[7usize]),
1247            u8::wrapping_mul(a[8usize], b[8usize]),
1248            u8::wrapping_mul(a[9usize], b[9usize]),
1249            u8::wrapping_mul(a[10usize], b[10usize]),
1250            u8::wrapping_mul(a[11usize], b[11usize]),
1251            u8::wrapping_mul(a[12usize], b[12usize]),
1252            u8::wrapping_mul(a[13usize], b[13usize]),
1253            u8::wrapping_mul(a[14usize], b[14usize]),
1254            u8::wrapping_mul(a[15usize], b[15usize]),
1255        ]
1256        .simd_into(self)
1257    }
1258    #[inline(always)]
1259    fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1260        [
1261            u8::bitand(a[0usize], &b[0usize]),
1262            u8::bitand(a[1usize], &b[1usize]),
1263            u8::bitand(a[2usize], &b[2usize]),
1264            u8::bitand(a[3usize], &b[3usize]),
1265            u8::bitand(a[4usize], &b[4usize]),
1266            u8::bitand(a[5usize], &b[5usize]),
1267            u8::bitand(a[6usize], &b[6usize]),
1268            u8::bitand(a[7usize], &b[7usize]),
1269            u8::bitand(a[8usize], &b[8usize]),
1270            u8::bitand(a[9usize], &b[9usize]),
1271            u8::bitand(a[10usize], &b[10usize]),
1272            u8::bitand(a[11usize], &b[11usize]),
1273            u8::bitand(a[12usize], &b[12usize]),
1274            u8::bitand(a[13usize], &b[13usize]),
1275            u8::bitand(a[14usize], &b[14usize]),
1276            u8::bitand(a[15usize], &b[15usize]),
1277        ]
1278        .simd_into(self)
1279    }
1280    #[inline(always)]
1281    fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1282        [
1283            u8::bitor(a[0usize], &b[0usize]),
1284            u8::bitor(a[1usize], &b[1usize]),
1285            u8::bitor(a[2usize], &b[2usize]),
1286            u8::bitor(a[3usize], &b[3usize]),
1287            u8::bitor(a[4usize], &b[4usize]),
1288            u8::bitor(a[5usize], &b[5usize]),
1289            u8::bitor(a[6usize], &b[6usize]),
1290            u8::bitor(a[7usize], &b[7usize]),
1291            u8::bitor(a[8usize], &b[8usize]),
1292            u8::bitor(a[9usize], &b[9usize]),
1293            u8::bitor(a[10usize], &b[10usize]),
1294            u8::bitor(a[11usize], &b[11usize]),
1295            u8::bitor(a[12usize], &b[12usize]),
1296            u8::bitor(a[13usize], &b[13usize]),
1297            u8::bitor(a[14usize], &b[14usize]),
1298            u8::bitor(a[15usize], &b[15usize]),
1299        ]
1300        .simd_into(self)
1301    }
1302    #[inline(always)]
1303    fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1304        [
1305            u8::bitxor(a[0usize], &b[0usize]),
1306            u8::bitxor(a[1usize], &b[1usize]),
1307            u8::bitxor(a[2usize], &b[2usize]),
1308            u8::bitxor(a[3usize], &b[3usize]),
1309            u8::bitxor(a[4usize], &b[4usize]),
1310            u8::bitxor(a[5usize], &b[5usize]),
1311            u8::bitxor(a[6usize], &b[6usize]),
1312            u8::bitxor(a[7usize], &b[7usize]),
1313            u8::bitxor(a[8usize], &b[8usize]),
1314            u8::bitxor(a[9usize], &b[9usize]),
1315            u8::bitxor(a[10usize], &b[10usize]),
1316            u8::bitxor(a[11usize], &b[11usize]),
1317            u8::bitxor(a[12usize], &b[12usize]),
1318            u8::bitxor(a[13usize], &b[13usize]),
1319            u8::bitxor(a[14usize], &b[14usize]),
1320            u8::bitxor(a[15usize], &b[15usize]),
1321        ]
1322        .simd_into(self)
1323    }
1324    #[inline(always)]
1325    fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
1326        [
1327            u8::not(a[0usize]),
1328            u8::not(a[1usize]),
1329            u8::not(a[2usize]),
1330            u8::not(a[3usize]),
1331            u8::not(a[4usize]),
1332            u8::not(a[5usize]),
1333            u8::not(a[6usize]),
1334            u8::not(a[7usize]),
1335            u8::not(a[8usize]),
1336            u8::not(a[9usize]),
1337            u8::not(a[10usize]),
1338            u8::not(a[11usize]),
1339            u8::not(a[12usize]),
1340            u8::not(a[13usize]),
1341            u8::not(a[14usize]),
1342            u8::not(a[15usize]),
1343        ]
1344        .simd_into(self)
1345    }
1346    #[inline(always)]
1347    fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1348        [
1349            u8::shl(a[0usize], shift),
1350            u8::shl(a[1usize], shift),
1351            u8::shl(a[2usize], shift),
1352            u8::shl(a[3usize], shift),
1353            u8::shl(a[4usize], shift),
1354            u8::shl(a[5usize], shift),
1355            u8::shl(a[6usize], shift),
1356            u8::shl(a[7usize], shift),
1357            u8::shl(a[8usize], shift),
1358            u8::shl(a[9usize], shift),
1359            u8::shl(a[10usize], shift),
1360            u8::shl(a[11usize], shift),
1361            u8::shl(a[12usize], shift),
1362            u8::shl(a[13usize], shift),
1363            u8::shl(a[14usize], shift),
1364            u8::shl(a[15usize], shift),
1365        ]
1366        .simd_into(self)
1367    }
1368    #[inline(always)]
1369    fn shlv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1370        [
1371            u8::shl(a[0usize], &b[0usize]),
1372            u8::shl(a[1usize], &b[1usize]),
1373            u8::shl(a[2usize], &b[2usize]),
1374            u8::shl(a[3usize], &b[3usize]),
1375            u8::shl(a[4usize], &b[4usize]),
1376            u8::shl(a[5usize], &b[5usize]),
1377            u8::shl(a[6usize], &b[6usize]),
1378            u8::shl(a[7usize], &b[7usize]),
1379            u8::shl(a[8usize], &b[8usize]),
1380            u8::shl(a[9usize], &b[9usize]),
1381            u8::shl(a[10usize], &b[10usize]),
1382            u8::shl(a[11usize], &b[11usize]),
1383            u8::shl(a[12usize], &b[12usize]),
1384            u8::shl(a[13usize], &b[13usize]),
1385            u8::shl(a[14usize], &b[14usize]),
1386            u8::shl(a[15usize], &b[15usize]),
1387        ]
1388        .simd_into(self)
1389    }
1390    #[inline(always)]
1391    fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1392        [
1393            u8::shr(a[0usize], shift),
1394            u8::shr(a[1usize], shift),
1395            u8::shr(a[2usize], shift),
1396            u8::shr(a[3usize], shift),
1397            u8::shr(a[4usize], shift),
1398            u8::shr(a[5usize], shift),
1399            u8::shr(a[6usize], shift),
1400            u8::shr(a[7usize], shift),
1401            u8::shr(a[8usize], shift),
1402            u8::shr(a[9usize], shift),
1403            u8::shr(a[10usize], shift),
1404            u8::shr(a[11usize], shift),
1405            u8::shr(a[12usize], shift),
1406            u8::shr(a[13usize], shift),
1407            u8::shr(a[14usize], shift),
1408            u8::shr(a[15usize], shift),
1409        ]
1410        .simd_into(self)
1411    }
1412    #[inline(always)]
1413    fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1414        [
1415            u8::shr(a[0usize], &b[0usize]),
1416            u8::shr(a[1usize], &b[1usize]),
1417            u8::shr(a[2usize], &b[2usize]),
1418            u8::shr(a[3usize], &b[3usize]),
1419            u8::shr(a[4usize], &b[4usize]),
1420            u8::shr(a[5usize], &b[5usize]),
1421            u8::shr(a[6usize], &b[6usize]),
1422            u8::shr(a[7usize], &b[7usize]),
1423            u8::shr(a[8usize], &b[8usize]),
1424            u8::shr(a[9usize], &b[9usize]),
1425            u8::shr(a[10usize], &b[10usize]),
1426            u8::shr(a[11usize], &b[11usize]),
1427            u8::shr(a[12usize], &b[12usize]),
1428            u8::shr(a[13usize], &b[13usize]),
1429            u8::shr(a[14usize], &b[14usize]),
1430            u8::shr(a[15usize], &b[15usize]),
1431        ]
1432        .simd_into(self)
1433    }
1434    #[inline(always)]
1435    fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1436        [
1437            -(u8::eq(&a[0usize], &b[0usize]) as i8),
1438            -(u8::eq(&a[1usize], &b[1usize]) as i8),
1439            -(u8::eq(&a[2usize], &b[2usize]) as i8),
1440            -(u8::eq(&a[3usize], &b[3usize]) as i8),
1441            -(u8::eq(&a[4usize], &b[4usize]) as i8),
1442            -(u8::eq(&a[5usize], &b[5usize]) as i8),
1443            -(u8::eq(&a[6usize], &b[6usize]) as i8),
1444            -(u8::eq(&a[7usize], &b[7usize]) as i8),
1445            -(u8::eq(&a[8usize], &b[8usize]) as i8),
1446            -(u8::eq(&a[9usize], &b[9usize]) as i8),
1447            -(u8::eq(&a[10usize], &b[10usize]) as i8),
1448            -(u8::eq(&a[11usize], &b[11usize]) as i8),
1449            -(u8::eq(&a[12usize], &b[12usize]) as i8),
1450            -(u8::eq(&a[13usize], &b[13usize]) as i8),
1451            -(u8::eq(&a[14usize], &b[14usize]) as i8),
1452            -(u8::eq(&a[15usize], &b[15usize]) as i8),
1453        ]
1454        .simd_into(self)
1455    }
1456    #[inline(always)]
1457    fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1458        [
1459            -(u8::lt(&a[0usize], &b[0usize]) as i8),
1460            -(u8::lt(&a[1usize], &b[1usize]) as i8),
1461            -(u8::lt(&a[2usize], &b[2usize]) as i8),
1462            -(u8::lt(&a[3usize], &b[3usize]) as i8),
1463            -(u8::lt(&a[4usize], &b[4usize]) as i8),
1464            -(u8::lt(&a[5usize], &b[5usize]) as i8),
1465            -(u8::lt(&a[6usize], &b[6usize]) as i8),
1466            -(u8::lt(&a[7usize], &b[7usize]) as i8),
1467            -(u8::lt(&a[8usize], &b[8usize]) as i8),
1468            -(u8::lt(&a[9usize], &b[9usize]) as i8),
1469            -(u8::lt(&a[10usize], &b[10usize]) as i8),
1470            -(u8::lt(&a[11usize], &b[11usize]) as i8),
1471            -(u8::lt(&a[12usize], &b[12usize]) as i8),
1472            -(u8::lt(&a[13usize], &b[13usize]) as i8),
1473            -(u8::lt(&a[14usize], &b[14usize]) as i8),
1474            -(u8::lt(&a[15usize], &b[15usize]) as i8),
1475        ]
1476        .simd_into(self)
1477    }
1478    #[inline(always)]
1479    fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1480        [
1481            -(u8::le(&a[0usize], &b[0usize]) as i8),
1482            -(u8::le(&a[1usize], &b[1usize]) as i8),
1483            -(u8::le(&a[2usize], &b[2usize]) as i8),
1484            -(u8::le(&a[3usize], &b[3usize]) as i8),
1485            -(u8::le(&a[4usize], &b[4usize]) as i8),
1486            -(u8::le(&a[5usize], &b[5usize]) as i8),
1487            -(u8::le(&a[6usize], &b[6usize]) as i8),
1488            -(u8::le(&a[7usize], &b[7usize]) as i8),
1489            -(u8::le(&a[8usize], &b[8usize]) as i8),
1490            -(u8::le(&a[9usize], &b[9usize]) as i8),
1491            -(u8::le(&a[10usize], &b[10usize]) as i8),
1492            -(u8::le(&a[11usize], &b[11usize]) as i8),
1493            -(u8::le(&a[12usize], &b[12usize]) as i8),
1494            -(u8::le(&a[13usize], &b[13usize]) as i8),
1495            -(u8::le(&a[14usize], &b[14usize]) as i8),
1496            -(u8::le(&a[15usize], &b[15usize]) as i8),
1497        ]
1498        .simd_into(self)
1499    }
1500    #[inline(always)]
1501    fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1502        [
1503            -(u8::ge(&a[0usize], &b[0usize]) as i8),
1504            -(u8::ge(&a[1usize], &b[1usize]) as i8),
1505            -(u8::ge(&a[2usize], &b[2usize]) as i8),
1506            -(u8::ge(&a[3usize], &b[3usize]) as i8),
1507            -(u8::ge(&a[4usize], &b[4usize]) as i8),
1508            -(u8::ge(&a[5usize], &b[5usize]) as i8),
1509            -(u8::ge(&a[6usize], &b[6usize]) as i8),
1510            -(u8::ge(&a[7usize], &b[7usize]) as i8),
1511            -(u8::ge(&a[8usize], &b[8usize]) as i8),
1512            -(u8::ge(&a[9usize], &b[9usize]) as i8),
1513            -(u8::ge(&a[10usize], &b[10usize]) as i8),
1514            -(u8::ge(&a[11usize], &b[11usize]) as i8),
1515            -(u8::ge(&a[12usize], &b[12usize]) as i8),
1516            -(u8::ge(&a[13usize], &b[13usize]) as i8),
1517            -(u8::ge(&a[14usize], &b[14usize]) as i8),
1518            -(u8::ge(&a[15usize], &b[15usize]) as i8),
1519        ]
1520        .simd_into(self)
1521    }
1522    #[inline(always)]
1523    fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1524        [
1525            -(u8::gt(&a[0usize], &b[0usize]) as i8),
1526            -(u8::gt(&a[1usize], &b[1usize]) as i8),
1527            -(u8::gt(&a[2usize], &b[2usize]) as i8),
1528            -(u8::gt(&a[3usize], &b[3usize]) as i8),
1529            -(u8::gt(&a[4usize], &b[4usize]) as i8),
1530            -(u8::gt(&a[5usize], &b[5usize]) as i8),
1531            -(u8::gt(&a[6usize], &b[6usize]) as i8),
1532            -(u8::gt(&a[7usize], &b[7usize]) as i8),
1533            -(u8::gt(&a[8usize], &b[8usize]) as i8),
1534            -(u8::gt(&a[9usize], &b[9usize]) as i8),
1535            -(u8::gt(&a[10usize], &b[10usize]) as i8),
1536            -(u8::gt(&a[11usize], &b[11usize]) as i8),
1537            -(u8::gt(&a[12usize], &b[12usize]) as i8),
1538            -(u8::gt(&a[13usize], &b[13usize]) as i8),
1539            -(u8::gt(&a[14usize], &b[14usize]) as i8),
1540            -(u8::gt(&a[15usize], &b[15usize]) as i8),
1541        ]
1542        .simd_into(self)
1543    }
1544    #[inline(always)]
1545    fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1546        [
1547            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1548            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1549        ]
1550        .simd_into(self)
1551    }
1552    #[inline(always)]
1553    fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1554        [
1555            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
1556            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
1557            a[15usize], b[15usize],
1558        ]
1559        .simd_into(self)
1560    }
1561    #[inline(always)]
1562    fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1563        [
1564            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
1565            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
1566            b[12usize], b[14usize],
1567        ]
1568        .simd_into(self)
1569    }
1570    #[inline(always)]
1571    fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1572        [
1573            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
1574            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
1575            b[13usize], b[15usize],
1576        ]
1577        .simd_into(self)
1578    }
1579    #[inline(always)]
1580    fn interleave_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> (u8x16<Self>, u8x16<Self>) {
1581        (self.zip_low_u8x16(a, b), self.zip_high_u8x16(a, b))
1582    }
1583    #[inline(always)]
1584    fn deinterleave_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> (u8x16<Self>, u8x16<Self>) {
1585        (self.unzip_low_u8x16(a, b), self.unzip_high_u8x16(a, b))
1586    }
1587    #[inline(always)]
1588    fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
1589        [
1590            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1591            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1592            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1593            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1594            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1595            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1596            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1597            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1598            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1599            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1600            if a[10usize] != 0 {
1601                b[10usize]
1602            } else {
1603                c[10usize]
1604            },
1605            if a[11usize] != 0 {
1606                b[11usize]
1607            } else {
1608                c[11usize]
1609            },
1610            if a[12usize] != 0 {
1611                b[12usize]
1612            } else {
1613                c[12usize]
1614            },
1615            if a[13usize] != 0 {
1616                b[13usize]
1617            } else {
1618                c[13usize]
1619            },
1620            if a[14usize] != 0 {
1621                b[14usize]
1622            } else {
1623                c[14usize]
1624            },
1625            if a[15usize] != 0 {
1626                b[15usize]
1627            } else {
1628                c[15usize]
1629            },
1630        ]
1631        .simd_into(self)
1632    }
1633    #[inline(always)]
1634    fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1635        [
1636            u8::min(a[0usize], b[0usize]),
1637            u8::min(a[1usize], b[1usize]),
1638            u8::min(a[2usize], b[2usize]),
1639            u8::min(a[3usize], b[3usize]),
1640            u8::min(a[4usize], b[4usize]),
1641            u8::min(a[5usize], b[5usize]),
1642            u8::min(a[6usize], b[6usize]),
1643            u8::min(a[7usize], b[7usize]),
1644            u8::min(a[8usize], b[8usize]),
1645            u8::min(a[9usize], b[9usize]),
1646            u8::min(a[10usize], b[10usize]),
1647            u8::min(a[11usize], b[11usize]),
1648            u8::min(a[12usize], b[12usize]),
1649            u8::min(a[13usize], b[13usize]),
1650            u8::min(a[14usize], b[14usize]),
1651            u8::min(a[15usize], b[15usize]),
1652        ]
1653        .simd_into(self)
1654    }
1655    #[inline(always)]
1656    fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1657        [
1658            u8::max(a[0usize], b[0usize]),
1659            u8::max(a[1usize], b[1usize]),
1660            u8::max(a[2usize], b[2usize]),
1661            u8::max(a[3usize], b[3usize]),
1662            u8::max(a[4usize], b[4usize]),
1663            u8::max(a[5usize], b[5usize]),
1664            u8::max(a[6usize], b[6usize]),
1665            u8::max(a[7usize], b[7usize]),
1666            u8::max(a[8usize], b[8usize]),
1667            u8::max(a[9usize], b[9usize]),
1668            u8::max(a[10usize], b[10usize]),
1669            u8::max(a[11usize], b[11usize]),
1670            u8::max(a[12usize], b[12usize]),
1671            u8::max(a[13usize], b[13usize]),
1672            u8::max(a[14usize], b[14usize]),
1673            u8::max(a[15usize], b[15usize]),
1674        ]
1675        .simd_into(self)
1676    }
1677    #[inline(always)]
1678    fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
1679        let mut result = [0; 32usize];
1680        result[0..16usize].copy_from_slice(&a.val.0);
1681        result[16usize..32usize].copy_from_slice(&b.val.0);
1682        result.simd_into(self)
1683    }
1684    #[inline(always)]
1685    fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1686        [
1687            a[0usize] as u16,
1688            a[1usize] as u16,
1689            a[2usize] as u16,
1690            a[3usize] as u16,
1691            a[4usize] as u16,
1692            a[5usize] as u16,
1693            a[6usize] as u16,
1694            a[7usize] as u16,
1695            a[8usize] as u16,
1696            a[9usize] as u16,
1697            a[10usize] as u16,
1698            a[11usize] as u16,
1699            a[12usize] as u16,
1700            a[13usize] as u16,
1701            a[14usize] as u16,
1702            a[15usize] as u16,
1703        ]
1704        .simd_into(self)
1705    }
1706    #[inline(always)]
1707    fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1708        a.bitcast()
1709    }
1710    #[inline(always)]
1711    fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1712        [val; 16usize].simd_into(self)
1713    }
1714    #[inline(always)]
1715    fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16<Self> {
1716        mask8x16 {
1717            val: crate::support::Aligned128(val),
1718            simd: self,
1719        }
1720    }
1721    #[inline(always)]
1722    fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16<Self> {
1723        mask8x16 {
1724            val: crate::support::Aligned128(*val),
1725            simd: self,
1726        }
1727    }
1728    #[inline(always)]
1729    fn as_array_mask8x16(self, a: mask8x16<Self>) -> [i8; 16usize] {
1730        a.val.0
1731    }
1732    #[inline(always)]
1733    fn as_array_ref_mask8x16(self, a: &mask8x16<Self>) -> &[i8; 16usize] {
1734        &a.val.0
1735    }
1736    #[inline(always)]
1737    fn as_array_mut_mask8x16(self, a: &mut mask8x16<Self>) -> &mut [i8; 16usize] {
1738        &mut a.val.0
1739    }
1740    #[inline(always)]
1741    fn store_array_mask8x16(self, a: mask8x16<Self>, dest: &mut [i8; 16usize]) -> () {
1742        *dest = a.val.0;
1743    }
1744    #[inline(always)]
1745    fn cvt_from_bytes_mask8x16(self, a: u8x16<Self>) -> mask8x16<Self> {
1746        unsafe {
1747            mask8x16 {
1748                val: core::mem::transmute(a.val),
1749                simd: self,
1750            }
1751        }
1752    }
1753    #[inline(always)]
1754    fn cvt_to_bytes_mask8x16(self, a: mask8x16<Self>) -> u8x16<Self> {
1755        unsafe {
1756            u8x16 {
1757                val: core::mem::transmute(a.val),
1758                simd: self,
1759            }
1760        }
1761    }
1762    #[inline(always)]
1763    fn slide_mask8x16<const SHIFT: usize>(
1764        self,
1765        a: mask8x16<Self>,
1766        b: mask8x16<Self>,
1767    ) -> mask8x16<Self> {
1768        let mut dest = [Default::default(); 16usize];
1769        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
1770        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
1771        dest.simd_into(self)
1772    }
1773    #[inline(always)]
1774    fn slide_within_blocks_mask8x16<const SHIFT: usize>(
1775        self,
1776        a: mask8x16<Self>,
1777        b: mask8x16<Self>,
1778    ) -> mask8x16<Self> {
1779        self.slide_mask8x16::<SHIFT>(a, b)
1780    }
1781    #[inline(always)]
1782    fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1783        [
1784            i8::bitand(a[0usize], &b[0usize]),
1785            i8::bitand(a[1usize], &b[1usize]),
1786            i8::bitand(a[2usize], &b[2usize]),
1787            i8::bitand(a[3usize], &b[3usize]),
1788            i8::bitand(a[4usize], &b[4usize]),
1789            i8::bitand(a[5usize], &b[5usize]),
1790            i8::bitand(a[6usize], &b[6usize]),
1791            i8::bitand(a[7usize], &b[7usize]),
1792            i8::bitand(a[8usize], &b[8usize]),
1793            i8::bitand(a[9usize], &b[9usize]),
1794            i8::bitand(a[10usize], &b[10usize]),
1795            i8::bitand(a[11usize], &b[11usize]),
1796            i8::bitand(a[12usize], &b[12usize]),
1797            i8::bitand(a[13usize], &b[13usize]),
1798            i8::bitand(a[14usize], &b[14usize]),
1799            i8::bitand(a[15usize], &b[15usize]),
1800        ]
1801        .simd_into(self)
1802    }
1803    #[inline(always)]
1804    fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1805        [
1806            i8::bitor(a[0usize], &b[0usize]),
1807            i8::bitor(a[1usize], &b[1usize]),
1808            i8::bitor(a[2usize], &b[2usize]),
1809            i8::bitor(a[3usize], &b[3usize]),
1810            i8::bitor(a[4usize], &b[4usize]),
1811            i8::bitor(a[5usize], &b[5usize]),
1812            i8::bitor(a[6usize], &b[6usize]),
1813            i8::bitor(a[7usize], &b[7usize]),
1814            i8::bitor(a[8usize], &b[8usize]),
1815            i8::bitor(a[9usize], &b[9usize]),
1816            i8::bitor(a[10usize], &b[10usize]),
1817            i8::bitor(a[11usize], &b[11usize]),
1818            i8::bitor(a[12usize], &b[12usize]),
1819            i8::bitor(a[13usize], &b[13usize]),
1820            i8::bitor(a[14usize], &b[14usize]),
1821            i8::bitor(a[15usize], &b[15usize]),
1822        ]
1823        .simd_into(self)
1824    }
1825    #[inline(always)]
1826    fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1827        [
1828            i8::bitxor(a[0usize], &b[0usize]),
1829            i8::bitxor(a[1usize], &b[1usize]),
1830            i8::bitxor(a[2usize], &b[2usize]),
1831            i8::bitxor(a[3usize], &b[3usize]),
1832            i8::bitxor(a[4usize], &b[4usize]),
1833            i8::bitxor(a[5usize], &b[5usize]),
1834            i8::bitxor(a[6usize], &b[6usize]),
1835            i8::bitxor(a[7usize], &b[7usize]),
1836            i8::bitxor(a[8usize], &b[8usize]),
1837            i8::bitxor(a[9usize], &b[9usize]),
1838            i8::bitxor(a[10usize], &b[10usize]),
1839            i8::bitxor(a[11usize], &b[11usize]),
1840            i8::bitxor(a[12usize], &b[12usize]),
1841            i8::bitxor(a[13usize], &b[13usize]),
1842            i8::bitxor(a[14usize], &b[14usize]),
1843            i8::bitxor(a[15usize], &b[15usize]),
1844        ]
1845        .simd_into(self)
1846    }
1847    #[inline(always)]
1848    fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1849        [
1850            i8::not(a[0usize]),
1851            i8::not(a[1usize]),
1852            i8::not(a[2usize]),
1853            i8::not(a[3usize]),
1854            i8::not(a[4usize]),
1855            i8::not(a[5usize]),
1856            i8::not(a[6usize]),
1857            i8::not(a[7usize]),
1858            i8::not(a[8usize]),
1859            i8::not(a[9usize]),
1860            i8::not(a[10usize]),
1861            i8::not(a[11usize]),
1862            i8::not(a[12usize]),
1863            i8::not(a[13usize]),
1864            i8::not(a[14usize]),
1865            i8::not(a[15usize]),
1866        ]
1867        .simd_into(self)
1868    }
1869    #[inline(always)]
1870    fn select_mask8x16(
1871        self,
1872        a: mask8x16<Self>,
1873        b: mask8x16<Self>,
1874        c: mask8x16<Self>,
1875    ) -> mask8x16<Self> {
1876        [
1877            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1878            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1879            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1880            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1881            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1882            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1883            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1884            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1885            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1886            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1887            if a[10usize] != 0 {
1888                b[10usize]
1889            } else {
1890                c[10usize]
1891            },
1892            if a[11usize] != 0 {
1893                b[11usize]
1894            } else {
1895                c[11usize]
1896            },
1897            if a[12usize] != 0 {
1898                b[12usize]
1899            } else {
1900                c[12usize]
1901            },
1902            if a[13usize] != 0 {
1903                b[13usize]
1904            } else {
1905                c[13usize]
1906            },
1907            if a[14usize] != 0 {
1908                b[14usize]
1909            } else {
1910                c[14usize]
1911            },
1912            if a[15usize] != 0 {
1913                b[15usize]
1914            } else {
1915                c[15usize]
1916            },
1917        ]
1918        .simd_into(self)
1919    }
1920    #[inline(always)]
1921    fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1922        [
1923            -(i8::eq(&a[0usize], &b[0usize]) as i8),
1924            -(i8::eq(&a[1usize], &b[1usize]) as i8),
1925            -(i8::eq(&a[2usize], &b[2usize]) as i8),
1926            -(i8::eq(&a[3usize], &b[3usize]) as i8),
1927            -(i8::eq(&a[4usize], &b[4usize]) as i8),
1928            -(i8::eq(&a[5usize], &b[5usize]) as i8),
1929            -(i8::eq(&a[6usize], &b[6usize]) as i8),
1930            -(i8::eq(&a[7usize], &b[7usize]) as i8),
1931            -(i8::eq(&a[8usize], &b[8usize]) as i8),
1932            -(i8::eq(&a[9usize], &b[9usize]) as i8),
1933            -(i8::eq(&a[10usize], &b[10usize]) as i8),
1934            -(i8::eq(&a[11usize], &b[11usize]) as i8),
1935            -(i8::eq(&a[12usize], &b[12usize]) as i8),
1936            -(i8::eq(&a[13usize], &b[13usize]) as i8),
1937            -(i8::eq(&a[14usize], &b[14usize]) as i8),
1938            -(i8::eq(&a[15usize], &b[15usize]) as i8),
1939        ]
1940        .simd_into(self)
1941    }
1942    #[inline(always)]
1943    fn any_true_mask8x16(self, a: mask8x16<Self>) -> bool {
1944        a[0usize] != 0
1945            || a[1usize] != 0
1946            || a[2usize] != 0
1947            || a[3usize] != 0
1948            || a[4usize] != 0
1949            || a[5usize] != 0
1950            || a[6usize] != 0
1951            || a[7usize] != 0
1952            || a[8usize] != 0
1953            || a[9usize] != 0
1954            || a[10usize] != 0
1955            || a[11usize] != 0
1956            || a[12usize] != 0
1957            || a[13usize] != 0
1958            || a[14usize] != 0
1959            || a[15usize] != 0
1960    }
1961    #[inline(always)]
1962    fn all_true_mask8x16(self, a: mask8x16<Self>) -> bool {
1963        a[0usize] != 0
1964            && a[1usize] != 0
1965            && a[2usize] != 0
1966            && a[3usize] != 0
1967            && a[4usize] != 0
1968            && a[5usize] != 0
1969            && a[6usize] != 0
1970            && a[7usize] != 0
1971            && a[8usize] != 0
1972            && a[9usize] != 0
1973            && a[10usize] != 0
1974            && a[11usize] != 0
1975            && a[12usize] != 0
1976            && a[13usize] != 0
1977            && a[14usize] != 0
1978            && a[15usize] != 0
1979    }
1980    #[inline(always)]
1981    fn any_false_mask8x16(self, a: mask8x16<Self>) -> bool {
1982        a[0usize] == 0
1983            || a[1usize] == 0
1984            || a[2usize] == 0
1985            || a[3usize] == 0
1986            || a[4usize] == 0
1987            || a[5usize] == 0
1988            || a[6usize] == 0
1989            || a[7usize] == 0
1990            || a[8usize] == 0
1991            || a[9usize] == 0
1992            || a[10usize] == 0
1993            || a[11usize] == 0
1994            || a[12usize] == 0
1995            || a[13usize] == 0
1996            || a[14usize] == 0
1997            || a[15usize] == 0
1998    }
1999    #[inline(always)]
2000    fn all_false_mask8x16(self, a: mask8x16<Self>) -> bool {
2001        a[0usize] == 0
2002            && a[1usize] == 0
2003            && a[2usize] == 0
2004            && a[3usize] == 0
2005            && a[4usize] == 0
2006            && a[5usize] == 0
2007            && a[6usize] == 0
2008            && a[7usize] == 0
2009            && a[8usize] == 0
2010            && a[9usize] == 0
2011            && a[10usize] == 0
2012            && a[11usize] == 0
2013            && a[12usize] == 0
2014            && a[13usize] == 0
2015            && a[14usize] == 0
2016            && a[15usize] == 0
2017    }
2018    #[inline(always)]
2019    fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
2020        let mut result = [0; 32usize];
2021        result[0..16usize].copy_from_slice(&a.val.0);
2022        result[16usize..32usize].copy_from_slice(&b.val.0);
2023        result.simd_into(self)
2024    }
2025    #[inline(always)]
2026    fn splat_i16x8(self, val: i16) -> i16x8<Self> {
2027        [val; 8usize].simd_into(self)
2028    }
2029    #[inline(always)]
2030    fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8<Self> {
2031        i16x8 {
2032            val: crate::support::Aligned128(val),
2033            simd: self,
2034        }
2035    }
2036    #[inline(always)]
2037    fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8<Self> {
2038        i16x8 {
2039            val: crate::support::Aligned128(*val),
2040            simd: self,
2041        }
2042    }
2043    #[inline(always)]
2044    fn as_array_i16x8(self, a: i16x8<Self>) -> [i16; 8usize] {
2045        a.val.0
2046    }
2047    #[inline(always)]
2048    fn as_array_ref_i16x8(self, a: &i16x8<Self>) -> &[i16; 8usize] {
2049        &a.val.0
2050    }
2051    #[inline(always)]
2052    fn as_array_mut_i16x8(self, a: &mut i16x8<Self>) -> &mut [i16; 8usize] {
2053        &mut a.val.0
2054    }
2055    #[inline(always)]
2056    fn store_array_i16x8(self, a: i16x8<Self>, dest: &mut [i16; 8usize]) -> () {
2057        *dest = a.val.0;
2058    }
2059    #[inline(always)]
2060    fn cvt_from_bytes_i16x8(self, a: u8x16<Self>) -> i16x8<Self> {
2061        unsafe {
2062            i16x8 {
2063                val: core::mem::transmute(a.val),
2064                simd: self,
2065            }
2066        }
2067    }
2068    #[inline(always)]
2069    fn cvt_to_bytes_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
2070        unsafe {
2071            u8x16 {
2072                val: core::mem::transmute(a.val),
2073                simd: self,
2074            }
2075        }
2076    }
2077    #[inline(always)]
2078    fn slide_i16x8<const SHIFT: usize>(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2079        let mut dest = [Default::default(); 8usize];
2080        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2081        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2082        dest.simd_into(self)
2083    }
2084    #[inline(always)]
2085    fn slide_within_blocks_i16x8<const SHIFT: usize>(
2086        self,
2087        a: i16x8<Self>,
2088        b: i16x8<Self>,
2089    ) -> i16x8<Self> {
2090        self.slide_i16x8::<SHIFT>(a, b)
2091    }
2092    #[inline(always)]
2093    fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2094        [
2095            i16::wrapping_add(a[0usize], b[0usize]),
2096            i16::wrapping_add(a[1usize], b[1usize]),
2097            i16::wrapping_add(a[2usize], b[2usize]),
2098            i16::wrapping_add(a[3usize], b[3usize]),
2099            i16::wrapping_add(a[4usize], b[4usize]),
2100            i16::wrapping_add(a[5usize], b[5usize]),
2101            i16::wrapping_add(a[6usize], b[6usize]),
2102            i16::wrapping_add(a[7usize], b[7usize]),
2103        ]
2104        .simd_into(self)
2105    }
2106    #[inline(always)]
2107    fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2108        [
2109            i16::wrapping_sub(a[0usize], b[0usize]),
2110            i16::wrapping_sub(a[1usize], b[1usize]),
2111            i16::wrapping_sub(a[2usize], b[2usize]),
2112            i16::wrapping_sub(a[3usize], b[3usize]),
2113            i16::wrapping_sub(a[4usize], b[4usize]),
2114            i16::wrapping_sub(a[5usize], b[5usize]),
2115            i16::wrapping_sub(a[6usize], b[6usize]),
2116            i16::wrapping_sub(a[7usize], b[7usize]),
2117        ]
2118        .simd_into(self)
2119    }
2120    #[inline(always)]
2121    fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2122        [
2123            i16::wrapping_mul(a[0usize], b[0usize]),
2124            i16::wrapping_mul(a[1usize], b[1usize]),
2125            i16::wrapping_mul(a[2usize], b[2usize]),
2126            i16::wrapping_mul(a[3usize], b[3usize]),
2127            i16::wrapping_mul(a[4usize], b[4usize]),
2128            i16::wrapping_mul(a[5usize], b[5usize]),
2129            i16::wrapping_mul(a[6usize], b[6usize]),
2130            i16::wrapping_mul(a[7usize], b[7usize]),
2131        ]
2132        .simd_into(self)
2133    }
2134    #[inline(always)]
2135    fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2136        [
2137            i16::bitand(a[0usize], &b[0usize]),
2138            i16::bitand(a[1usize], &b[1usize]),
2139            i16::bitand(a[2usize], &b[2usize]),
2140            i16::bitand(a[3usize], &b[3usize]),
2141            i16::bitand(a[4usize], &b[4usize]),
2142            i16::bitand(a[5usize], &b[5usize]),
2143            i16::bitand(a[6usize], &b[6usize]),
2144            i16::bitand(a[7usize], &b[7usize]),
2145        ]
2146        .simd_into(self)
2147    }
2148    #[inline(always)]
2149    fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2150        [
2151            i16::bitor(a[0usize], &b[0usize]),
2152            i16::bitor(a[1usize], &b[1usize]),
2153            i16::bitor(a[2usize], &b[2usize]),
2154            i16::bitor(a[3usize], &b[3usize]),
2155            i16::bitor(a[4usize], &b[4usize]),
2156            i16::bitor(a[5usize], &b[5usize]),
2157            i16::bitor(a[6usize], &b[6usize]),
2158            i16::bitor(a[7usize], &b[7usize]),
2159        ]
2160        .simd_into(self)
2161    }
2162    #[inline(always)]
2163    fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2164        [
2165            i16::bitxor(a[0usize], &b[0usize]),
2166            i16::bitxor(a[1usize], &b[1usize]),
2167            i16::bitxor(a[2usize], &b[2usize]),
2168            i16::bitxor(a[3usize], &b[3usize]),
2169            i16::bitxor(a[4usize], &b[4usize]),
2170            i16::bitxor(a[5usize], &b[5usize]),
2171            i16::bitxor(a[6usize], &b[6usize]),
2172            i16::bitxor(a[7usize], &b[7usize]),
2173        ]
2174        .simd_into(self)
2175    }
2176    #[inline(always)]
2177    fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
2178        [
2179            i16::not(a[0usize]),
2180            i16::not(a[1usize]),
2181            i16::not(a[2usize]),
2182            i16::not(a[3usize]),
2183            i16::not(a[4usize]),
2184            i16::not(a[5usize]),
2185            i16::not(a[6usize]),
2186            i16::not(a[7usize]),
2187        ]
2188        .simd_into(self)
2189    }
2190    #[inline(always)]
2191    fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
2192        [
2193            i16::shl(a[0usize], shift),
2194            i16::shl(a[1usize], shift),
2195            i16::shl(a[2usize], shift),
2196            i16::shl(a[3usize], shift),
2197            i16::shl(a[4usize], shift),
2198            i16::shl(a[5usize], shift),
2199            i16::shl(a[6usize], shift),
2200            i16::shl(a[7usize], shift),
2201        ]
2202        .simd_into(self)
2203    }
2204    #[inline(always)]
2205    fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2206        [
2207            i16::shl(a[0usize], &b[0usize]),
2208            i16::shl(a[1usize], &b[1usize]),
2209            i16::shl(a[2usize], &b[2usize]),
2210            i16::shl(a[3usize], &b[3usize]),
2211            i16::shl(a[4usize], &b[4usize]),
2212            i16::shl(a[5usize], &b[5usize]),
2213            i16::shl(a[6usize], &b[6usize]),
2214            i16::shl(a[7usize], &b[7usize]),
2215        ]
2216        .simd_into(self)
2217    }
2218    #[inline(always)]
2219    fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
2220        [
2221            i16::shr(a[0usize], shift),
2222            i16::shr(a[1usize], shift),
2223            i16::shr(a[2usize], shift),
2224            i16::shr(a[3usize], shift),
2225            i16::shr(a[4usize], shift),
2226            i16::shr(a[5usize], shift),
2227            i16::shr(a[6usize], shift),
2228            i16::shr(a[7usize], shift),
2229        ]
2230        .simd_into(self)
2231    }
2232    #[inline(always)]
2233    fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2234        [
2235            i16::shr(a[0usize], &b[0usize]),
2236            i16::shr(a[1usize], &b[1usize]),
2237            i16::shr(a[2usize], &b[2usize]),
2238            i16::shr(a[3usize], &b[3usize]),
2239            i16::shr(a[4usize], &b[4usize]),
2240            i16::shr(a[5usize], &b[5usize]),
2241            i16::shr(a[6usize], &b[6usize]),
2242            i16::shr(a[7usize], &b[7usize]),
2243        ]
2244        .simd_into(self)
2245    }
2246    #[inline(always)]
2247    fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2248        [
2249            -(i16::eq(&a[0usize], &b[0usize]) as i16),
2250            -(i16::eq(&a[1usize], &b[1usize]) as i16),
2251            -(i16::eq(&a[2usize], &b[2usize]) as i16),
2252            -(i16::eq(&a[3usize], &b[3usize]) as i16),
2253            -(i16::eq(&a[4usize], &b[4usize]) as i16),
2254            -(i16::eq(&a[5usize], &b[5usize]) as i16),
2255            -(i16::eq(&a[6usize], &b[6usize]) as i16),
2256            -(i16::eq(&a[7usize], &b[7usize]) as i16),
2257        ]
2258        .simd_into(self)
2259    }
2260    #[inline(always)]
2261    fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2262        [
2263            -(i16::lt(&a[0usize], &b[0usize]) as i16),
2264            -(i16::lt(&a[1usize], &b[1usize]) as i16),
2265            -(i16::lt(&a[2usize], &b[2usize]) as i16),
2266            -(i16::lt(&a[3usize], &b[3usize]) as i16),
2267            -(i16::lt(&a[4usize], &b[4usize]) as i16),
2268            -(i16::lt(&a[5usize], &b[5usize]) as i16),
2269            -(i16::lt(&a[6usize], &b[6usize]) as i16),
2270            -(i16::lt(&a[7usize], &b[7usize]) as i16),
2271        ]
2272        .simd_into(self)
2273    }
2274    #[inline(always)]
2275    fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2276        [
2277            -(i16::le(&a[0usize], &b[0usize]) as i16),
2278            -(i16::le(&a[1usize], &b[1usize]) as i16),
2279            -(i16::le(&a[2usize], &b[2usize]) as i16),
2280            -(i16::le(&a[3usize], &b[3usize]) as i16),
2281            -(i16::le(&a[4usize], &b[4usize]) as i16),
2282            -(i16::le(&a[5usize], &b[5usize]) as i16),
2283            -(i16::le(&a[6usize], &b[6usize]) as i16),
2284            -(i16::le(&a[7usize], &b[7usize]) as i16),
2285        ]
2286        .simd_into(self)
2287    }
2288    #[inline(always)]
2289    fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2290        [
2291            -(i16::ge(&a[0usize], &b[0usize]) as i16),
2292            -(i16::ge(&a[1usize], &b[1usize]) as i16),
2293            -(i16::ge(&a[2usize], &b[2usize]) as i16),
2294            -(i16::ge(&a[3usize], &b[3usize]) as i16),
2295            -(i16::ge(&a[4usize], &b[4usize]) as i16),
2296            -(i16::ge(&a[5usize], &b[5usize]) as i16),
2297            -(i16::ge(&a[6usize], &b[6usize]) as i16),
2298            -(i16::ge(&a[7usize], &b[7usize]) as i16),
2299        ]
2300        .simd_into(self)
2301    }
2302    #[inline(always)]
2303    fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
2304        [
2305            -(i16::gt(&a[0usize], &b[0usize]) as i16),
2306            -(i16::gt(&a[1usize], &b[1usize]) as i16),
2307            -(i16::gt(&a[2usize], &b[2usize]) as i16),
2308            -(i16::gt(&a[3usize], &b[3usize]) as i16),
2309            -(i16::gt(&a[4usize], &b[4usize]) as i16),
2310            -(i16::gt(&a[5usize], &b[5usize]) as i16),
2311            -(i16::gt(&a[6usize], &b[6usize]) as i16),
2312            -(i16::gt(&a[7usize], &b[7usize]) as i16),
2313        ]
2314        .simd_into(self)
2315    }
2316    #[inline(always)]
2317    fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2318        [
2319            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2320        ]
2321        .simd_into(self)
2322    }
2323    #[inline(always)]
2324    fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2325        [
2326            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2327        ]
2328        .simd_into(self)
2329    }
2330    #[inline(always)]
2331    fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2332        [
2333            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2334        ]
2335        .simd_into(self)
2336    }
2337    #[inline(always)]
2338    fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2339        [
2340            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2341        ]
2342        .simd_into(self)
2343    }
2344    #[inline(always)]
2345    fn interleave_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> (i16x8<Self>, i16x8<Self>) {
2346        (self.zip_low_i16x8(a, b), self.zip_high_i16x8(a, b))
2347    }
2348    #[inline(always)]
2349    fn deinterleave_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> (i16x8<Self>, i16x8<Self>) {
2350        (self.unzip_low_i16x8(a, b), self.unzip_high_i16x8(a, b))
2351    }
2352    #[inline(always)]
2353    fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
2354        [
2355            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2356            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2357            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2358            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2359            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2360            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2361            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2362            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2363        ]
2364        .simd_into(self)
2365    }
2366    #[inline(always)]
2367    fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2368        [
2369            i16::min(a[0usize], b[0usize]),
2370            i16::min(a[1usize], b[1usize]),
2371            i16::min(a[2usize], b[2usize]),
2372            i16::min(a[3usize], b[3usize]),
2373            i16::min(a[4usize], b[4usize]),
2374            i16::min(a[5usize], b[5usize]),
2375            i16::min(a[6usize], b[6usize]),
2376            i16::min(a[7usize], b[7usize]),
2377        ]
2378        .simd_into(self)
2379    }
2380    #[inline(always)]
2381    fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
2382        [
2383            i16::max(a[0usize], b[0usize]),
2384            i16::max(a[1usize], b[1usize]),
2385            i16::max(a[2usize], b[2usize]),
2386            i16::max(a[3usize], b[3usize]),
2387            i16::max(a[4usize], b[4usize]),
2388            i16::max(a[5usize], b[5usize]),
2389            i16::max(a[6usize], b[6usize]),
2390            i16::max(a[7usize], b[7usize]),
2391        ]
2392        .simd_into(self)
2393    }
2394    #[inline(always)]
2395    fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
2396        let mut result = [0; 16usize];
2397        result[0..8usize].copy_from_slice(&a.val.0);
2398        result[8usize..16usize].copy_from_slice(&b.val.0);
2399        result.simd_into(self)
2400    }
2401    #[inline(always)]
2402    fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
2403        [
2404            i16::neg(a[0usize]),
2405            i16::neg(a[1usize]),
2406            i16::neg(a[2usize]),
2407            i16::neg(a[3usize]),
2408            i16::neg(a[4usize]),
2409            i16::neg(a[5usize]),
2410            i16::neg(a[6usize]),
2411            i16::neg(a[7usize]),
2412        ]
2413        .simd_into(self)
2414    }
2415    #[inline(always)]
2416    fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
2417        a.bitcast()
2418    }
2419    #[inline(always)]
2420    fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
2421        a.bitcast()
2422    }
2423    #[inline(always)]
2424    fn splat_u16x8(self, val: u16) -> u16x8<Self> {
2425        [val; 8usize].simd_into(self)
2426    }
2427    #[inline(always)]
2428    fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8<Self> {
2429        u16x8 {
2430            val: crate::support::Aligned128(val),
2431            simd: self,
2432        }
2433    }
2434    #[inline(always)]
2435    fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8<Self> {
2436        u16x8 {
2437            val: crate::support::Aligned128(*val),
2438            simd: self,
2439        }
2440    }
2441    #[inline(always)]
2442    fn as_array_u16x8(self, a: u16x8<Self>) -> [u16; 8usize] {
2443        a.val.0
2444    }
2445    #[inline(always)]
2446    fn as_array_ref_u16x8(self, a: &u16x8<Self>) -> &[u16; 8usize] {
2447        &a.val.0
2448    }
2449    #[inline(always)]
2450    fn as_array_mut_u16x8(self, a: &mut u16x8<Self>) -> &mut [u16; 8usize] {
2451        &mut a.val.0
2452    }
2453    #[inline(always)]
2454    fn store_array_u16x8(self, a: u16x8<Self>, dest: &mut [u16; 8usize]) -> () {
2455        *dest = a.val.0;
2456    }
2457    #[inline(always)]
2458    fn cvt_from_bytes_u16x8(self, a: u8x16<Self>) -> u16x8<Self> {
2459        unsafe {
2460            u16x8 {
2461                val: core::mem::transmute(a.val),
2462                simd: self,
2463            }
2464        }
2465    }
2466    #[inline(always)]
2467    fn cvt_to_bytes_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2468        unsafe {
2469            u8x16 {
2470                val: core::mem::transmute(a.val),
2471                simd: self,
2472            }
2473        }
2474    }
2475    #[inline(always)]
2476    fn slide_u16x8<const SHIFT: usize>(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2477        let mut dest = [Default::default(); 8usize];
2478        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2479        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2480        dest.simd_into(self)
2481    }
2482    #[inline(always)]
2483    fn slide_within_blocks_u16x8<const SHIFT: usize>(
2484        self,
2485        a: u16x8<Self>,
2486        b: u16x8<Self>,
2487    ) -> u16x8<Self> {
2488        self.slide_u16x8::<SHIFT>(a, b)
2489    }
2490    #[inline(always)]
2491    fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2492        [
2493            u16::wrapping_add(a[0usize], b[0usize]),
2494            u16::wrapping_add(a[1usize], b[1usize]),
2495            u16::wrapping_add(a[2usize], b[2usize]),
2496            u16::wrapping_add(a[3usize], b[3usize]),
2497            u16::wrapping_add(a[4usize], b[4usize]),
2498            u16::wrapping_add(a[5usize], b[5usize]),
2499            u16::wrapping_add(a[6usize], b[6usize]),
2500            u16::wrapping_add(a[7usize], b[7usize]),
2501        ]
2502        .simd_into(self)
2503    }
2504    #[inline(always)]
2505    fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2506        [
2507            u16::wrapping_sub(a[0usize], b[0usize]),
2508            u16::wrapping_sub(a[1usize], b[1usize]),
2509            u16::wrapping_sub(a[2usize], b[2usize]),
2510            u16::wrapping_sub(a[3usize], b[3usize]),
2511            u16::wrapping_sub(a[4usize], b[4usize]),
2512            u16::wrapping_sub(a[5usize], b[5usize]),
2513            u16::wrapping_sub(a[6usize], b[6usize]),
2514            u16::wrapping_sub(a[7usize], b[7usize]),
2515        ]
2516        .simd_into(self)
2517    }
2518    #[inline(always)]
2519    fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2520        [
2521            u16::wrapping_mul(a[0usize], b[0usize]),
2522            u16::wrapping_mul(a[1usize], b[1usize]),
2523            u16::wrapping_mul(a[2usize], b[2usize]),
2524            u16::wrapping_mul(a[3usize], b[3usize]),
2525            u16::wrapping_mul(a[4usize], b[4usize]),
2526            u16::wrapping_mul(a[5usize], b[5usize]),
2527            u16::wrapping_mul(a[6usize], b[6usize]),
2528            u16::wrapping_mul(a[7usize], b[7usize]),
2529        ]
2530        .simd_into(self)
2531    }
2532    #[inline(always)]
2533    fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2534        [
2535            u16::bitand(a[0usize], &b[0usize]),
2536            u16::bitand(a[1usize], &b[1usize]),
2537            u16::bitand(a[2usize], &b[2usize]),
2538            u16::bitand(a[3usize], &b[3usize]),
2539            u16::bitand(a[4usize], &b[4usize]),
2540            u16::bitand(a[5usize], &b[5usize]),
2541            u16::bitand(a[6usize], &b[6usize]),
2542            u16::bitand(a[7usize], &b[7usize]),
2543        ]
2544        .simd_into(self)
2545    }
2546    #[inline(always)]
2547    fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2548        [
2549            u16::bitor(a[0usize], &b[0usize]),
2550            u16::bitor(a[1usize], &b[1usize]),
2551            u16::bitor(a[2usize], &b[2usize]),
2552            u16::bitor(a[3usize], &b[3usize]),
2553            u16::bitor(a[4usize], &b[4usize]),
2554            u16::bitor(a[5usize], &b[5usize]),
2555            u16::bitor(a[6usize], &b[6usize]),
2556            u16::bitor(a[7usize], &b[7usize]),
2557        ]
2558        .simd_into(self)
2559    }
2560    #[inline(always)]
2561    fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2562        [
2563            u16::bitxor(a[0usize], &b[0usize]),
2564            u16::bitxor(a[1usize], &b[1usize]),
2565            u16::bitxor(a[2usize], &b[2usize]),
2566            u16::bitxor(a[3usize], &b[3usize]),
2567            u16::bitxor(a[4usize], &b[4usize]),
2568            u16::bitxor(a[5usize], &b[5usize]),
2569            u16::bitxor(a[6usize], &b[6usize]),
2570            u16::bitxor(a[7usize], &b[7usize]),
2571        ]
2572        .simd_into(self)
2573    }
2574    #[inline(always)]
2575    fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
2576        [
2577            u16::not(a[0usize]),
2578            u16::not(a[1usize]),
2579            u16::not(a[2usize]),
2580            u16::not(a[3usize]),
2581            u16::not(a[4usize]),
2582            u16::not(a[5usize]),
2583            u16::not(a[6usize]),
2584            u16::not(a[7usize]),
2585        ]
2586        .simd_into(self)
2587    }
2588    #[inline(always)]
2589    fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2590        [
2591            u16::shl(a[0usize], shift),
2592            u16::shl(a[1usize], shift),
2593            u16::shl(a[2usize], shift),
2594            u16::shl(a[3usize], shift),
2595            u16::shl(a[4usize], shift),
2596            u16::shl(a[5usize], shift),
2597            u16::shl(a[6usize], shift),
2598            u16::shl(a[7usize], shift),
2599        ]
2600        .simd_into(self)
2601    }
2602    #[inline(always)]
2603    fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2604        [
2605            u16::shl(a[0usize], &b[0usize]),
2606            u16::shl(a[1usize], &b[1usize]),
2607            u16::shl(a[2usize], &b[2usize]),
2608            u16::shl(a[3usize], &b[3usize]),
2609            u16::shl(a[4usize], &b[4usize]),
2610            u16::shl(a[5usize], &b[5usize]),
2611            u16::shl(a[6usize], &b[6usize]),
2612            u16::shl(a[7usize], &b[7usize]),
2613        ]
2614        .simd_into(self)
2615    }
2616    #[inline(always)]
2617    fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2618        [
2619            u16::shr(a[0usize], shift),
2620            u16::shr(a[1usize], shift),
2621            u16::shr(a[2usize], shift),
2622            u16::shr(a[3usize], shift),
2623            u16::shr(a[4usize], shift),
2624            u16::shr(a[5usize], shift),
2625            u16::shr(a[6usize], shift),
2626            u16::shr(a[7usize], shift),
2627        ]
2628        .simd_into(self)
2629    }
2630    #[inline(always)]
2631    fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2632        [
2633            u16::shr(a[0usize], &b[0usize]),
2634            u16::shr(a[1usize], &b[1usize]),
2635            u16::shr(a[2usize], &b[2usize]),
2636            u16::shr(a[3usize], &b[3usize]),
2637            u16::shr(a[4usize], &b[4usize]),
2638            u16::shr(a[5usize], &b[5usize]),
2639            u16::shr(a[6usize], &b[6usize]),
2640            u16::shr(a[7usize], &b[7usize]),
2641        ]
2642        .simd_into(self)
2643    }
2644    #[inline(always)]
2645    fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2646        [
2647            -(u16::eq(&a[0usize], &b[0usize]) as i16),
2648            -(u16::eq(&a[1usize], &b[1usize]) as i16),
2649            -(u16::eq(&a[2usize], &b[2usize]) as i16),
2650            -(u16::eq(&a[3usize], &b[3usize]) as i16),
2651            -(u16::eq(&a[4usize], &b[4usize]) as i16),
2652            -(u16::eq(&a[5usize], &b[5usize]) as i16),
2653            -(u16::eq(&a[6usize], &b[6usize]) as i16),
2654            -(u16::eq(&a[7usize], &b[7usize]) as i16),
2655        ]
2656        .simd_into(self)
2657    }
2658    #[inline(always)]
2659    fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2660        [
2661            -(u16::lt(&a[0usize], &b[0usize]) as i16),
2662            -(u16::lt(&a[1usize], &b[1usize]) as i16),
2663            -(u16::lt(&a[2usize], &b[2usize]) as i16),
2664            -(u16::lt(&a[3usize], &b[3usize]) as i16),
2665            -(u16::lt(&a[4usize], &b[4usize]) as i16),
2666            -(u16::lt(&a[5usize], &b[5usize]) as i16),
2667            -(u16::lt(&a[6usize], &b[6usize]) as i16),
2668            -(u16::lt(&a[7usize], &b[7usize]) as i16),
2669        ]
2670        .simd_into(self)
2671    }
2672    #[inline(always)]
2673    fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2674        [
2675            -(u16::le(&a[0usize], &b[0usize]) as i16),
2676            -(u16::le(&a[1usize], &b[1usize]) as i16),
2677            -(u16::le(&a[2usize], &b[2usize]) as i16),
2678            -(u16::le(&a[3usize], &b[3usize]) as i16),
2679            -(u16::le(&a[4usize], &b[4usize]) as i16),
2680            -(u16::le(&a[5usize], &b[5usize]) as i16),
2681            -(u16::le(&a[6usize], &b[6usize]) as i16),
2682            -(u16::le(&a[7usize], &b[7usize]) as i16),
2683        ]
2684        .simd_into(self)
2685    }
2686    #[inline(always)]
2687    fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2688        [
2689            -(u16::ge(&a[0usize], &b[0usize]) as i16),
2690            -(u16::ge(&a[1usize], &b[1usize]) as i16),
2691            -(u16::ge(&a[2usize], &b[2usize]) as i16),
2692            -(u16::ge(&a[3usize], &b[3usize]) as i16),
2693            -(u16::ge(&a[4usize], &b[4usize]) as i16),
2694            -(u16::ge(&a[5usize], &b[5usize]) as i16),
2695            -(u16::ge(&a[6usize], &b[6usize]) as i16),
2696            -(u16::ge(&a[7usize], &b[7usize]) as i16),
2697        ]
2698        .simd_into(self)
2699    }
2700    #[inline(always)]
2701    fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2702        [
2703            -(u16::gt(&a[0usize], &b[0usize]) as i16),
2704            -(u16::gt(&a[1usize], &b[1usize]) as i16),
2705            -(u16::gt(&a[2usize], &b[2usize]) as i16),
2706            -(u16::gt(&a[3usize], &b[3usize]) as i16),
2707            -(u16::gt(&a[4usize], &b[4usize]) as i16),
2708            -(u16::gt(&a[5usize], &b[5usize]) as i16),
2709            -(u16::gt(&a[6usize], &b[6usize]) as i16),
2710            -(u16::gt(&a[7usize], &b[7usize]) as i16),
2711        ]
2712        .simd_into(self)
2713    }
2714    #[inline(always)]
2715    fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2716        [
2717            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2718        ]
2719        .simd_into(self)
2720    }
2721    #[inline(always)]
2722    fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2723        [
2724            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2725        ]
2726        .simd_into(self)
2727    }
2728    #[inline(always)]
2729    fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2730        [
2731            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2732        ]
2733        .simd_into(self)
2734    }
2735    #[inline(always)]
2736    fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2737        [
2738            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2739        ]
2740        .simd_into(self)
2741    }
2742    #[inline(always)]
2743    fn interleave_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> (u16x8<Self>, u16x8<Self>) {
2744        (self.zip_low_u16x8(a, b), self.zip_high_u16x8(a, b))
2745    }
2746    #[inline(always)]
2747    fn deinterleave_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> (u16x8<Self>, u16x8<Self>) {
2748        (self.unzip_low_u16x8(a, b), self.unzip_high_u16x8(a, b))
2749    }
2750    #[inline(always)]
2751    fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
2752        [
2753            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2754            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2755            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2756            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2757            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2758            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2759            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2760            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2761        ]
2762        .simd_into(self)
2763    }
2764    #[inline(always)]
2765    fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2766        [
2767            u16::min(a[0usize], b[0usize]),
2768            u16::min(a[1usize], b[1usize]),
2769            u16::min(a[2usize], b[2usize]),
2770            u16::min(a[3usize], b[3usize]),
2771            u16::min(a[4usize], b[4usize]),
2772            u16::min(a[5usize], b[5usize]),
2773            u16::min(a[6usize], b[6usize]),
2774            u16::min(a[7usize], b[7usize]),
2775        ]
2776        .simd_into(self)
2777    }
2778    #[inline(always)]
2779    fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2780        [
2781            u16::max(a[0usize], b[0usize]),
2782            u16::max(a[1usize], b[1usize]),
2783            u16::max(a[2usize], b[2usize]),
2784            u16::max(a[3usize], b[3usize]),
2785            u16::max(a[4usize], b[4usize]),
2786            u16::max(a[5usize], b[5usize]),
2787            u16::max(a[6usize], b[6usize]),
2788            u16::max(a[7usize], b[7usize]),
2789        ]
2790        .simd_into(self)
2791    }
2792    #[inline(always)]
2793    fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
2794        let mut result = [0; 16usize];
2795        result[0..8usize].copy_from_slice(&a.val.0);
2796        result[8usize..16usize].copy_from_slice(&b.val.0);
2797        result.simd_into(self)
2798    }
2799    #[inline(always)]
2800    fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2801        a.bitcast()
2802    }
2803    #[inline(always)]
2804    fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
2805        a.bitcast()
2806    }
2807    #[inline(always)]
2808    fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2809        [val; 8usize].simd_into(self)
2810    }
2811    #[inline(always)]
2812    fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8<Self> {
2813        mask16x8 {
2814            val: crate::support::Aligned128(val),
2815            simd: self,
2816        }
2817    }
2818    #[inline(always)]
2819    fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8<Self> {
2820        mask16x8 {
2821            val: crate::support::Aligned128(*val),
2822            simd: self,
2823        }
2824    }
2825    #[inline(always)]
2826    fn as_array_mask16x8(self, a: mask16x8<Self>) -> [i16; 8usize] {
2827        a.val.0
2828    }
2829    #[inline(always)]
2830    fn as_array_ref_mask16x8(self, a: &mask16x8<Self>) -> &[i16; 8usize] {
2831        &a.val.0
2832    }
2833    #[inline(always)]
2834    fn as_array_mut_mask16x8(self, a: &mut mask16x8<Self>) -> &mut [i16; 8usize] {
2835        &mut a.val.0
2836    }
2837    #[inline(always)]
2838    fn store_array_mask16x8(self, a: mask16x8<Self>, dest: &mut [i16; 8usize]) -> () {
2839        *dest = a.val.0;
2840    }
2841    #[inline(always)]
2842    fn cvt_from_bytes_mask16x8(self, a: u8x16<Self>) -> mask16x8<Self> {
2843        unsafe {
2844            mask16x8 {
2845                val: core::mem::transmute(a.val),
2846                simd: self,
2847            }
2848        }
2849    }
2850    #[inline(always)]
2851    fn cvt_to_bytes_mask16x8(self, a: mask16x8<Self>) -> u8x16<Self> {
2852        unsafe {
2853            u8x16 {
2854                val: core::mem::transmute(a.val),
2855                simd: self,
2856            }
2857        }
2858    }
2859    #[inline(always)]
2860    fn slide_mask16x8<const SHIFT: usize>(
2861        self,
2862        a: mask16x8<Self>,
2863        b: mask16x8<Self>,
2864    ) -> mask16x8<Self> {
2865        let mut dest = [Default::default(); 8usize];
2866        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
2867        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
2868        dest.simd_into(self)
2869    }
2870    #[inline(always)]
2871    fn slide_within_blocks_mask16x8<const SHIFT: usize>(
2872        self,
2873        a: mask16x8<Self>,
2874        b: mask16x8<Self>,
2875    ) -> mask16x8<Self> {
2876        self.slide_mask16x8::<SHIFT>(a, b)
2877    }
2878    #[inline(always)]
2879    fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2880        [
2881            i16::bitand(a[0usize], &b[0usize]),
2882            i16::bitand(a[1usize], &b[1usize]),
2883            i16::bitand(a[2usize], &b[2usize]),
2884            i16::bitand(a[3usize], &b[3usize]),
2885            i16::bitand(a[4usize], &b[4usize]),
2886            i16::bitand(a[5usize], &b[5usize]),
2887            i16::bitand(a[6usize], &b[6usize]),
2888            i16::bitand(a[7usize], &b[7usize]),
2889        ]
2890        .simd_into(self)
2891    }
2892    #[inline(always)]
2893    fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2894        [
2895            i16::bitor(a[0usize], &b[0usize]),
2896            i16::bitor(a[1usize], &b[1usize]),
2897            i16::bitor(a[2usize], &b[2usize]),
2898            i16::bitor(a[3usize], &b[3usize]),
2899            i16::bitor(a[4usize], &b[4usize]),
2900            i16::bitor(a[5usize], &b[5usize]),
2901            i16::bitor(a[6usize], &b[6usize]),
2902            i16::bitor(a[7usize], &b[7usize]),
2903        ]
2904        .simd_into(self)
2905    }
2906    #[inline(always)]
2907    fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2908        [
2909            i16::bitxor(a[0usize], &b[0usize]),
2910            i16::bitxor(a[1usize], &b[1usize]),
2911            i16::bitxor(a[2usize], &b[2usize]),
2912            i16::bitxor(a[3usize], &b[3usize]),
2913            i16::bitxor(a[4usize], &b[4usize]),
2914            i16::bitxor(a[5usize], &b[5usize]),
2915            i16::bitxor(a[6usize], &b[6usize]),
2916            i16::bitxor(a[7usize], &b[7usize]),
2917        ]
2918        .simd_into(self)
2919    }
2920    #[inline(always)]
2921    fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2922        [
2923            i16::not(a[0usize]),
2924            i16::not(a[1usize]),
2925            i16::not(a[2usize]),
2926            i16::not(a[3usize]),
2927            i16::not(a[4usize]),
2928            i16::not(a[5usize]),
2929            i16::not(a[6usize]),
2930            i16::not(a[7usize]),
2931        ]
2932        .simd_into(self)
2933    }
2934    #[inline(always)]
2935    fn select_mask16x8(
2936        self,
2937        a: mask16x8<Self>,
2938        b: mask16x8<Self>,
2939        c: mask16x8<Self>,
2940    ) -> mask16x8<Self> {
2941        [
2942            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2943            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2944            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2945            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2946            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2947            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2948            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2949            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2950        ]
2951        .simd_into(self)
2952    }
2953    #[inline(always)]
2954    fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2955        [
2956            -(i16::eq(&a[0usize], &b[0usize]) as i16),
2957            -(i16::eq(&a[1usize], &b[1usize]) as i16),
2958            -(i16::eq(&a[2usize], &b[2usize]) as i16),
2959            -(i16::eq(&a[3usize], &b[3usize]) as i16),
2960            -(i16::eq(&a[4usize], &b[4usize]) as i16),
2961            -(i16::eq(&a[5usize], &b[5usize]) as i16),
2962            -(i16::eq(&a[6usize], &b[6usize]) as i16),
2963            -(i16::eq(&a[7usize], &b[7usize]) as i16),
2964        ]
2965        .simd_into(self)
2966    }
2967    #[inline(always)]
2968    fn any_true_mask16x8(self, a: mask16x8<Self>) -> bool {
2969        a[0usize] != 0
2970            || a[1usize] != 0
2971            || a[2usize] != 0
2972            || a[3usize] != 0
2973            || a[4usize] != 0
2974            || a[5usize] != 0
2975            || a[6usize] != 0
2976            || a[7usize] != 0
2977    }
2978    #[inline(always)]
2979    fn all_true_mask16x8(self, a: mask16x8<Self>) -> bool {
2980        a[0usize] != 0
2981            && a[1usize] != 0
2982            && a[2usize] != 0
2983            && a[3usize] != 0
2984            && a[4usize] != 0
2985            && a[5usize] != 0
2986            && a[6usize] != 0
2987            && a[7usize] != 0
2988    }
2989    #[inline(always)]
2990    fn any_false_mask16x8(self, a: mask16x8<Self>) -> bool {
2991        a[0usize] == 0
2992            || a[1usize] == 0
2993            || a[2usize] == 0
2994            || a[3usize] == 0
2995            || a[4usize] == 0
2996            || a[5usize] == 0
2997            || a[6usize] == 0
2998            || a[7usize] == 0
2999    }
3000    #[inline(always)]
3001    fn all_false_mask16x8(self, a: mask16x8<Self>) -> bool {
3002        a[0usize] == 0
3003            && a[1usize] == 0
3004            && a[2usize] == 0
3005            && a[3usize] == 0
3006            && a[4usize] == 0
3007            && a[5usize] == 0
3008            && a[6usize] == 0
3009            && a[7usize] == 0
3010    }
3011    #[inline(always)]
3012    fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
3013        let mut result = [0; 16usize];
3014        result[0..8usize].copy_from_slice(&a.val.0);
3015        result[8usize..16usize].copy_from_slice(&b.val.0);
3016        result.simd_into(self)
3017    }
3018    #[inline(always)]
3019    fn splat_i32x4(self, val: i32) -> i32x4<Self> {
3020        [val; 4usize].simd_into(self)
3021    }
3022    #[inline(always)]
3023    fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4<Self> {
3024        i32x4 {
3025            val: crate::support::Aligned128(val),
3026            simd: self,
3027        }
3028    }
3029    #[inline(always)]
3030    fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4<Self> {
3031        i32x4 {
3032            val: crate::support::Aligned128(*val),
3033            simd: self,
3034        }
3035    }
3036    #[inline(always)]
3037    fn as_array_i32x4(self, a: i32x4<Self>) -> [i32; 4usize] {
3038        a.val.0
3039    }
3040    #[inline(always)]
3041    fn as_array_ref_i32x4(self, a: &i32x4<Self>) -> &[i32; 4usize] {
3042        &a.val.0
3043    }
3044    #[inline(always)]
3045    fn as_array_mut_i32x4(self, a: &mut i32x4<Self>) -> &mut [i32; 4usize] {
3046        &mut a.val.0
3047    }
3048    #[inline(always)]
3049    fn store_array_i32x4(self, a: i32x4<Self>, dest: &mut [i32; 4usize]) -> () {
3050        *dest = a.val.0;
3051    }
3052    #[inline(always)]
3053    fn cvt_from_bytes_i32x4(self, a: u8x16<Self>) -> i32x4<Self> {
3054        unsafe {
3055            i32x4 {
3056                val: core::mem::transmute(a.val),
3057                simd: self,
3058            }
3059        }
3060    }
3061    #[inline(always)]
3062    fn cvt_to_bytes_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
3063        unsafe {
3064            u8x16 {
3065                val: core::mem::transmute(a.val),
3066                simd: self,
3067            }
3068        }
3069    }
3070    #[inline(always)]
3071    fn slide_i32x4<const SHIFT: usize>(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3072        let mut dest = [Default::default(); 4usize];
3073        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3074        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3075        dest.simd_into(self)
3076    }
3077    #[inline(always)]
3078    fn slide_within_blocks_i32x4<const SHIFT: usize>(
3079        self,
3080        a: i32x4<Self>,
3081        b: i32x4<Self>,
3082    ) -> i32x4<Self> {
3083        self.slide_i32x4::<SHIFT>(a, b)
3084    }
3085    #[inline(always)]
3086    fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3087        [
3088            i32::wrapping_add(a[0usize], b[0usize]),
3089            i32::wrapping_add(a[1usize], b[1usize]),
3090            i32::wrapping_add(a[2usize], b[2usize]),
3091            i32::wrapping_add(a[3usize], b[3usize]),
3092        ]
3093        .simd_into(self)
3094    }
3095    #[inline(always)]
3096    fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3097        [
3098            i32::wrapping_sub(a[0usize], b[0usize]),
3099            i32::wrapping_sub(a[1usize], b[1usize]),
3100            i32::wrapping_sub(a[2usize], b[2usize]),
3101            i32::wrapping_sub(a[3usize], b[3usize]),
3102        ]
3103        .simd_into(self)
3104    }
3105    #[inline(always)]
3106    fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3107        [
3108            i32::wrapping_mul(a[0usize], b[0usize]),
3109            i32::wrapping_mul(a[1usize], b[1usize]),
3110            i32::wrapping_mul(a[2usize], b[2usize]),
3111            i32::wrapping_mul(a[3usize], b[3usize]),
3112        ]
3113        .simd_into(self)
3114    }
3115    #[inline(always)]
3116    fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3117        [
3118            i32::bitand(a[0usize], &b[0usize]),
3119            i32::bitand(a[1usize], &b[1usize]),
3120            i32::bitand(a[2usize], &b[2usize]),
3121            i32::bitand(a[3usize], &b[3usize]),
3122        ]
3123        .simd_into(self)
3124    }
3125    #[inline(always)]
3126    fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3127        [
3128            i32::bitor(a[0usize], &b[0usize]),
3129            i32::bitor(a[1usize], &b[1usize]),
3130            i32::bitor(a[2usize], &b[2usize]),
3131            i32::bitor(a[3usize], &b[3usize]),
3132        ]
3133        .simd_into(self)
3134    }
3135    #[inline(always)]
3136    fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3137        [
3138            i32::bitxor(a[0usize], &b[0usize]),
3139            i32::bitxor(a[1usize], &b[1usize]),
3140            i32::bitxor(a[2usize], &b[2usize]),
3141            i32::bitxor(a[3usize], &b[3usize]),
3142        ]
3143        .simd_into(self)
3144    }
3145    #[inline(always)]
3146    fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
3147        [
3148            i32::not(a[0usize]),
3149            i32::not(a[1usize]),
3150            i32::not(a[2usize]),
3151            i32::not(a[3usize]),
3152        ]
3153        .simd_into(self)
3154    }
3155    #[inline(always)]
3156    fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
3157        [
3158            i32::shl(a[0usize], shift),
3159            i32::shl(a[1usize], shift),
3160            i32::shl(a[2usize], shift),
3161            i32::shl(a[3usize], shift),
3162        ]
3163        .simd_into(self)
3164    }
3165    #[inline(always)]
3166    fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3167        [
3168            i32::shl(a[0usize], &b[0usize]),
3169            i32::shl(a[1usize], &b[1usize]),
3170            i32::shl(a[2usize], &b[2usize]),
3171            i32::shl(a[3usize], &b[3usize]),
3172        ]
3173        .simd_into(self)
3174    }
3175    #[inline(always)]
3176    fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
3177        [
3178            i32::shr(a[0usize], shift),
3179            i32::shr(a[1usize], shift),
3180            i32::shr(a[2usize], shift),
3181            i32::shr(a[3usize], shift),
3182        ]
3183        .simd_into(self)
3184    }
3185    #[inline(always)]
3186    fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3187        [
3188            i32::shr(a[0usize], &b[0usize]),
3189            i32::shr(a[1usize], &b[1usize]),
3190            i32::shr(a[2usize], &b[2usize]),
3191            i32::shr(a[3usize], &b[3usize]),
3192        ]
3193        .simd_into(self)
3194    }
3195    #[inline(always)]
3196    fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3197        [
3198            -(i32::eq(&a[0usize], &b[0usize]) as i32),
3199            -(i32::eq(&a[1usize], &b[1usize]) as i32),
3200            -(i32::eq(&a[2usize], &b[2usize]) as i32),
3201            -(i32::eq(&a[3usize], &b[3usize]) as i32),
3202        ]
3203        .simd_into(self)
3204    }
3205    #[inline(always)]
3206    fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3207        [
3208            -(i32::lt(&a[0usize], &b[0usize]) as i32),
3209            -(i32::lt(&a[1usize], &b[1usize]) as i32),
3210            -(i32::lt(&a[2usize], &b[2usize]) as i32),
3211            -(i32::lt(&a[3usize], &b[3usize]) as i32),
3212        ]
3213        .simd_into(self)
3214    }
3215    #[inline(always)]
3216    fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3217        [
3218            -(i32::le(&a[0usize], &b[0usize]) as i32),
3219            -(i32::le(&a[1usize], &b[1usize]) as i32),
3220            -(i32::le(&a[2usize], &b[2usize]) as i32),
3221            -(i32::le(&a[3usize], &b[3usize]) as i32),
3222        ]
3223        .simd_into(self)
3224    }
3225    #[inline(always)]
3226    fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3227        [
3228            -(i32::ge(&a[0usize], &b[0usize]) as i32),
3229            -(i32::ge(&a[1usize], &b[1usize]) as i32),
3230            -(i32::ge(&a[2usize], &b[2usize]) as i32),
3231            -(i32::ge(&a[3usize], &b[3usize]) as i32),
3232        ]
3233        .simd_into(self)
3234    }
3235    #[inline(always)]
3236    fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
3237        [
3238            -(i32::gt(&a[0usize], &b[0usize]) as i32),
3239            -(i32::gt(&a[1usize], &b[1usize]) as i32),
3240            -(i32::gt(&a[2usize], &b[2usize]) as i32),
3241            -(i32::gt(&a[3usize], &b[3usize]) as i32),
3242        ]
3243        .simd_into(self)
3244    }
3245    #[inline(always)]
3246    fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3247        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
3248    }
3249    #[inline(always)]
3250    fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3251        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
3252    }
3253    #[inline(always)]
3254    fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3255        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
3256    }
3257    #[inline(always)]
3258    fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3259        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
3260    }
3261    #[inline(always)]
3262    fn interleave_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> (i32x4<Self>, i32x4<Self>) {
3263        (self.zip_low_i32x4(a, b), self.zip_high_i32x4(a, b))
3264    }
3265    #[inline(always)]
3266    fn deinterleave_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> (i32x4<Self>, i32x4<Self>) {
3267        (self.unzip_low_i32x4(a, b), self.unzip_high_i32x4(a, b))
3268    }
3269    #[inline(always)]
3270    fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
3271        [
3272            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3273            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3274            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3275            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3276        ]
3277        .simd_into(self)
3278    }
3279    #[inline(always)]
3280    fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3281        [
3282            i32::min(a[0usize], b[0usize]),
3283            i32::min(a[1usize], b[1usize]),
3284            i32::min(a[2usize], b[2usize]),
3285            i32::min(a[3usize], b[3usize]),
3286        ]
3287        .simd_into(self)
3288    }
3289    #[inline(always)]
3290    fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
3291        [
3292            i32::max(a[0usize], b[0usize]),
3293            i32::max(a[1usize], b[1usize]),
3294            i32::max(a[2usize], b[2usize]),
3295            i32::max(a[3usize], b[3usize]),
3296        ]
3297        .simd_into(self)
3298    }
3299    #[inline(always)]
3300    fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
3301        let mut result = [0; 8usize];
3302        result[0..4usize].copy_from_slice(&a.val.0);
3303        result[4usize..8usize].copy_from_slice(&b.val.0);
3304        result.simd_into(self)
3305    }
3306    #[inline(always)]
3307    fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
3308        [
3309            i32::neg(a[0usize]),
3310            i32::neg(a[1usize]),
3311            i32::neg(a[2usize]),
3312            i32::neg(a[3usize]),
3313        ]
3314        .simd_into(self)
3315    }
3316    #[inline(always)]
3317    fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
3318        a.bitcast()
3319    }
3320    #[inline(always)]
3321    fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
3322        a.bitcast()
3323    }
3324    #[inline(always)]
3325    fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
3326        [
3327            a[0usize] as f32,
3328            a[1usize] as f32,
3329            a[2usize] as f32,
3330            a[3usize] as f32,
3331        ]
3332        .simd_into(self)
3333    }
3334    #[inline(always)]
3335    fn splat_u32x4(self, val: u32) -> u32x4<Self> {
3336        [val; 4usize].simd_into(self)
3337    }
3338    #[inline(always)]
3339    fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4<Self> {
3340        u32x4 {
3341            val: crate::support::Aligned128(val),
3342            simd: self,
3343        }
3344    }
3345    #[inline(always)]
3346    fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4<Self> {
3347        u32x4 {
3348            val: crate::support::Aligned128(*val),
3349            simd: self,
3350        }
3351    }
3352    #[inline(always)]
3353    fn as_array_u32x4(self, a: u32x4<Self>) -> [u32; 4usize] {
3354        a.val.0
3355    }
3356    #[inline(always)]
3357    fn as_array_ref_u32x4(self, a: &u32x4<Self>) -> &[u32; 4usize] {
3358        &a.val.0
3359    }
3360    #[inline(always)]
3361    fn as_array_mut_u32x4(self, a: &mut u32x4<Self>) -> &mut [u32; 4usize] {
3362        &mut a.val.0
3363    }
3364    #[inline(always)]
3365    fn store_array_u32x4(self, a: u32x4<Self>, dest: &mut [u32; 4usize]) -> () {
3366        *dest = a.val.0;
3367    }
3368    #[inline(always)]
3369    fn cvt_from_bytes_u32x4(self, a: u8x16<Self>) -> u32x4<Self> {
3370        unsafe {
3371            u32x4 {
3372                val: core::mem::transmute(a.val),
3373                simd: self,
3374            }
3375        }
3376    }
3377    #[inline(always)]
3378    fn cvt_to_bytes_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
3379        unsafe {
3380            u8x16 {
3381                val: core::mem::transmute(a.val),
3382                simd: self,
3383            }
3384        }
3385    }
3386    #[inline(always)]
3387    fn slide_u32x4<const SHIFT: usize>(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3388        let mut dest = [Default::default(); 4usize];
3389        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3390        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3391        dest.simd_into(self)
3392    }
3393    #[inline(always)]
3394    fn slide_within_blocks_u32x4<const SHIFT: usize>(
3395        self,
3396        a: u32x4<Self>,
3397        b: u32x4<Self>,
3398    ) -> u32x4<Self> {
3399        self.slide_u32x4::<SHIFT>(a, b)
3400    }
3401    #[inline(always)]
3402    fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3403        [
3404            u32::wrapping_add(a[0usize], b[0usize]),
3405            u32::wrapping_add(a[1usize], b[1usize]),
3406            u32::wrapping_add(a[2usize], b[2usize]),
3407            u32::wrapping_add(a[3usize], b[3usize]),
3408        ]
3409        .simd_into(self)
3410    }
3411    #[inline(always)]
3412    fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3413        [
3414            u32::wrapping_sub(a[0usize], b[0usize]),
3415            u32::wrapping_sub(a[1usize], b[1usize]),
3416            u32::wrapping_sub(a[2usize], b[2usize]),
3417            u32::wrapping_sub(a[3usize], b[3usize]),
3418        ]
3419        .simd_into(self)
3420    }
3421    #[inline(always)]
3422    fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3423        [
3424            u32::wrapping_mul(a[0usize], b[0usize]),
3425            u32::wrapping_mul(a[1usize], b[1usize]),
3426            u32::wrapping_mul(a[2usize], b[2usize]),
3427            u32::wrapping_mul(a[3usize], b[3usize]),
3428        ]
3429        .simd_into(self)
3430    }
3431    #[inline(always)]
3432    fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3433        [
3434            u32::bitand(a[0usize], &b[0usize]),
3435            u32::bitand(a[1usize], &b[1usize]),
3436            u32::bitand(a[2usize], &b[2usize]),
3437            u32::bitand(a[3usize], &b[3usize]),
3438        ]
3439        .simd_into(self)
3440    }
3441    #[inline(always)]
3442    fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3443        [
3444            u32::bitor(a[0usize], &b[0usize]),
3445            u32::bitor(a[1usize], &b[1usize]),
3446            u32::bitor(a[2usize], &b[2usize]),
3447            u32::bitor(a[3usize], &b[3usize]),
3448        ]
3449        .simd_into(self)
3450    }
3451    #[inline(always)]
3452    fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3453        [
3454            u32::bitxor(a[0usize], &b[0usize]),
3455            u32::bitxor(a[1usize], &b[1usize]),
3456            u32::bitxor(a[2usize], &b[2usize]),
3457            u32::bitxor(a[3usize], &b[3usize]),
3458        ]
3459        .simd_into(self)
3460    }
3461    #[inline(always)]
3462    fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
3463        [
3464            u32::not(a[0usize]),
3465            u32::not(a[1usize]),
3466            u32::not(a[2usize]),
3467            u32::not(a[3usize]),
3468        ]
3469        .simd_into(self)
3470    }
3471    #[inline(always)]
3472    fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
3473        [
3474            u32::shl(a[0usize], shift),
3475            u32::shl(a[1usize], shift),
3476            u32::shl(a[2usize], shift),
3477            u32::shl(a[3usize], shift),
3478        ]
3479        .simd_into(self)
3480    }
3481    #[inline(always)]
3482    fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3483        [
3484            u32::shl(a[0usize], &b[0usize]),
3485            u32::shl(a[1usize], &b[1usize]),
3486            u32::shl(a[2usize], &b[2usize]),
3487            u32::shl(a[3usize], &b[3usize]),
3488        ]
3489        .simd_into(self)
3490    }
3491    #[inline(always)]
3492    fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
3493        [
3494            u32::shr(a[0usize], shift),
3495            u32::shr(a[1usize], shift),
3496            u32::shr(a[2usize], shift),
3497            u32::shr(a[3usize], shift),
3498        ]
3499        .simd_into(self)
3500    }
3501    #[inline(always)]
3502    fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3503        [
3504            u32::shr(a[0usize], &b[0usize]),
3505            u32::shr(a[1usize], &b[1usize]),
3506            u32::shr(a[2usize], &b[2usize]),
3507            u32::shr(a[3usize], &b[3usize]),
3508        ]
3509        .simd_into(self)
3510    }
3511    #[inline(always)]
3512    fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3513        [
3514            -(u32::eq(&a[0usize], &b[0usize]) as i32),
3515            -(u32::eq(&a[1usize], &b[1usize]) as i32),
3516            -(u32::eq(&a[2usize], &b[2usize]) as i32),
3517            -(u32::eq(&a[3usize], &b[3usize]) as i32),
3518        ]
3519        .simd_into(self)
3520    }
3521    #[inline(always)]
3522    fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3523        [
3524            -(u32::lt(&a[0usize], &b[0usize]) as i32),
3525            -(u32::lt(&a[1usize], &b[1usize]) as i32),
3526            -(u32::lt(&a[2usize], &b[2usize]) as i32),
3527            -(u32::lt(&a[3usize], &b[3usize]) as i32),
3528        ]
3529        .simd_into(self)
3530    }
3531    #[inline(always)]
3532    fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3533        [
3534            -(u32::le(&a[0usize], &b[0usize]) as i32),
3535            -(u32::le(&a[1usize], &b[1usize]) as i32),
3536            -(u32::le(&a[2usize], &b[2usize]) as i32),
3537            -(u32::le(&a[3usize], &b[3usize]) as i32),
3538        ]
3539        .simd_into(self)
3540    }
3541    #[inline(always)]
3542    fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3543        [
3544            -(u32::ge(&a[0usize], &b[0usize]) as i32),
3545            -(u32::ge(&a[1usize], &b[1usize]) as i32),
3546            -(u32::ge(&a[2usize], &b[2usize]) as i32),
3547            -(u32::ge(&a[3usize], &b[3usize]) as i32),
3548        ]
3549        .simd_into(self)
3550    }
3551    #[inline(always)]
3552    fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
3553        [
3554            -(u32::gt(&a[0usize], &b[0usize]) as i32),
3555            -(u32::gt(&a[1usize], &b[1usize]) as i32),
3556            -(u32::gt(&a[2usize], &b[2usize]) as i32),
3557            -(u32::gt(&a[3usize], &b[3usize]) as i32),
3558        ]
3559        .simd_into(self)
3560    }
3561    #[inline(always)]
3562    fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3563        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
3564    }
3565    #[inline(always)]
3566    fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3567        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
3568    }
3569    #[inline(always)]
3570    fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3571        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
3572    }
3573    #[inline(always)]
3574    fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3575        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
3576    }
3577    #[inline(always)]
3578    fn interleave_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> (u32x4<Self>, u32x4<Self>) {
3579        (self.zip_low_u32x4(a, b), self.zip_high_u32x4(a, b))
3580    }
3581    #[inline(always)]
3582    fn deinterleave_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> (u32x4<Self>, u32x4<Self>) {
3583        (self.unzip_low_u32x4(a, b), self.unzip_high_u32x4(a, b))
3584    }
3585    #[inline(always)]
3586    fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
3587        [
3588            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3589            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3590            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3591            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3592        ]
3593        .simd_into(self)
3594    }
3595    #[inline(always)]
3596    fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3597        [
3598            u32::min(a[0usize], b[0usize]),
3599            u32::min(a[1usize], b[1usize]),
3600            u32::min(a[2usize], b[2usize]),
3601            u32::min(a[3usize], b[3usize]),
3602        ]
3603        .simd_into(self)
3604    }
3605    #[inline(always)]
3606    fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
3607        [
3608            u32::max(a[0usize], b[0usize]),
3609            u32::max(a[1usize], b[1usize]),
3610            u32::max(a[2usize], b[2usize]),
3611            u32::max(a[3usize], b[3usize]),
3612        ]
3613        .simd_into(self)
3614    }
3615    #[inline(always)]
3616    fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
3617        let mut result = [0; 8usize];
3618        result[0..4usize].copy_from_slice(&a.val.0);
3619        result[4usize..8usize].copy_from_slice(&b.val.0);
3620        result.simd_into(self)
3621    }
3622    #[inline(always)]
3623    fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
3624        a.bitcast()
3625    }
3626    #[inline(always)]
3627    fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
3628        [
3629            a[0usize] as f32,
3630            a[1usize] as f32,
3631            a[2usize] as f32,
3632            a[3usize] as f32,
3633        ]
3634        .simd_into(self)
3635    }
3636    #[inline(always)]
3637    fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
3638        [val; 4usize].simd_into(self)
3639    }
3640    #[inline(always)]
3641    fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4<Self> {
3642        mask32x4 {
3643            val: crate::support::Aligned128(val),
3644            simd: self,
3645        }
3646    }
3647    #[inline(always)]
3648    fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4<Self> {
3649        mask32x4 {
3650            val: crate::support::Aligned128(*val),
3651            simd: self,
3652        }
3653    }
3654    #[inline(always)]
3655    fn as_array_mask32x4(self, a: mask32x4<Self>) -> [i32; 4usize] {
3656        a.val.0
3657    }
3658    #[inline(always)]
3659    fn as_array_ref_mask32x4(self, a: &mask32x4<Self>) -> &[i32; 4usize] {
3660        &a.val.0
3661    }
3662    #[inline(always)]
3663    fn as_array_mut_mask32x4(self, a: &mut mask32x4<Self>) -> &mut [i32; 4usize] {
3664        &mut a.val.0
3665    }
3666    #[inline(always)]
3667    fn store_array_mask32x4(self, a: mask32x4<Self>, dest: &mut [i32; 4usize]) -> () {
3668        *dest = a.val.0;
3669    }
3670    #[inline(always)]
3671    fn cvt_from_bytes_mask32x4(self, a: u8x16<Self>) -> mask32x4<Self> {
3672        unsafe {
3673            mask32x4 {
3674                val: core::mem::transmute(a.val),
3675                simd: self,
3676            }
3677        }
3678    }
3679    #[inline(always)]
3680    fn cvt_to_bytes_mask32x4(self, a: mask32x4<Self>) -> u8x16<Self> {
3681        unsafe {
3682            u8x16 {
3683                val: core::mem::transmute(a.val),
3684                simd: self,
3685            }
3686        }
3687    }
3688    #[inline(always)]
3689    fn slide_mask32x4<const SHIFT: usize>(
3690        self,
3691        a: mask32x4<Self>,
3692        b: mask32x4<Self>,
3693    ) -> mask32x4<Self> {
3694        let mut dest = [Default::default(); 4usize];
3695        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3696        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3697        dest.simd_into(self)
3698    }
3699    #[inline(always)]
3700    fn slide_within_blocks_mask32x4<const SHIFT: usize>(
3701        self,
3702        a: mask32x4<Self>,
3703        b: mask32x4<Self>,
3704    ) -> mask32x4<Self> {
3705        self.slide_mask32x4::<SHIFT>(a, b)
3706    }
3707    #[inline(always)]
3708    fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3709        [
3710            i32::bitand(a[0usize], &b[0usize]),
3711            i32::bitand(a[1usize], &b[1usize]),
3712            i32::bitand(a[2usize], &b[2usize]),
3713            i32::bitand(a[3usize], &b[3usize]),
3714        ]
3715        .simd_into(self)
3716    }
3717    #[inline(always)]
3718    fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3719        [
3720            i32::bitor(a[0usize], &b[0usize]),
3721            i32::bitor(a[1usize], &b[1usize]),
3722            i32::bitor(a[2usize], &b[2usize]),
3723            i32::bitor(a[3usize], &b[3usize]),
3724        ]
3725        .simd_into(self)
3726    }
3727    #[inline(always)]
3728    fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3729        [
3730            i32::bitxor(a[0usize], &b[0usize]),
3731            i32::bitxor(a[1usize], &b[1usize]),
3732            i32::bitxor(a[2usize], &b[2usize]),
3733            i32::bitxor(a[3usize], &b[3usize]),
3734        ]
3735        .simd_into(self)
3736    }
3737    #[inline(always)]
3738    fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
3739        [
3740            i32::not(a[0usize]),
3741            i32::not(a[1usize]),
3742            i32::not(a[2usize]),
3743            i32::not(a[3usize]),
3744        ]
3745        .simd_into(self)
3746    }
3747    #[inline(always)]
3748    fn select_mask32x4(
3749        self,
3750        a: mask32x4<Self>,
3751        b: mask32x4<Self>,
3752        c: mask32x4<Self>,
3753    ) -> mask32x4<Self> {
3754        [
3755            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3756            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3757            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
3758            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
3759        ]
3760        .simd_into(self)
3761    }
3762    #[inline(always)]
3763    fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
3764        [
3765            -(i32::eq(&a[0usize], &b[0usize]) as i32),
3766            -(i32::eq(&a[1usize], &b[1usize]) as i32),
3767            -(i32::eq(&a[2usize], &b[2usize]) as i32),
3768            -(i32::eq(&a[3usize], &b[3usize]) as i32),
3769        ]
3770        .simd_into(self)
3771    }
3772    #[inline(always)]
3773    fn any_true_mask32x4(self, a: mask32x4<Self>) -> bool {
3774        a[0usize] != 0 || a[1usize] != 0 || a[2usize] != 0 || a[3usize] != 0
3775    }
3776    #[inline(always)]
3777    fn all_true_mask32x4(self, a: mask32x4<Self>) -> bool {
3778        a[0usize] != 0 && a[1usize] != 0 && a[2usize] != 0 && a[3usize] != 0
3779    }
3780    #[inline(always)]
3781    fn any_false_mask32x4(self, a: mask32x4<Self>) -> bool {
3782        a[0usize] == 0 || a[1usize] == 0 || a[2usize] == 0 || a[3usize] == 0
3783    }
3784    #[inline(always)]
3785    fn all_false_mask32x4(self, a: mask32x4<Self>) -> bool {
3786        a[0usize] == 0 && a[1usize] == 0 && a[2usize] == 0 && a[3usize] == 0
3787    }
3788    #[inline(always)]
3789    fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
3790        let mut result = [0; 8usize];
3791        result[0..4usize].copy_from_slice(&a.val.0);
3792        result[4usize..8usize].copy_from_slice(&b.val.0);
3793        result.simd_into(self)
3794    }
3795    #[inline(always)]
3796    fn splat_f64x2(self, val: f64) -> f64x2<Self> {
3797        [val; 2usize].simd_into(self)
3798    }
3799    #[inline(always)]
3800    fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2<Self> {
3801        f64x2 {
3802            val: crate::support::Aligned128(val),
3803            simd: self,
3804        }
3805    }
3806    #[inline(always)]
3807    fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2<Self> {
3808        f64x2 {
3809            val: crate::support::Aligned128(*val),
3810            simd: self,
3811        }
3812    }
3813    #[inline(always)]
3814    fn as_array_f64x2(self, a: f64x2<Self>) -> [f64; 2usize] {
3815        a.val.0
3816    }
3817    #[inline(always)]
3818    fn as_array_ref_f64x2(self, a: &f64x2<Self>) -> &[f64; 2usize] {
3819        &a.val.0
3820    }
3821    #[inline(always)]
3822    fn as_array_mut_f64x2(self, a: &mut f64x2<Self>) -> &mut [f64; 2usize] {
3823        &mut a.val.0
3824    }
3825    #[inline(always)]
3826    fn store_array_f64x2(self, a: f64x2<Self>, dest: &mut [f64; 2usize]) -> () {
3827        *dest = a.val.0;
3828    }
3829    #[inline(always)]
3830    fn cvt_from_bytes_f64x2(self, a: u8x16<Self>) -> f64x2<Self> {
3831        unsafe {
3832            f64x2 {
3833                val: core::mem::transmute(a.val),
3834                simd: self,
3835            }
3836        }
3837    }
3838    #[inline(always)]
3839    fn cvt_to_bytes_f64x2(self, a: f64x2<Self>) -> u8x16<Self> {
3840        unsafe {
3841            u8x16 {
3842                val: core::mem::transmute(a.val),
3843                simd: self,
3844            }
3845        }
3846    }
3847    #[inline(always)]
3848    fn slide_f64x2<const SHIFT: usize>(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3849        let mut dest = [Default::default(); 2usize];
3850        dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
3851        dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
3852        dest.simd_into(self)
3853    }
3854    #[inline(always)]
3855    fn slide_within_blocks_f64x2<const SHIFT: usize>(
3856        self,
3857        a: f64x2<Self>,
3858        b: f64x2<Self>,
3859    ) -> f64x2<Self> {
3860        self.slide_f64x2::<SHIFT>(a, b)
3861    }
3862    #[inline(always)]
3863    fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3864        [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
3865    }
3866    #[inline(always)]
3867    fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3868        [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
3869    }
3870    #[inline(always)]
3871    fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
3872        [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
3873    }
3874    #[inline(always)]
3875    fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3876        [
3877            f64::add(a[0usize], &b[0usize]),
3878            f64::add(a[1usize], &b[1usize]),
3879        ]
3880        .simd_into(self)
3881    }
3882    #[inline(always)]
3883    fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3884        [
3885            f64::sub(a[0usize], &b[0usize]),
3886            f64::sub(a[1usize], &b[1usize]),
3887        ]
3888        .simd_into(self)
3889    }
3890    #[inline(always)]
3891    fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3892        [
3893            f64::mul(a[0usize], &b[0usize]),
3894            f64::mul(a[1usize], &b[1usize]),
3895        ]
3896        .simd_into(self)
3897    }
3898    #[inline(always)]
3899    fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3900        [
3901            f64::div(a[0usize], &b[0usize]),
3902            f64::div(a[1usize], &b[1usize]),
3903        ]
3904        .simd_into(self)
3905    }
3906    #[inline(always)]
3907    fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3908        [
3909            f64::copysign(a[0usize], b[0usize]),
3910            f64::copysign(a[1usize], b[1usize]),
3911        ]
3912        .simd_into(self)
3913    }
3914    #[inline(always)]
3915    fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3916        [
3917            -(f64::eq(&a[0usize], &b[0usize]) as i64),
3918            -(f64::eq(&a[1usize], &b[1usize]) as i64),
3919        ]
3920        .simd_into(self)
3921    }
3922    #[inline(always)]
3923    fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3924        [
3925            -(f64::lt(&a[0usize], &b[0usize]) as i64),
3926            -(f64::lt(&a[1usize], &b[1usize]) as i64),
3927        ]
3928        .simd_into(self)
3929    }
3930    #[inline(always)]
3931    fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3932        [
3933            -(f64::le(&a[0usize], &b[0usize]) as i64),
3934            -(f64::le(&a[1usize], &b[1usize]) as i64),
3935        ]
3936        .simd_into(self)
3937    }
3938    #[inline(always)]
3939    fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3940        [
3941            -(f64::ge(&a[0usize], &b[0usize]) as i64),
3942            -(f64::ge(&a[1usize], &b[1usize]) as i64),
3943        ]
3944        .simd_into(self)
3945    }
3946    #[inline(always)]
3947    fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
3948        [
3949            -(f64::gt(&a[0usize], &b[0usize]) as i64),
3950            -(f64::gt(&a[1usize], &b[1usize]) as i64),
3951        ]
3952        .simd_into(self)
3953    }
3954    #[inline(always)]
3955    fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3956        [a[0usize], b[0usize]].simd_into(self)
3957    }
3958    #[inline(always)]
3959    fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3960        [a[1usize], b[1usize]].simd_into(self)
3961    }
3962    #[inline(always)]
3963    fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3964        [a[0usize], b[0usize]].simd_into(self)
3965    }
3966    #[inline(always)]
3967    fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3968        [a[1usize], b[1usize]].simd_into(self)
3969    }
3970    #[inline(always)]
3971    fn interleave_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> (f64x2<Self>, f64x2<Self>) {
3972        (self.zip_low_f64x2(a, b), self.zip_high_f64x2(a, b))
3973    }
3974    #[inline(always)]
3975    fn deinterleave_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> (f64x2<Self>, f64x2<Self>) {
3976        (self.unzip_low_f64x2(a, b), self.unzip_high_f64x2(a, b))
3977    }
3978    #[inline(always)]
3979    fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3980        [
3981            f64::max(a[0usize], b[0usize]),
3982            f64::max(a[1usize], b[1usize]),
3983        ]
3984        .simd_into(self)
3985    }
3986    #[inline(always)]
3987    fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3988        [
3989            f64::min(a[0usize], b[0usize]),
3990            f64::min(a[1usize], b[1usize]),
3991        ]
3992        .simd_into(self)
3993    }
3994    #[inline(always)]
3995    fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
3996        [
3997            f64::max(a[0usize], b[0usize]),
3998            f64::max(a[1usize], b[1usize]),
3999        ]
4000        .simd_into(self)
4001    }
4002    #[inline(always)]
4003    fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
4004        [
4005            f64::min(a[0usize], b[0usize]),
4006            f64::min(a[1usize], b[1usize]),
4007        ]
4008        .simd_into(self)
4009    }
4010    #[inline(always)]
4011    fn mul_add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4012        a.mul(b).add(c)
4013    }
4014    #[inline(always)]
4015    fn mul_sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4016        a.mul(b).sub(c)
4017    }
4018    #[inline(always)]
4019    fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4020        [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
4021    }
4022    #[inline(always)]
4023    fn ceil_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4024        [f64::ceil(a[0usize]), f64::ceil(a[1usize])].simd_into(self)
4025    }
4026    #[inline(always)]
4027    fn round_ties_even_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4028        [
4029            f64::round_ties_even(a[0usize]),
4030            f64::round_ties_even(a[1usize]),
4031        ]
4032        .simd_into(self)
4033    }
4034    #[inline(always)]
4035    fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4036        [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
4037    }
4038    #[inline(always)]
4039    fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
4040        [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
4041    }
4042    #[inline(always)]
4043    fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
4044        [
4045            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
4046            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
4047        ]
4048        .simd_into(self)
4049    }
4050    #[inline(always)]
4051    fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
4052        let mut result = [0.0; 4usize];
4053        result[0..2usize].copy_from_slice(&a.val.0);
4054        result[2usize..4usize].copy_from_slice(&b.val.0);
4055        result.simd_into(self)
4056    }
4057    #[inline(always)]
4058    fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
4059        a.bitcast()
4060    }
4061    #[inline(always)]
4062    fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
4063        [val; 2usize].simd_into(self)
4064    }
4065    #[inline(always)]
4066    fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2<Self> {
4067        mask64x2 {
4068            val: crate::support::Aligned128(val),
4069            simd: self,
4070        }
4071    }
4072    #[inline(always)]
4073    fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2<Self> {
4074        mask64x2 {
4075            val: crate::support::Aligned128(*val),
4076            simd: self,
4077        }
4078    }
4079    #[inline(always)]
4080    fn as_array_mask64x2(self, a: mask64x2<Self>) -> [i64; 2usize] {
4081        a.val.0
4082    }
4083    #[inline(always)]
4084    fn as_array_ref_mask64x2(self, a: &mask64x2<Self>) -> &[i64; 2usize] {
4085        &a.val.0
4086    }
4087    #[inline(always)]
4088    fn as_array_mut_mask64x2(self, a: &mut mask64x2<Self>) -> &mut [i64; 2usize] {
4089        &mut a.val.0
4090    }
4091    #[inline(always)]
4092    fn store_array_mask64x2(self, a: mask64x2<Self>, dest: &mut [i64; 2usize]) -> () {
4093        *dest = a.val.0;
4094    }
4095    #[inline(always)]
4096    fn cvt_from_bytes_mask64x2(self, a: u8x16<Self>) -> mask64x2<Self> {
4097        unsafe {
4098            mask64x2 {
4099                val: core::mem::transmute(a.val),
4100                simd: self,
4101            }
4102        }
4103    }
4104    #[inline(always)]
4105    fn cvt_to_bytes_mask64x2(self, a: mask64x2<Self>) -> u8x16<Self> {
4106        unsafe {
4107            u8x16 {
4108                val: core::mem::transmute(a.val),
4109                simd: self,
4110            }
4111        }
4112    }
4113    #[inline(always)]
4114    fn slide_mask64x2<const SHIFT: usize>(
4115        self,
4116        a: mask64x2<Self>,
4117        b: mask64x2<Self>,
4118    ) -> mask64x2<Self> {
4119        let mut dest = [Default::default(); 2usize];
4120        dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4121        dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4122        dest.simd_into(self)
4123    }
4124    #[inline(always)]
4125    fn slide_within_blocks_mask64x2<const SHIFT: usize>(
4126        self,
4127        a: mask64x2<Self>,
4128        b: mask64x2<Self>,
4129    ) -> mask64x2<Self> {
4130        self.slide_mask64x2::<SHIFT>(a, b)
4131    }
4132    #[inline(always)]
4133    fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4134        [
4135            i64::bitand(a[0usize], &b[0usize]),
4136            i64::bitand(a[1usize], &b[1usize]),
4137        ]
4138        .simd_into(self)
4139    }
4140    #[inline(always)]
4141    fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4142        [
4143            i64::bitor(a[0usize], &b[0usize]),
4144            i64::bitor(a[1usize], &b[1usize]),
4145        ]
4146        .simd_into(self)
4147    }
4148    #[inline(always)]
4149    fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4150        [
4151            i64::bitxor(a[0usize], &b[0usize]),
4152            i64::bitxor(a[1usize], &b[1usize]),
4153        ]
4154        .simd_into(self)
4155    }
4156    #[inline(always)]
4157    fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
4158        [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
4159    }
4160    #[inline(always)]
4161    fn select_mask64x2(
4162        self,
4163        a: mask64x2<Self>,
4164        b: mask64x2<Self>,
4165        c: mask64x2<Self>,
4166    ) -> mask64x2<Self> {
4167        [
4168            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
4169            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
4170        ]
4171        .simd_into(self)
4172    }
4173    #[inline(always)]
4174    fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
4175        [
4176            -(i64::eq(&a[0usize], &b[0usize]) as i64),
4177            -(i64::eq(&a[1usize], &b[1usize]) as i64),
4178        ]
4179        .simd_into(self)
4180    }
4181    #[inline(always)]
4182    fn any_true_mask64x2(self, a: mask64x2<Self>) -> bool {
4183        a[0usize] != 0 || a[1usize] != 0
4184    }
4185    #[inline(always)]
4186    fn all_true_mask64x2(self, a: mask64x2<Self>) -> bool {
4187        a[0usize] != 0 && a[1usize] != 0
4188    }
4189    #[inline(always)]
4190    fn any_false_mask64x2(self, a: mask64x2<Self>) -> bool {
4191        a[0usize] == 0 || a[1usize] == 0
4192    }
4193    #[inline(always)]
4194    fn all_false_mask64x2(self, a: mask64x2<Self>) -> bool {
4195        a[0usize] == 0 && a[1usize] == 0
4196    }
4197    #[inline(always)]
4198    fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
4199        let mut result = [0; 4usize];
4200        result[0..2usize].copy_from_slice(&a.val.0);
4201        result[2usize..4usize].copy_from_slice(&b.val.0);
4202        result.simd_into(self)
4203    }
4204    #[inline(always)]
4205    fn splat_f32x8(self, val: f32) -> f32x8<Self> {
4206        let half = self.splat_f32x4(val);
4207        self.combine_f32x4(half, half)
4208    }
4209    #[inline(always)]
4210    fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8<Self> {
4211        f32x8 {
4212            val: crate::support::Aligned256(val),
4213            simd: self,
4214        }
4215    }
4216    #[inline(always)]
4217    fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8<Self> {
4218        f32x8 {
4219            val: crate::support::Aligned256(*val),
4220            simd: self,
4221        }
4222    }
4223    #[inline(always)]
4224    fn as_array_f32x8(self, a: f32x8<Self>) -> [f32; 8usize] {
4225        a.val.0
4226    }
4227    #[inline(always)]
4228    fn as_array_ref_f32x8(self, a: &f32x8<Self>) -> &[f32; 8usize] {
4229        &a.val.0
4230    }
4231    #[inline(always)]
4232    fn as_array_mut_f32x8(self, a: &mut f32x8<Self>) -> &mut [f32; 8usize] {
4233        &mut a.val.0
4234    }
4235    #[inline(always)]
4236    fn store_array_f32x8(self, a: f32x8<Self>, dest: &mut [f32; 8usize]) -> () {
4237        *dest = a.val.0;
4238    }
4239    #[inline(always)]
4240    fn cvt_from_bytes_f32x8(self, a: u8x32<Self>) -> f32x8<Self> {
4241        unsafe {
4242            f32x8 {
4243                val: core::mem::transmute(a.val),
4244                simd: self,
4245            }
4246        }
4247    }
4248    #[inline(always)]
4249    fn cvt_to_bytes_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
4250        unsafe {
4251            u8x32 {
4252                val: core::mem::transmute(a.val),
4253                simd: self,
4254            }
4255        }
4256    }
4257    #[inline(always)]
4258    fn slide_f32x8<const SHIFT: usize>(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4259        let mut dest = [Default::default(); 8usize];
4260        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4261        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4262        dest.simd_into(self)
4263    }
4264    #[inline(always)]
4265    fn slide_within_blocks_f32x8<const SHIFT: usize>(
4266        self,
4267        a: f32x8<Self>,
4268        b: f32x8<Self>,
4269    ) -> f32x8<Self> {
4270        let (a0, a1) = self.split_f32x8(a);
4271        let (b0, b1) = self.split_f32x8(b);
4272        self.combine_f32x4(
4273            self.slide_within_blocks_f32x4::<SHIFT>(a0, b0),
4274            self.slide_within_blocks_f32x4::<SHIFT>(a1, b1),
4275        )
4276    }
4277    #[inline(always)]
4278    fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4279        let (a0, a1) = self.split_f32x8(a);
4280        self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
4281    }
4282    #[inline(always)]
4283    fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4284        let (a0, a1) = self.split_f32x8(a);
4285        self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
4286    }
4287    #[inline(always)]
4288    fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4289        let (a0, a1) = self.split_f32x8(a);
4290        self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
4291    }
4292    #[inline(always)]
4293    fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4294        let (a0, a1) = self.split_f32x8(a);
4295        let (b0, b1) = self.split_f32x8(b);
4296        self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
4297    }
4298    #[inline(always)]
4299    fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4300        let (a0, a1) = self.split_f32x8(a);
4301        let (b0, b1) = self.split_f32x8(b);
4302        self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
4303    }
4304    #[inline(always)]
4305    fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4306        let (a0, a1) = self.split_f32x8(a);
4307        let (b0, b1) = self.split_f32x8(b);
4308        self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
4309    }
4310    #[inline(always)]
4311    fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4312        let (a0, a1) = self.split_f32x8(a);
4313        let (b0, b1) = self.split_f32x8(b);
4314        self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
4315    }
4316    #[inline(always)]
4317    fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4318        let (a0, a1) = self.split_f32x8(a);
4319        let (b0, b1) = self.split_f32x8(b);
4320        self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
4321    }
4322    #[inline(always)]
4323    fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4324        let (a0, a1) = self.split_f32x8(a);
4325        let (b0, b1) = self.split_f32x8(b);
4326        self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
4327    }
4328    #[inline(always)]
4329    fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4330        let (a0, a1) = self.split_f32x8(a);
4331        let (b0, b1) = self.split_f32x8(b);
4332        self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
4333    }
4334    #[inline(always)]
4335    fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4336        let (a0, a1) = self.split_f32x8(a);
4337        let (b0, b1) = self.split_f32x8(b);
4338        self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
4339    }
4340    #[inline(always)]
4341    fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4342        let (a0, a1) = self.split_f32x8(a);
4343        let (b0, b1) = self.split_f32x8(b);
4344        self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
4345    }
4346    #[inline(always)]
4347    fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
4348        let (a0, a1) = self.split_f32x8(a);
4349        let (b0, b1) = self.split_f32x8(b);
4350        self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
4351    }
4352    #[inline(always)]
4353    fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4354        let (a0, _) = self.split_f32x8(a);
4355        let (b0, _) = self.split_f32x8(b);
4356        self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
4357    }
4358    #[inline(always)]
4359    fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4360        let (_, a1) = self.split_f32x8(a);
4361        let (_, b1) = self.split_f32x8(b);
4362        self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
4363    }
4364    #[inline(always)]
4365    fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4366        let (a0, a1) = self.split_f32x8(a);
4367        let (b0, b1) = self.split_f32x8(b);
4368        self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
4369    }
4370    #[inline(always)]
4371    fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4372        let (a0, a1) = self.split_f32x8(a);
4373        let (b0, b1) = self.split_f32x8(b);
4374        self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
4375    }
4376    #[inline(always)]
4377    fn interleave_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> (f32x8<Self>, f32x8<Self>) {
4378        let (a0, a1) = self.split_f32x8(a);
4379        let (b0, b1) = self.split_f32x8(b);
4380        let lo_lo = self.zip_low_f32x4(a0, b0);
4381        let lo_hi = self.zip_high_f32x4(a0, b0);
4382        let hi_lo = self.zip_low_f32x4(a1, b1);
4383        let hi_hi = self.zip_high_f32x4(a1, b1);
4384        (
4385            self.combine_f32x4(lo_lo, lo_hi),
4386            self.combine_f32x4(hi_lo, hi_hi),
4387        )
4388    }
4389    #[inline(always)]
4390    fn deinterleave_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> (f32x8<Self>, f32x8<Self>) {
4391        let (a0, a1) = self.split_f32x8(a);
4392        let (b0, b1) = self.split_f32x8(b);
4393        let lo_even = self.unzip_low_f32x4(a0, a1);
4394        let lo_odd = self.unzip_high_f32x4(a0, a1);
4395        let hi_even = self.unzip_low_f32x4(b0, b1);
4396        let hi_odd = self.unzip_high_f32x4(b0, b1);
4397        (
4398            self.combine_f32x4(lo_even, hi_even),
4399            self.combine_f32x4(lo_odd, hi_odd),
4400        )
4401    }
4402    #[inline(always)]
4403    fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4404        let (a0, a1) = self.split_f32x8(a);
4405        let (b0, b1) = self.split_f32x8(b);
4406        self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
4407    }
4408    #[inline(always)]
4409    fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4410        let (a0, a1) = self.split_f32x8(a);
4411        let (b0, b1) = self.split_f32x8(b);
4412        self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
4413    }
4414    #[inline(always)]
4415    fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4416        let (a0, a1) = self.split_f32x8(a);
4417        let (b0, b1) = self.split_f32x8(b);
4418        self.combine_f32x4(
4419            self.max_precise_f32x4(a0, b0),
4420            self.max_precise_f32x4(a1, b1),
4421        )
4422    }
4423    #[inline(always)]
4424    fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
4425        let (a0, a1) = self.split_f32x8(a);
4426        let (b0, b1) = self.split_f32x8(b);
4427        self.combine_f32x4(
4428            self.min_precise_f32x4(a0, b0),
4429            self.min_precise_f32x4(a1, b1),
4430        )
4431    }
4432    #[inline(always)]
4433    fn mul_add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4434        let (a0, a1) = self.split_f32x8(a);
4435        let (b0, b1) = self.split_f32x8(b);
4436        let (c0, c1) = self.split_f32x8(c);
4437        self.combine_f32x4(
4438            self.mul_add_f32x4(a0, b0, c0),
4439            self.mul_add_f32x4(a1, b1, c1),
4440        )
4441    }
4442    #[inline(always)]
4443    fn mul_sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4444        let (a0, a1) = self.split_f32x8(a);
4445        let (b0, b1) = self.split_f32x8(b);
4446        let (c0, c1) = self.split_f32x8(c);
4447        self.combine_f32x4(
4448            self.mul_sub_f32x4(a0, b0, c0),
4449            self.mul_sub_f32x4(a1, b1, c1),
4450        )
4451    }
4452    #[inline(always)]
4453    fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4454        let (a0, a1) = self.split_f32x8(a);
4455        self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
4456    }
4457    #[inline(always)]
4458    fn ceil_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4459        let (a0, a1) = self.split_f32x8(a);
4460        self.combine_f32x4(self.ceil_f32x4(a0), self.ceil_f32x4(a1))
4461    }
4462    #[inline(always)]
4463    fn round_ties_even_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4464        let (a0, a1) = self.split_f32x8(a);
4465        self.combine_f32x4(
4466            self.round_ties_even_f32x4(a0),
4467            self.round_ties_even_f32x4(a1),
4468        )
4469    }
4470    #[inline(always)]
4471    fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4472        let (a0, a1) = self.split_f32x8(a);
4473        self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
4474    }
4475    #[inline(always)]
4476    fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
4477        let (a0, a1) = self.split_f32x8(a);
4478        self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
4479    }
4480    #[inline(always)]
4481    fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
4482        let (a0, a1) = self.split_mask32x8(a);
4483        let (b0, b1) = self.split_f32x8(b);
4484        let (c0, c1) = self.split_f32x8(c);
4485        self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
4486    }
4487    #[inline(always)]
4488    fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
4489        let mut result = [0.0; 16usize];
4490        result[0..8usize].copy_from_slice(&a.val.0);
4491        result[8usize..16usize].copy_from_slice(&b.val.0);
4492        result.simd_into(self)
4493    }
4494    #[inline(always)]
4495    fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
4496        let mut b0 = [0.0; 4usize];
4497        let mut b1 = [0.0; 4usize];
4498        b0.copy_from_slice(&a.val.0[0..4usize]);
4499        b1.copy_from_slice(&a.val.0[4usize..8usize]);
4500        (b0.simd_into(self), b1.simd_into(self))
4501    }
4502    #[inline(always)]
4503    fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
4504        let (a0, a1) = self.split_f32x8(a);
4505        self.combine_f64x2(
4506            self.reinterpret_f64_f32x4(a0),
4507            self.reinterpret_f64_f32x4(a1),
4508        )
4509    }
4510    #[inline(always)]
4511    fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4512        let (a0, a1) = self.split_f32x8(a);
4513        self.combine_i32x4(
4514            self.reinterpret_i32_f32x4(a0),
4515            self.reinterpret_i32_f32x4(a1),
4516        )
4517    }
4518    #[inline(always)]
4519    fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
4520        let (a0, a1) = self.split_f32x8(a);
4521        self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
4522    }
4523    #[inline(always)]
4524    fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4525        let (a0, a1) = self.split_f32x8(a);
4526        self.combine_u32x4(
4527            self.reinterpret_u32_f32x4(a0),
4528            self.reinterpret_u32_f32x4(a1),
4529        )
4530    }
4531    #[inline(always)]
4532    fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4533        let (a0, a1) = self.split_f32x8(a);
4534        self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
4535    }
4536    #[inline(always)]
4537    fn cvt_u32_precise_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
4538        let (a0, a1) = self.split_f32x8(a);
4539        self.combine_u32x4(
4540            self.cvt_u32_precise_f32x4(a0),
4541            self.cvt_u32_precise_f32x4(a1),
4542        )
4543    }
4544    #[inline(always)]
4545    fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4546        let (a0, a1) = self.split_f32x8(a);
4547        self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
4548    }
4549    #[inline(always)]
4550    fn cvt_i32_precise_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
4551        let (a0, a1) = self.split_f32x8(a);
4552        self.combine_i32x4(
4553            self.cvt_i32_precise_f32x4(a0),
4554            self.cvt_i32_precise_f32x4(a1),
4555        )
4556    }
4557    #[inline(always)]
4558    fn splat_i8x32(self, val: i8) -> i8x32<Self> {
4559        let half = self.splat_i8x16(val);
4560        self.combine_i8x16(half, half)
4561    }
4562    #[inline(always)]
4563    fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32<Self> {
4564        i8x32 {
4565            val: crate::support::Aligned256(val),
4566            simd: self,
4567        }
4568    }
4569    #[inline(always)]
4570    fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32<Self> {
4571        i8x32 {
4572            val: crate::support::Aligned256(*val),
4573            simd: self,
4574        }
4575    }
4576    #[inline(always)]
4577    fn as_array_i8x32(self, a: i8x32<Self>) -> [i8; 32usize] {
4578        a.val.0
4579    }
4580    #[inline(always)]
4581    fn as_array_ref_i8x32(self, a: &i8x32<Self>) -> &[i8; 32usize] {
4582        &a.val.0
4583    }
4584    #[inline(always)]
4585    fn as_array_mut_i8x32(self, a: &mut i8x32<Self>) -> &mut [i8; 32usize] {
4586        &mut a.val.0
4587    }
4588    #[inline(always)]
4589    fn store_array_i8x32(self, a: i8x32<Self>, dest: &mut [i8; 32usize]) -> () {
4590        *dest = a.val.0;
4591    }
4592    #[inline(always)]
4593    fn cvt_from_bytes_i8x32(self, a: u8x32<Self>) -> i8x32<Self> {
4594        unsafe {
4595            i8x32 {
4596                val: core::mem::transmute(a.val),
4597                simd: self,
4598            }
4599        }
4600    }
4601    #[inline(always)]
4602    fn cvt_to_bytes_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
4603        unsafe {
4604            u8x32 {
4605                val: core::mem::transmute(a.val),
4606                simd: self,
4607            }
4608        }
4609    }
4610    #[inline(always)]
4611    fn slide_i8x32<const SHIFT: usize>(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4612        let mut dest = [Default::default(); 32usize];
4613        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4614        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4615        dest.simd_into(self)
4616    }
4617    #[inline(always)]
4618    fn slide_within_blocks_i8x32<const SHIFT: usize>(
4619        self,
4620        a: i8x32<Self>,
4621        b: i8x32<Self>,
4622    ) -> i8x32<Self> {
4623        let (a0, a1) = self.split_i8x32(a);
4624        let (b0, b1) = self.split_i8x32(b);
4625        self.combine_i8x16(
4626            self.slide_within_blocks_i8x16::<SHIFT>(a0, b0),
4627            self.slide_within_blocks_i8x16::<SHIFT>(a1, b1),
4628        )
4629    }
4630    #[inline(always)]
4631    fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4632        let (a0, a1) = self.split_i8x32(a);
4633        let (b0, b1) = self.split_i8x32(b);
4634        self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
4635    }
4636    #[inline(always)]
4637    fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4638        let (a0, a1) = self.split_i8x32(a);
4639        let (b0, b1) = self.split_i8x32(b);
4640        self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
4641    }
4642    #[inline(always)]
4643    fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4644        let (a0, a1) = self.split_i8x32(a);
4645        let (b0, b1) = self.split_i8x32(b);
4646        self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
4647    }
4648    #[inline(always)]
4649    fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4650        let (a0, a1) = self.split_i8x32(a);
4651        let (b0, b1) = self.split_i8x32(b);
4652        self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
4653    }
4654    #[inline(always)]
4655    fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4656        let (a0, a1) = self.split_i8x32(a);
4657        let (b0, b1) = self.split_i8x32(b);
4658        self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
4659    }
4660    #[inline(always)]
4661    fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4662        let (a0, a1) = self.split_i8x32(a);
4663        let (b0, b1) = self.split_i8x32(b);
4664        self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
4665    }
4666    #[inline(always)]
4667    fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
4668        let (a0, a1) = self.split_i8x32(a);
4669        self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
4670    }
4671    #[inline(always)]
4672    fn shl_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
4673        let (a0, a1) = self.split_i8x32(a);
4674        self.combine_i8x16(self.shl_i8x16(a0, shift), self.shl_i8x16(a1, shift))
4675    }
4676    #[inline(always)]
4677    fn shlv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4678        let (a0, a1) = self.split_i8x32(a);
4679        let (b0, b1) = self.split_i8x32(b);
4680        self.combine_i8x16(self.shlv_i8x16(a0, b0), self.shlv_i8x16(a1, b1))
4681    }
4682    #[inline(always)]
4683    fn shr_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
4684        let (a0, a1) = self.split_i8x32(a);
4685        self.combine_i8x16(self.shr_i8x16(a0, shift), self.shr_i8x16(a1, shift))
4686    }
4687    #[inline(always)]
4688    fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4689        let (a0, a1) = self.split_i8x32(a);
4690        let (b0, b1) = self.split_i8x32(b);
4691        self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
4692    }
4693    #[inline(always)]
4694    fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4695        let (a0, a1) = self.split_i8x32(a);
4696        let (b0, b1) = self.split_i8x32(b);
4697        self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
4698    }
4699    #[inline(always)]
4700    fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4701        let (a0, a1) = self.split_i8x32(a);
4702        let (b0, b1) = self.split_i8x32(b);
4703        self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
4704    }
4705    #[inline(always)]
4706    fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4707        let (a0, a1) = self.split_i8x32(a);
4708        let (b0, b1) = self.split_i8x32(b);
4709        self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
4710    }
4711    #[inline(always)]
4712    fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4713        let (a0, a1) = self.split_i8x32(a);
4714        let (b0, b1) = self.split_i8x32(b);
4715        self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
4716    }
4717    #[inline(always)]
4718    fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
4719        let (a0, a1) = self.split_i8x32(a);
4720        let (b0, b1) = self.split_i8x32(b);
4721        self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
4722    }
4723    #[inline(always)]
4724    fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4725        let (a0, _) = self.split_i8x32(a);
4726        let (b0, _) = self.split_i8x32(b);
4727        self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
4728    }
4729    #[inline(always)]
4730    fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4731        let (_, a1) = self.split_i8x32(a);
4732        let (_, b1) = self.split_i8x32(b);
4733        self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
4734    }
4735    #[inline(always)]
4736    fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4737        let (a0, a1) = self.split_i8x32(a);
4738        let (b0, b1) = self.split_i8x32(b);
4739        self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
4740    }
4741    #[inline(always)]
4742    fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4743        let (a0, a1) = self.split_i8x32(a);
4744        let (b0, b1) = self.split_i8x32(b);
4745        self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
4746    }
4747    #[inline(always)]
4748    fn interleave_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> (i8x32<Self>, i8x32<Self>) {
4749        let (a0, a1) = self.split_i8x32(a);
4750        let (b0, b1) = self.split_i8x32(b);
4751        let lo_lo = self.zip_low_i8x16(a0, b0);
4752        let lo_hi = self.zip_high_i8x16(a0, b0);
4753        let hi_lo = self.zip_low_i8x16(a1, b1);
4754        let hi_hi = self.zip_high_i8x16(a1, b1);
4755        (
4756            self.combine_i8x16(lo_lo, lo_hi),
4757            self.combine_i8x16(hi_lo, hi_hi),
4758        )
4759    }
4760    #[inline(always)]
4761    fn deinterleave_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> (i8x32<Self>, i8x32<Self>) {
4762        let (a0, a1) = self.split_i8x32(a);
4763        let (b0, b1) = self.split_i8x32(b);
4764        let lo_even = self.unzip_low_i8x16(a0, a1);
4765        let lo_odd = self.unzip_high_i8x16(a0, a1);
4766        let hi_even = self.unzip_low_i8x16(b0, b1);
4767        let hi_odd = self.unzip_high_i8x16(b0, b1);
4768        (
4769            self.combine_i8x16(lo_even, hi_even),
4770            self.combine_i8x16(lo_odd, hi_odd),
4771        )
4772    }
4773    #[inline(always)]
4774    fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
4775        let (a0, a1) = self.split_mask8x32(a);
4776        let (b0, b1) = self.split_i8x32(b);
4777        let (c0, c1) = self.split_i8x32(c);
4778        self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
4779    }
4780    #[inline(always)]
4781    fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4782        let (a0, a1) = self.split_i8x32(a);
4783        let (b0, b1) = self.split_i8x32(b);
4784        self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
4785    }
4786    #[inline(always)]
4787    fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
4788        let (a0, a1) = self.split_i8x32(a);
4789        let (b0, b1) = self.split_i8x32(b);
4790        self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
4791    }
4792    #[inline(always)]
4793    fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
4794        let mut result = [0; 64usize];
4795        result[0..32usize].copy_from_slice(&a.val.0);
4796        result[32usize..64usize].copy_from_slice(&b.val.0);
4797        result.simd_into(self)
4798    }
4799    #[inline(always)]
4800    fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
4801        let mut b0 = [0; 16usize];
4802        let mut b1 = [0; 16usize];
4803        b0.copy_from_slice(&a.val.0[0..16usize]);
4804        b1.copy_from_slice(&a.val.0[16usize..32usize]);
4805        (b0.simd_into(self), b1.simd_into(self))
4806    }
4807    #[inline(always)]
4808    fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
4809        let (a0, a1) = self.split_i8x32(a);
4810        self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
4811    }
4812    #[inline(always)]
4813    fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
4814        let (a0, a1) = self.split_i8x32(a);
4815        self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
4816    }
4817    #[inline(always)]
4818    fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
4819        let (a0, a1) = self.split_i8x32(a);
4820        self.combine_u32x4(
4821            self.reinterpret_u32_i8x16(a0),
4822            self.reinterpret_u32_i8x16(a1),
4823        )
4824    }
4825    #[inline(always)]
4826    fn splat_u8x32(self, val: u8) -> u8x32<Self> {
4827        let half = self.splat_u8x16(val);
4828        self.combine_u8x16(half, half)
4829    }
4830    #[inline(always)]
4831    fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32<Self> {
4832        u8x32 {
4833            val: crate::support::Aligned256(val),
4834            simd: self,
4835        }
4836    }
4837    #[inline(always)]
4838    fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32<Self> {
4839        u8x32 {
4840            val: crate::support::Aligned256(*val),
4841            simd: self,
4842        }
4843    }
4844    #[inline(always)]
4845    fn as_array_u8x32(self, a: u8x32<Self>) -> [u8; 32usize] {
4846        a.val.0
4847    }
4848    #[inline(always)]
4849    fn as_array_ref_u8x32(self, a: &u8x32<Self>) -> &[u8; 32usize] {
4850        &a.val.0
4851    }
4852    #[inline(always)]
4853    fn as_array_mut_u8x32(self, a: &mut u8x32<Self>) -> &mut [u8; 32usize] {
4854        &mut a.val.0
4855    }
4856    #[inline(always)]
4857    fn store_array_u8x32(self, a: u8x32<Self>, dest: &mut [u8; 32usize]) -> () {
4858        *dest = a.val.0;
4859    }
4860    #[inline(always)]
4861    fn cvt_from_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4862        unsafe {
4863            u8x32 {
4864                val: core::mem::transmute(a.val),
4865                simd: self,
4866            }
4867        }
4868    }
4869    #[inline(always)]
4870    fn cvt_to_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4871        unsafe {
4872            u8x32 {
4873                val: core::mem::transmute(a.val),
4874                simd: self,
4875            }
4876        }
4877    }
4878    #[inline(always)]
4879    fn slide_u8x32<const SHIFT: usize>(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4880        let mut dest = [Default::default(); 32usize];
4881        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
4882        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
4883        dest.simd_into(self)
4884    }
4885    #[inline(always)]
4886    fn slide_within_blocks_u8x32<const SHIFT: usize>(
4887        self,
4888        a: u8x32<Self>,
4889        b: u8x32<Self>,
4890    ) -> u8x32<Self> {
4891        let (a0, a1) = self.split_u8x32(a);
4892        let (b0, b1) = self.split_u8x32(b);
4893        self.combine_u8x16(
4894            self.slide_within_blocks_u8x16::<SHIFT>(a0, b0),
4895            self.slide_within_blocks_u8x16::<SHIFT>(a1, b1),
4896        )
4897    }
4898    #[inline(always)]
4899    fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4900        let (a0, a1) = self.split_u8x32(a);
4901        let (b0, b1) = self.split_u8x32(b);
4902        self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
4903    }
4904    #[inline(always)]
4905    fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4906        let (a0, a1) = self.split_u8x32(a);
4907        let (b0, b1) = self.split_u8x32(b);
4908        self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
4909    }
4910    #[inline(always)]
4911    fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4912        let (a0, a1) = self.split_u8x32(a);
4913        let (b0, b1) = self.split_u8x32(b);
4914        self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
4915    }
4916    #[inline(always)]
4917    fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4918        let (a0, a1) = self.split_u8x32(a);
4919        let (b0, b1) = self.split_u8x32(b);
4920        self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
4921    }
4922    #[inline(always)]
4923    fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4924        let (a0, a1) = self.split_u8x32(a);
4925        let (b0, b1) = self.split_u8x32(b);
4926        self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
4927    }
4928    #[inline(always)]
4929    fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4930        let (a0, a1) = self.split_u8x32(a);
4931        let (b0, b1) = self.split_u8x32(b);
4932        self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
4933    }
4934    #[inline(always)]
4935    fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
4936        let (a0, a1) = self.split_u8x32(a);
4937        self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
4938    }
4939    #[inline(always)]
4940    fn shl_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
4941        let (a0, a1) = self.split_u8x32(a);
4942        self.combine_u8x16(self.shl_u8x16(a0, shift), self.shl_u8x16(a1, shift))
4943    }
4944    #[inline(always)]
4945    fn shlv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4946        let (a0, a1) = self.split_u8x32(a);
4947        let (b0, b1) = self.split_u8x32(b);
4948        self.combine_u8x16(self.shlv_u8x16(a0, b0), self.shlv_u8x16(a1, b1))
4949    }
4950    #[inline(always)]
4951    fn shr_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
4952        let (a0, a1) = self.split_u8x32(a);
4953        self.combine_u8x16(self.shr_u8x16(a0, shift), self.shr_u8x16(a1, shift))
4954    }
4955    #[inline(always)]
4956    fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4957        let (a0, a1) = self.split_u8x32(a);
4958        let (b0, b1) = self.split_u8x32(b);
4959        self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
4960    }
4961    #[inline(always)]
4962    fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4963        let (a0, a1) = self.split_u8x32(a);
4964        let (b0, b1) = self.split_u8x32(b);
4965        self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
4966    }
4967    #[inline(always)]
4968    fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4969        let (a0, a1) = self.split_u8x32(a);
4970        let (b0, b1) = self.split_u8x32(b);
4971        self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
4972    }
4973    #[inline(always)]
4974    fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4975        let (a0, a1) = self.split_u8x32(a);
4976        let (b0, b1) = self.split_u8x32(b);
4977        self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
4978    }
4979    #[inline(always)]
4980    fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4981        let (a0, a1) = self.split_u8x32(a);
4982        let (b0, b1) = self.split_u8x32(b);
4983        self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
4984    }
4985    #[inline(always)]
4986    fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
4987        let (a0, a1) = self.split_u8x32(a);
4988        let (b0, b1) = self.split_u8x32(b);
4989        self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
4990    }
4991    #[inline(always)]
4992    fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4993        let (a0, _) = self.split_u8x32(a);
4994        let (b0, _) = self.split_u8x32(b);
4995        self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
4996    }
4997    #[inline(always)]
4998    fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
4999        let (_, a1) = self.split_u8x32(a);
5000        let (_, b1) = self.split_u8x32(b);
5001        self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
5002    }
5003    #[inline(always)]
5004    fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5005        let (a0, a1) = self.split_u8x32(a);
5006        let (b0, b1) = self.split_u8x32(b);
5007        self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
5008    }
5009    #[inline(always)]
5010    fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5011        let (a0, a1) = self.split_u8x32(a);
5012        let (b0, b1) = self.split_u8x32(b);
5013        self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
5014    }
5015    #[inline(always)]
5016    fn interleave_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> (u8x32<Self>, u8x32<Self>) {
5017        let (a0, a1) = self.split_u8x32(a);
5018        let (b0, b1) = self.split_u8x32(b);
5019        let lo_lo = self.zip_low_u8x16(a0, b0);
5020        let lo_hi = self.zip_high_u8x16(a0, b0);
5021        let hi_lo = self.zip_low_u8x16(a1, b1);
5022        let hi_hi = self.zip_high_u8x16(a1, b1);
5023        (
5024            self.combine_u8x16(lo_lo, lo_hi),
5025            self.combine_u8x16(hi_lo, hi_hi),
5026        )
5027    }
5028    #[inline(always)]
5029    fn deinterleave_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> (u8x32<Self>, u8x32<Self>) {
5030        let (a0, a1) = self.split_u8x32(a);
5031        let (b0, b1) = self.split_u8x32(b);
5032        let lo_even = self.unzip_low_u8x16(a0, a1);
5033        let lo_odd = self.unzip_high_u8x16(a0, a1);
5034        let hi_even = self.unzip_low_u8x16(b0, b1);
5035        let hi_odd = self.unzip_high_u8x16(b0, b1);
5036        (
5037            self.combine_u8x16(lo_even, hi_even),
5038            self.combine_u8x16(lo_odd, hi_odd),
5039        )
5040    }
5041    #[inline(always)]
5042    fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
5043        let (a0, a1) = self.split_mask8x32(a);
5044        let (b0, b1) = self.split_u8x32(b);
5045        let (c0, c1) = self.split_u8x32(c);
5046        self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
5047    }
5048    #[inline(always)]
5049    fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5050        let (a0, a1) = self.split_u8x32(a);
5051        let (b0, b1) = self.split_u8x32(b);
5052        self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
5053    }
5054    #[inline(always)]
5055    fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
5056        let (a0, a1) = self.split_u8x32(a);
5057        let (b0, b1) = self.split_u8x32(b);
5058        self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
5059    }
5060    #[inline(always)]
5061    fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
5062        let mut result = [0; 64usize];
5063        result[0..32usize].copy_from_slice(&a.val.0);
5064        result[32usize..64usize].copy_from_slice(&b.val.0);
5065        result.simd_into(self)
5066    }
5067    #[inline(always)]
5068    fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
5069        let mut b0 = [0; 16usize];
5070        let mut b1 = [0; 16usize];
5071        b0.copy_from_slice(&a.val.0[0..16usize]);
5072        b1.copy_from_slice(&a.val.0[16usize..32usize]);
5073        (b0.simd_into(self), b1.simd_into(self))
5074    }
5075    #[inline(always)]
5076    fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
5077        let (a0, a1) = self.split_u8x32(a);
5078        self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
5079    }
5080    #[inline(always)]
5081    fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
5082        let (a0, a1) = self.split_u8x32(a);
5083        self.combine_u32x4(
5084            self.reinterpret_u32_u8x16(a0),
5085            self.reinterpret_u32_u8x16(a1),
5086        )
5087    }
5088    #[inline(always)]
5089    fn splat_mask8x32(self, val: i8) -> mask8x32<Self> {
5090        let half = self.splat_mask8x16(val);
5091        self.combine_mask8x16(half, half)
5092    }
5093    #[inline(always)]
5094    fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32<Self> {
5095        mask8x32 {
5096            val: crate::support::Aligned256(val),
5097            simd: self,
5098        }
5099    }
5100    #[inline(always)]
5101    fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32<Self> {
5102        mask8x32 {
5103            val: crate::support::Aligned256(*val),
5104            simd: self,
5105        }
5106    }
5107    #[inline(always)]
5108    fn as_array_mask8x32(self, a: mask8x32<Self>) -> [i8; 32usize] {
5109        a.val.0
5110    }
5111    #[inline(always)]
5112    fn as_array_ref_mask8x32(self, a: &mask8x32<Self>) -> &[i8; 32usize] {
5113        &a.val.0
5114    }
5115    #[inline(always)]
5116    fn as_array_mut_mask8x32(self, a: &mut mask8x32<Self>) -> &mut [i8; 32usize] {
5117        &mut a.val.0
5118    }
5119    #[inline(always)]
5120    fn store_array_mask8x32(self, a: mask8x32<Self>, dest: &mut [i8; 32usize]) -> () {
5121        *dest = a.val.0;
5122    }
5123    #[inline(always)]
5124    fn cvt_from_bytes_mask8x32(self, a: u8x32<Self>) -> mask8x32<Self> {
5125        unsafe {
5126            mask8x32 {
5127                val: core::mem::transmute(a.val),
5128                simd: self,
5129            }
5130        }
5131    }
5132    #[inline(always)]
5133    fn cvt_to_bytes_mask8x32(self, a: mask8x32<Self>) -> u8x32<Self> {
5134        unsafe {
5135            u8x32 {
5136                val: core::mem::transmute(a.val),
5137                simd: self,
5138            }
5139        }
5140    }
5141    #[inline(always)]
5142    fn slide_mask8x32<const SHIFT: usize>(
5143        self,
5144        a: mask8x32<Self>,
5145        b: mask8x32<Self>,
5146    ) -> mask8x32<Self> {
5147        let mut dest = [Default::default(); 32usize];
5148        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5149        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5150        dest.simd_into(self)
5151    }
5152    #[inline(always)]
5153    fn slide_within_blocks_mask8x32<const SHIFT: usize>(
5154        self,
5155        a: mask8x32<Self>,
5156        b: mask8x32<Self>,
5157    ) -> mask8x32<Self> {
5158        let (a0, a1) = self.split_mask8x32(a);
5159        let (b0, b1) = self.split_mask8x32(b);
5160        self.combine_mask8x16(
5161            self.slide_within_blocks_mask8x16::<SHIFT>(a0, b0),
5162            self.slide_within_blocks_mask8x16::<SHIFT>(a1, b1),
5163        )
5164    }
5165    #[inline(always)]
5166    fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5167        let (a0, a1) = self.split_mask8x32(a);
5168        let (b0, b1) = self.split_mask8x32(b);
5169        self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
5170    }
5171    #[inline(always)]
5172    fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5173        let (a0, a1) = self.split_mask8x32(a);
5174        let (b0, b1) = self.split_mask8x32(b);
5175        self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
5176    }
5177    #[inline(always)]
5178    fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5179        let (a0, a1) = self.split_mask8x32(a);
5180        let (b0, b1) = self.split_mask8x32(b);
5181        self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
5182    }
5183    #[inline(always)]
5184    fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
5185        let (a0, a1) = self.split_mask8x32(a);
5186        self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
5187    }
5188    #[inline(always)]
5189    fn select_mask8x32(
5190        self,
5191        a: mask8x32<Self>,
5192        b: mask8x32<Self>,
5193        c: mask8x32<Self>,
5194    ) -> mask8x32<Self> {
5195        let (a0, a1) = self.split_mask8x32(a);
5196        let (b0, b1) = self.split_mask8x32(b);
5197        let (c0, c1) = self.split_mask8x32(c);
5198        self.combine_mask8x16(
5199            self.select_mask8x16(a0, b0, c0),
5200            self.select_mask8x16(a1, b1, c1),
5201        )
5202    }
5203    #[inline(always)]
5204    fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
5205        let (a0, a1) = self.split_mask8x32(a);
5206        let (b0, b1) = self.split_mask8x32(b);
5207        self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
5208    }
5209    #[inline(always)]
5210    fn any_true_mask8x32(self, a: mask8x32<Self>) -> bool {
5211        let (a0, a1) = self.split_mask8x32(a);
5212        self.any_true_mask8x16(a0) || self.any_true_mask8x16(a1)
5213    }
5214    #[inline(always)]
5215    fn all_true_mask8x32(self, a: mask8x32<Self>) -> bool {
5216        let (a0, a1) = self.split_mask8x32(a);
5217        self.all_true_mask8x16(a0) && self.all_true_mask8x16(a1)
5218    }
5219    #[inline(always)]
5220    fn any_false_mask8x32(self, a: mask8x32<Self>) -> bool {
5221        let (a0, a1) = self.split_mask8x32(a);
5222        self.any_false_mask8x16(a0) || self.any_false_mask8x16(a1)
5223    }
5224    #[inline(always)]
5225    fn all_false_mask8x32(self, a: mask8x32<Self>) -> bool {
5226        let (a0, a1) = self.split_mask8x32(a);
5227        self.all_false_mask8x16(a0) && self.all_false_mask8x16(a1)
5228    }
5229    #[inline(always)]
5230    fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
5231        let mut result = [0; 64usize];
5232        result[0..32usize].copy_from_slice(&a.val.0);
5233        result[32usize..64usize].copy_from_slice(&b.val.0);
5234        result.simd_into(self)
5235    }
5236    #[inline(always)]
5237    fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
5238        let mut b0 = [0; 16usize];
5239        let mut b1 = [0; 16usize];
5240        b0.copy_from_slice(&a.val.0[0..16usize]);
5241        b1.copy_from_slice(&a.val.0[16usize..32usize]);
5242        (b0.simd_into(self), b1.simd_into(self))
5243    }
5244    #[inline(always)]
5245    fn splat_i16x16(self, val: i16) -> i16x16<Self> {
5246        let half = self.splat_i16x8(val);
5247        self.combine_i16x8(half, half)
5248    }
5249    #[inline(always)]
5250    fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16<Self> {
5251        i16x16 {
5252            val: crate::support::Aligned256(val),
5253            simd: self,
5254        }
5255    }
5256    #[inline(always)]
5257    fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16<Self> {
5258        i16x16 {
5259            val: crate::support::Aligned256(*val),
5260            simd: self,
5261        }
5262    }
5263    #[inline(always)]
5264    fn as_array_i16x16(self, a: i16x16<Self>) -> [i16; 16usize] {
5265        a.val.0
5266    }
5267    #[inline(always)]
5268    fn as_array_ref_i16x16(self, a: &i16x16<Self>) -> &[i16; 16usize] {
5269        &a.val.0
5270    }
5271    #[inline(always)]
5272    fn as_array_mut_i16x16(self, a: &mut i16x16<Self>) -> &mut [i16; 16usize] {
5273        &mut a.val.0
5274    }
5275    #[inline(always)]
5276    fn store_array_i16x16(self, a: i16x16<Self>, dest: &mut [i16; 16usize]) -> () {
5277        *dest = a.val.0;
5278    }
5279    #[inline(always)]
5280    fn cvt_from_bytes_i16x16(self, a: u8x32<Self>) -> i16x16<Self> {
5281        unsafe {
5282            i16x16 {
5283                val: core::mem::transmute(a.val),
5284                simd: self,
5285            }
5286        }
5287    }
5288    #[inline(always)]
5289    fn cvt_to_bytes_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
5290        unsafe {
5291            u8x32 {
5292                val: core::mem::transmute(a.val),
5293                simd: self,
5294            }
5295        }
5296    }
5297    #[inline(always)]
5298    fn slide_i16x16<const SHIFT: usize>(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5299        let mut dest = [Default::default(); 16usize];
5300        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5301        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5302        dest.simd_into(self)
5303    }
5304    #[inline(always)]
5305    fn slide_within_blocks_i16x16<const SHIFT: usize>(
5306        self,
5307        a: i16x16<Self>,
5308        b: i16x16<Self>,
5309    ) -> i16x16<Self> {
5310        let (a0, a1) = self.split_i16x16(a);
5311        let (b0, b1) = self.split_i16x16(b);
5312        self.combine_i16x8(
5313            self.slide_within_blocks_i16x8::<SHIFT>(a0, b0),
5314            self.slide_within_blocks_i16x8::<SHIFT>(a1, b1),
5315        )
5316    }
5317    #[inline(always)]
5318    fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5319        let (a0, a1) = self.split_i16x16(a);
5320        let (b0, b1) = self.split_i16x16(b);
5321        self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
5322    }
5323    #[inline(always)]
5324    fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5325        let (a0, a1) = self.split_i16x16(a);
5326        let (b0, b1) = self.split_i16x16(b);
5327        self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
5328    }
5329    #[inline(always)]
5330    fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5331        let (a0, a1) = self.split_i16x16(a);
5332        let (b0, b1) = self.split_i16x16(b);
5333        self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
5334    }
5335    #[inline(always)]
5336    fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5337        let (a0, a1) = self.split_i16x16(a);
5338        let (b0, b1) = self.split_i16x16(b);
5339        self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
5340    }
5341    #[inline(always)]
5342    fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5343        let (a0, a1) = self.split_i16x16(a);
5344        let (b0, b1) = self.split_i16x16(b);
5345        self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
5346    }
5347    #[inline(always)]
5348    fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5349        let (a0, a1) = self.split_i16x16(a);
5350        let (b0, b1) = self.split_i16x16(b);
5351        self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
5352    }
5353    #[inline(always)]
5354    fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
5355        let (a0, a1) = self.split_i16x16(a);
5356        self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
5357    }
5358    #[inline(always)]
5359    fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
5360        let (a0, a1) = self.split_i16x16(a);
5361        self.combine_i16x8(self.shl_i16x8(a0, shift), self.shl_i16x8(a1, shift))
5362    }
5363    #[inline(always)]
5364    fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5365        let (a0, a1) = self.split_i16x16(a);
5366        let (b0, b1) = self.split_i16x16(b);
5367        self.combine_i16x8(self.shlv_i16x8(a0, b0), self.shlv_i16x8(a1, b1))
5368    }
5369    #[inline(always)]
5370    fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
5371        let (a0, a1) = self.split_i16x16(a);
5372        self.combine_i16x8(self.shr_i16x8(a0, shift), self.shr_i16x8(a1, shift))
5373    }
5374    #[inline(always)]
5375    fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5376        let (a0, a1) = self.split_i16x16(a);
5377        let (b0, b1) = self.split_i16x16(b);
5378        self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
5379    }
5380    #[inline(always)]
5381    fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5382        let (a0, a1) = self.split_i16x16(a);
5383        let (b0, b1) = self.split_i16x16(b);
5384        self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
5385    }
5386    #[inline(always)]
5387    fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5388        let (a0, a1) = self.split_i16x16(a);
5389        let (b0, b1) = self.split_i16x16(b);
5390        self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
5391    }
5392    #[inline(always)]
5393    fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5394        let (a0, a1) = self.split_i16x16(a);
5395        let (b0, b1) = self.split_i16x16(b);
5396        self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
5397    }
5398    #[inline(always)]
5399    fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5400        let (a0, a1) = self.split_i16x16(a);
5401        let (b0, b1) = self.split_i16x16(b);
5402        self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
5403    }
5404    #[inline(always)]
5405    fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
5406        let (a0, a1) = self.split_i16x16(a);
5407        let (b0, b1) = self.split_i16x16(b);
5408        self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
5409    }
5410    #[inline(always)]
5411    fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5412        let (a0, _) = self.split_i16x16(a);
5413        let (b0, _) = self.split_i16x16(b);
5414        self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
5415    }
5416    #[inline(always)]
5417    fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5418        let (_, a1) = self.split_i16x16(a);
5419        let (_, b1) = self.split_i16x16(b);
5420        self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
5421    }
5422    #[inline(always)]
5423    fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5424        let (a0, a1) = self.split_i16x16(a);
5425        let (b0, b1) = self.split_i16x16(b);
5426        self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
5427    }
5428    #[inline(always)]
5429    fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5430        let (a0, a1) = self.split_i16x16(a);
5431        let (b0, b1) = self.split_i16x16(b);
5432        self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
5433    }
5434    #[inline(always)]
5435    fn interleave_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> (i16x16<Self>, i16x16<Self>) {
5436        let (a0, a1) = self.split_i16x16(a);
5437        let (b0, b1) = self.split_i16x16(b);
5438        let lo_lo = self.zip_low_i16x8(a0, b0);
5439        let lo_hi = self.zip_high_i16x8(a0, b0);
5440        let hi_lo = self.zip_low_i16x8(a1, b1);
5441        let hi_hi = self.zip_high_i16x8(a1, b1);
5442        (
5443            self.combine_i16x8(lo_lo, lo_hi),
5444            self.combine_i16x8(hi_lo, hi_hi),
5445        )
5446    }
5447    #[inline(always)]
5448    fn deinterleave_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> (i16x16<Self>, i16x16<Self>) {
5449        let (a0, a1) = self.split_i16x16(a);
5450        let (b0, b1) = self.split_i16x16(b);
5451        let lo_even = self.unzip_low_i16x8(a0, a1);
5452        let lo_odd = self.unzip_high_i16x8(a0, a1);
5453        let hi_even = self.unzip_low_i16x8(b0, b1);
5454        let hi_odd = self.unzip_high_i16x8(b0, b1);
5455        (
5456            self.combine_i16x8(lo_even, hi_even),
5457            self.combine_i16x8(lo_odd, hi_odd),
5458        )
5459    }
5460    #[inline(always)]
5461    fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
5462        let (a0, a1) = self.split_mask16x16(a);
5463        let (b0, b1) = self.split_i16x16(b);
5464        let (c0, c1) = self.split_i16x16(c);
5465        self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
5466    }
5467    #[inline(always)]
5468    fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5469        let (a0, a1) = self.split_i16x16(a);
5470        let (b0, b1) = self.split_i16x16(b);
5471        self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
5472    }
5473    #[inline(always)]
5474    fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
5475        let (a0, a1) = self.split_i16x16(a);
5476        let (b0, b1) = self.split_i16x16(b);
5477        self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
5478    }
5479    #[inline(always)]
5480    fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
5481        let mut result = [0; 32usize];
5482        result[0..16usize].copy_from_slice(&a.val.0);
5483        result[16usize..32usize].copy_from_slice(&b.val.0);
5484        result.simd_into(self)
5485    }
5486    #[inline(always)]
5487    fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
5488        let mut b0 = [0; 8usize];
5489        let mut b1 = [0; 8usize];
5490        b0.copy_from_slice(&a.val.0[0..8usize]);
5491        b1.copy_from_slice(&a.val.0[8usize..16usize]);
5492        (b0.simd_into(self), b1.simd_into(self))
5493    }
5494    #[inline(always)]
5495    fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
5496        let (a0, a1) = self.split_i16x16(a);
5497        self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
5498    }
5499    #[inline(always)]
5500    fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
5501        let (a0, a1) = self.split_i16x16(a);
5502        self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
5503    }
5504    #[inline(always)]
5505    fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
5506        let (a0, a1) = self.split_i16x16(a);
5507        self.combine_u32x4(
5508            self.reinterpret_u32_i16x8(a0),
5509            self.reinterpret_u32_i16x8(a1),
5510        )
5511    }
5512    #[inline(always)]
5513    fn splat_u16x16(self, val: u16) -> u16x16<Self> {
5514        let half = self.splat_u16x8(val);
5515        self.combine_u16x8(half, half)
5516    }
5517    #[inline(always)]
5518    fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16<Self> {
5519        u16x16 {
5520            val: crate::support::Aligned256(val),
5521            simd: self,
5522        }
5523    }
5524    #[inline(always)]
5525    fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16<Self> {
5526        u16x16 {
5527            val: crate::support::Aligned256(*val),
5528            simd: self,
5529        }
5530    }
5531    #[inline(always)]
5532    fn as_array_u16x16(self, a: u16x16<Self>) -> [u16; 16usize] {
5533        a.val.0
5534    }
5535    #[inline(always)]
5536    fn as_array_ref_u16x16(self, a: &u16x16<Self>) -> &[u16; 16usize] {
5537        &a.val.0
5538    }
5539    #[inline(always)]
5540    fn as_array_mut_u16x16(self, a: &mut u16x16<Self>) -> &mut [u16; 16usize] {
5541        &mut a.val.0
5542    }
5543    #[inline(always)]
5544    fn store_array_u16x16(self, a: u16x16<Self>, dest: &mut [u16; 16usize]) -> () {
5545        *dest = a.val.0;
5546    }
5547    #[inline(always)]
5548    fn cvt_from_bytes_u16x16(self, a: u8x32<Self>) -> u16x16<Self> {
5549        unsafe {
5550            u16x16 {
5551                val: core::mem::transmute(a.val),
5552                simd: self,
5553            }
5554        }
5555    }
5556    #[inline(always)]
5557    fn cvt_to_bytes_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
5558        unsafe {
5559            u8x32 {
5560                val: core::mem::transmute(a.val),
5561                simd: self,
5562            }
5563        }
5564    }
5565    #[inline(always)]
5566    fn slide_u16x16<const SHIFT: usize>(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5567        let mut dest = [Default::default(); 16usize];
5568        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5569        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5570        dest.simd_into(self)
5571    }
5572    #[inline(always)]
5573    fn slide_within_blocks_u16x16<const SHIFT: usize>(
5574        self,
5575        a: u16x16<Self>,
5576        b: u16x16<Self>,
5577    ) -> u16x16<Self> {
5578        let (a0, a1) = self.split_u16x16(a);
5579        let (b0, b1) = self.split_u16x16(b);
5580        self.combine_u16x8(
5581            self.slide_within_blocks_u16x8::<SHIFT>(a0, b0),
5582            self.slide_within_blocks_u16x8::<SHIFT>(a1, b1),
5583        )
5584    }
5585    #[inline(always)]
5586    fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5587        let (a0, a1) = self.split_u16x16(a);
5588        let (b0, b1) = self.split_u16x16(b);
5589        self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
5590    }
5591    #[inline(always)]
5592    fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5593        let (a0, a1) = self.split_u16x16(a);
5594        let (b0, b1) = self.split_u16x16(b);
5595        self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
5596    }
5597    #[inline(always)]
5598    fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5599        let (a0, a1) = self.split_u16x16(a);
5600        let (b0, b1) = self.split_u16x16(b);
5601        self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
5602    }
5603    #[inline(always)]
5604    fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5605        let (a0, a1) = self.split_u16x16(a);
5606        let (b0, b1) = self.split_u16x16(b);
5607        self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
5608    }
5609    #[inline(always)]
5610    fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5611        let (a0, a1) = self.split_u16x16(a);
5612        let (b0, b1) = self.split_u16x16(b);
5613        self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
5614    }
5615    #[inline(always)]
5616    fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5617        let (a0, a1) = self.split_u16x16(a);
5618        let (b0, b1) = self.split_u16x16(b);
5619        self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
5620    }
5621    #[inline(always)]
5622    fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
5623        let (a0, a1) = self.split_u16x16(a);
5624        self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
5625    }
5626    #[inline(always)]
5627    fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
5628        let (a0, a1) = self.split_u16x16(a);
5629        self.combine_u16x8(self.shl_u16x8(a0, shift), self.shl_u16x8(a1, shift))
5630    }
5631    #[inline(always)]
5632    fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5633        let (a0, a1) = self.split_u16x16(a);
5634        let (b0, b1) = self.split_u16x16(b);
5635        self.combine_u16x8(self.shlv_u16x8(a0, b0), self.shlv_u16x8(a1, b1))
5636    }
5637    #[inline(always)]
5638    fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
5639        let (a0, a1) = self.split_u16x16(a);
5640        self.combine_u16x8(self.shr_u16x8(a0, shift), self.shr_u16x8(a1, shift))
5641    }
5642    #[inline(always)]
5643    fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5644        let (a0, a1) = self.split_u16x16(a);
5645        let (b0, b1) = self.split_u16x16(b);
5646        self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
5647    }
5648    #[inline(always)]
5649    fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5650        let (a0, a1) = self.split_u16x16(a);
5651        let (b0, b1) = self.split_u16x16(b);
5652        self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
5653    }
5654    #[inline(always)]
5655    fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5656        let (a0, a1) = self.split_u16x16(a);
5657        let (b0, b1) = self.split_u16x16(b);
5658        self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
5659    }
5660    #[inline(always)]
5661    fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5662        let (a0, a1) = self.split_u16x16(a);
5663        let (b0, b1) = self.split_u16x16(b);
5664        self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
5665    }
5666    #[inline(always)]
5667    fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5668        let (a0, a1) = self.split_u16x16(a);
5669        let (b0, b1) = self.split_u16x16(b);
5670        self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
5671    }
5672    #[inline(always)]
5673    fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
5674        let (a0, a1) = self.split_u16x16(a);
5675        let (b0, b1) = self.split_u16x16(b);
5676        self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
5677    }
5678    #[inline(always)]
5679    fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5680        let (a0, _) = self.split_u16x16(a);
5681        let (b0, _) = self.split_u16x16(b);
5682        self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
5683    }
5684    #[inline(always)]
5685    fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5686        let (_, a1) = self.split_u16x16(a);
5687        let (_, b1) = self.split_u16x16(b);
5688        self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
5689    }
5690    #[inline(always)]
5691    fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5692        let (a0, a1) = self.split_u16x16(a);
5693        let (b0, b1) = self.split_u16x16(b);
5694        self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
5695    }
5696    #[inline(always)]
5697    fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5698        let (a0, a1) = self.split_u16x16(a);
5699        let (b0, b1) = self.split_u16x16(b);
5700        self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
5701    }
5702    #[inline(always)]
5703    fn interleave_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> (u16x16<Self>, u16x16<Self>) {
5704        let (a0, a1) = self.split_u16x16(a);
5705        let (b0, b1) = self.split_u16x16(b);
5706        let lo_lo = self.zip_low_u16x8(a0, b0);
5707        let lo_hi = self.zip_high_u16x8(a0, b0);
5708        let hi_lo = self.zip_low_u16x8(a1, b1);
5709        let hi_hi = self.zip_high_u16x8(a1, b1);
5710        (
5711            self.combine_u16x8(lo_lo, lo_hi),
5712            self.combine_u16x8(hi_lo, hi_hi),
5713        )
5714    }
5715    #[inline(always)]
5716    fn deinterleave_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> (u16x16<Self>, u16x16<Self>) {
5717        let (a0, a1) = self.split_u16x16(a);
5718        let (b0, b1) = self.split_u16x16(b);
5719        let lo_even = self.unzip_low_u16x8(a0, a1);
5720        let lo_odd = self.unzip_high_u16x8(a0, a1);
5721        let hi_even = self.unzip_low_u16x8(b0, b1);
5722        let hi_odd = self.unzip_high_u16x8(b0, b1);
5723        (
5724            self.combine_u16x8(lo_even, hi_even),
5725            self.combine_u16x8(lo_odd, hi_odd),
5726        )
5727    }
5728    #[inline(always)]
5729    fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
5730        let (a0, a1) = self.split_mask16x16(a);
5731        let (b0, b1) = self.split_u16x16(b);
5732        let (c0, c1) = self.split_u16x16(c);
5733        self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
5734    }
5735    #[inline(always)]
5736    fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5737        let (a0, a1) = self.split_u16x16(a);
5738        let (b0, b1) = self.split_u16x16(b);
5739        self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
5740    }
5741    #[inline(always)]
5742    fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
5743        let (a0, a1) = self.split_u16x16(a);
5744        let (b0, b1) = self.split_u16x16(b);
5745        self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
5746    }
5747    #[inline(always)]
5748    fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
5749        let mut result = [0; 32usize];
5750        result[0..16usize].copy_from_slice(&a.val.0);
5751        result[16usize..32usize].copy_from_slice(&b.val.0);
5752        result.simd_into(self)
5753    }
5754    #[inline(always)]
5755    fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
5756        let mut b0 = [0; 8usize];
5757        let mut b1 = [0; 8usize];
5758        b0.copy_from_slice(&a.val.0[0..8usize]);
5759        b1.copy_from_slice(&a.val.0[8usize..16usize]);
5760        (b0.simd_into(self), b1.simd_into(self))
5761    }
5762    #[inline(always)]
5763    fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
5764        [
5765            a[0usize] as u8,
5766            a[1usize] as u8,
5767            a[2usize] as u8,
5768            a[3usize] as u8,
5769            a[4usize] as u8,
5770            a[5usize] as u8,
5771            a[6usize] as u8,
5772            a[7usize] as u8,
5773            a[8usize] as u8,
5774            a[9usize] as u8,
5775            a[10usize] as u8,
5776            a[11usize] as u8,
5777            a[12usize] as u8,
5778            a[13usize] as u8,
5779            a[14usize] as u8,
5780            a[15usize] as u8,
5781        ]
5782        .simd_into(self)
5783    }
5784    #[inline(always)]
5785    fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
5786        let (a0, a1) = self.split_u16x16(a);
5787        self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
5788    }
5789    #[inline(always)]
5790    fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
5791        let (a0, a1) = self.split_u16x16(a);
5792        self.combine_u32x4(
5793            self.reinterpret_u32_u16x8(a0),
5794            self.reinterpret_u32_u16x8(a1),
5795        )
5796    }
5797    #[inline(always)]
5798    fn splat_mask16x16(self, val: i16) -> mask16x16<Self> {
5799        let half = self.splat_mask16x8(val);
5800        self.combine_mask16x8(half, half)
5801    }
5802    #[inline(always)]
5803    fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16<Self> {
5804        mask16x16 {
5805            val: crate::support::Aligned256(val),
5806            simd: self,
5807        }
5808    }
5809    #[inline(always)]
5810    fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16<Self> {
5811        mask16x16 {
5812            val: crate::support::Aligned256(*val),
5813            simd: self,
5814        }
5815    }
5816    #[inline(always)]
5817    fn as_array_mask16x16(self, a: mask16x16<Self>) -> [i16; 16usize] {
5818        a.val.0
5819    }
5820    #[inline(always)]
5821    fn as_array_ref_mask16x16(self, a: &mask16x16<Self>) -> &[i16; 16usize] {
5822        &a.val.0
5823    }
5824    #[inline(always)]
5825    fn as_array_mut_mask16x16(self, a: &mut mask16x16<Self>) -> &mut [i16; 16usize] {
5826        &mut a.val.0
5827    }
5828    #[inline(always)]
5829    fn store_array_mask16x16(self, a: mask16x16<Self>, dest: &mut [i16; 16usize]) -> () {
5830        *dest = a.val.0;
5831    }
5832    #[inline(always)]
5833    fn cvt_from_bytes_mask16x16(self, a: u8x32<Self>) -> mask16x16<Self> {
5834        unsafe {
5835            mask16x16 {
5836                val: core::mem::transmute(a.val),
5837                simd: self,
5838            }
5839        }
5840    }
5841    #[inline(always)]
5842    fn cvt_to_bytes_mask16x16(self, a: mask16x16<Self>) -> u8x32<Self> {
5843        unsafe {
5844            u8x32 {
5845                val: core::mem::transmute(a.val),
5846                simd: self,
5847            }
5848        }
5849    }
5850    #[inline(always)]
5851    fn slide_mask16x16<const SHIFT: usize>(
5852        self,
5853        a: mask16x16<Self>,
5854        b: mask16x16<Self>,
5855    ) -> mask16x16<Self> {
5856        let mut dest = [Default::default(); 16usize];
5857        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
5858        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
5859        dest.simd_into(self)
5860    }
5861    #[inline(always)]
5862    fn slide_within_blocks_mask16x16<const SHIFT: usize>(
5863        self,
5864        a: mask16x16<Self>,
5865        b: mask16x16<Self>,
5866    ) -> mask16x16<Self> {
5867        let (a0, a1) = self.split_mask16x16(a);
5868        let (b0, b1) = self.split_mask16x16(b);
5869        self.combine_mask16x8(
5870            self.slide_within_blocks_mask16x8::<SHIFT>(a0, b0),
5871            self.slide_within_blocks_mask16x8::<SHIFT>(a1, b1),
5872        )
5873    }
5874    #[inline(always)]
5875    fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5876        let (a0, a1) = self.split_mask16x16(a);
5877        let (b0, b1) = self.split_mask16x16(b);
5878        self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
5879    }
5880    #[inline(always)]
5881    fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5882        let (a0, a1) = self.split_mask16x16(a);
5883        let (b0, b1) = self.split_mask16x16(b);
5884        self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
5885    }
5886    #[inline(always)]
5887    fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5888        let (a0, a1) = self.split_mask16x16(a);
5889        let (b0, b1) = self.split_mask16x16(b);
5890        self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
5891    }
5892    #[inline(always)]
5893    fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
5894        let (a0, a1) = self.split_mask16x16(a);
5895        self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
5896    }
5897    #[inline(always)]
5898    fn select_mask16x16(
5899        self,
5900        a: mask16x16<Self>,
5901        b: mask16x16<Self>,
5902        c: mask16x16<Self>,
5903    ) -> mask16x16<Self> {
5904        let (a0, a1) = self.split_mask16x16(a);
5905        let (b0, b1) = self.split_mask16x16(b);
5906        let (c0, c1) = self.split_mask16x16(c);
5907        self.combine_mask16x8(
5908            self.select_mask16x8(a0, b0, c0),
5909            self.select_mask16x8(a1, b1, c1),
5910        )
5911    }
5912    #[inline(always)]
5913    fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
5914        let (a0, a1) = self.split_mask16x16(a);
5915        let (b0, b1) = self.split_mask16x16(b);
5916        self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
5917    }
5918    #[inline(always)]
5919    fn any_true_mask16x16(self, a: mask16x16<Self>) -> bool {
5920        let (a0, a1) = self.split_mask16x16(a);
5921        self.any_true_mask16x8(a0) || self.any_true_mask16x8(a1)
5922    }
5923    #[inline(always)]
5924    fn all_true_mask16x16(self, a: mask16x16<Self>) -> bool {
5925        let (a0, a1) = self.split_mask16x16(a);
5926        self.all_true_mask16x8(a0) && self.all_true_mask16x8(a1)
5927    }
5928    #[inline(always)]
5929    fn any_false_mask16x16(self, a: mask16x16<Self>) -> bool {
5930        let (a0, a1) = self.split_mask16x16(a);
5931        self.any_false_mask16x8(a0) || self.any_false_mask16x8(a1)
5932    }
5933    #[inline(always)]
5934    fn all_false_mask16x16(self, a: mask16x16<Self>) -> bool {
5935        let (a0, a1) = self.split_mask16x16(a);
5936        self.all_false_mask16x8(a0) && self.all_false_mask16x8(a1)
5937    }
5938    #[inline(always)]
5939    fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
5940        let mut result = [0; 32usize];
5941        result[0..16usize].copy_from_slice(&a.val.0);
5942        result[16usize..32usize].copy_from_slice(&b.val.0);
5943        result.simd_into(self)
5944    }
5945    #[inline(always)]
5946    fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
5947        let mut b0 = [0; 8usize];
5948        let mut b1 = [0; 8usize];
5949        b0.copy_from_slice(&a.val.0[0..8usize]);
5950        b1.copy_from_slice(&a.val.0[8usize..16usize]);
5951        (b0.simd_into(self), b1.simd_into(self))
5952    }
5953    #[inline(always)]
5954    fn splat_i32x8(self, val: i32) -> i32x8<Self> {
5955        let half = self.splat_i32x4(val);
5956        self.combine_i32x4(half, half)
5957    }
5958    #[inline(always)]
5959    fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8<Self> {
5960        i32x8 {
5961            val: crate::support::Aligned256(val),
5962            simd: self,
5963        }
5964    }
5965    #[inline(always)]
5966    fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8<Self> {
5967        i32x8 {
5968            val: crate::support::Aligned256(*val),
5969            simd: self,
5970        }
5971    }
5972    #[inline(always)]
5973    fn as_array_i32x8(self, a: i32x8<Self>) -> [i32; 8usize] {
5974        a.val.0
5975    }
5976    #[inline(always)]
5977    fn as_array_ref_i32x8(self, a: &i32x8<Self>) -> &[i32; 8usize] {
5978        &a.val.0
5979    }
5980    #[inline(always)]
5981    fn as_array_mut_i32x8(self, a: &mut i32x8<Self>) -> &mut [i32; 8usize] {
5982        &mut a.val.0
5983    }
5984    #[inline(always)]
5985    fn store_array_i32x8(self, a: i32x8<Self>, dest: &mut [i32; 8usize]) -> () {
5986        *dest = a.val.0;
5987    }
5988    #[inline(always)]
5989    fn cvt_from_bytes_i32x8(self, a: u8x32<Self>) -> i32x8<Self> {
5990        unsafe {
5991            i32x8 {
5992                val: core::mem::transmute(a.val),
5993                simd: self,
5994            }
5995        }
5996    }
5997    #[inline(always)]
5998    fn cvt_to_bytes_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
5999        unsafe {
6000            u8x32 {
6001                val: core::mem::transmute(a.val),
6002                simd: self,
6003            }
6004        }
6005    }
6006    #[inline(always)]
6007    fn slide_i32x8<const SHIFT: usize>(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6008        let mut dest = [Default::default(); 8usize];
6009        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6010        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6011        dest.simd_into(self)
6012    }
6013    #[inline(always)]
6014    fn slide_within_blocks_i32x8<const SHIFT: usize>(
6015        self,
6016        a: i32x8<Self>,
6017        b: i32x8<Self>,
6018    ) -> i32x8<Self> {
6019        let (a0, a1) = self.split_i32x8(a);
6020        let (b0, b1) = self.split_i32x8(b);
6021        self.combine_i32x4(
6022            self.slide_within_blocks_i32x4::<SHIFT>(a0, b0),
6023            self.slide_within_blocks_i32x4::<SHIFT>(a1, b1),
6024        )
6025    }
6026    #[inline(always)]
6027    fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6028        let (a0, a1) = self.split_i32x8(a);
6029        let (b0, b1) = self.split_i32x8(b);
6030        self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
6031    }
6032    #[inline(always)]
6033    fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6034        let (a0, a1) = self.split_i32x8(a);
6035        let (b0, b1) = self.split_i32x8(b);
6036        self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
6037    }
6038    #[inline(always)]
6039    fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6040        let (a0, a1) = self.split_i32x8(a);
6041        let (b0, b1) = self.split_i32x8(b);
6042        self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
6043    }
6044    #[inline(always)]
6045    fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6046        let (a0, a1) = self.split_i32x8(a);
6047        let (b0, b1) = self.split_i32x8(b);
6048        self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
6049    }
6050    #[inline(always)]
6051    fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6052        let (a0, a1) = self.split_i32x8(a);
6053        let (b0, b1) = self.split_i32x8(b);
6054        self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
6055    }
6056    #[inline(always)]
6057    fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6058        let (a0, a1) = self.split_i32x8(a);
6059        let (b0, b1) = self.split_i32x8(b);
6060        self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
6061    }
6062    #[inline(always)]
6063    fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
6064        let (a0, a1) = self.split_i32x8(a);
6065        self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
6066    }
6067    #[inline(always)]
6068    fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
6069        let (a0, a1) = self.split_i32x8(a);
6070        self.combine_i32x4(self.shl_i32x4(a0, shift), self.shl_i32x4(a1, shift))
6071    }
6072    #[inline(always)]
6073    fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6074        let (a0, a1) = self.split_i32x8(a);
6075        let (b0, b1) = self.split_i32x8(b);
6076        self.combine_i32x4(self.shlv_i32x4(a0, b0), self.shlv_i32x4(a1, b1))
6077    }
6078    #[inline(always)]
6079    fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
6080        let (a0, a1) = self.split_i32x8(a);
6081        self.combine_i32x4(self.shr_i32x4(a0, shift), self.shr_i32x4(a1, shift))
6082    }
6083    #[inline(always)]
6084    fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6085        let (a0, a1) = self.split_i32x8(a);
6086        let (b0, b1) = self.split_i32x8(b);
6087        self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
6088    }
6089    #[inline(always)]
6090    fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6091        let (a0, a1) = self.split_i32x8(a);
6092        let (b0, b1) = self.split_i32x8(b);
6093        self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
6094    }
6095    #[inline(always)]
6096    fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6097        let (a0, a1) = self.split_i32x8(a);
6098        let (b0, b1) = self.split_i32x8(b);
6099        self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
6100    }
6101    #[inline(always)]
6102    fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6103        let (a0, a1) = self.split_i32x8(a);
6104        let (b0, b1) = self.split_i32x8(b);
6105        self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
6106    }
6107    #[inline(always)]
6108    fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6109        let (a0, a1) = self.split_i32x8(a);
6110        let (b0, b1) = self.split_i32x8(b);
6111        self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
6112    }
6113    #[inline(always)]
6114    fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
6115        let (a0, a1) = self.split_i32x8(a);
6116        let (b0, b1) = self.split_i32x8(b);
6117        self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
6118    }
6119    #[inline(always)]
6120    fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6121        let (a0, _) = self.split_i32x8(a);
6122        let (b0, _) = self.split_i32x8(b);
6123        self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
6124    }
6125    #[inline(always)]
6126    fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6127        let (_, a1) = self.split_i32x8(a);
6128        let (_, b1) = self.split_i32x8(b);
6129        self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
6130    }
6131    #[inline(always)]
6132    fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6133        let (a0, a1) = self.split_i32x8(a);
6134        let (b0, b1) = self.split_i32x8(b);
6135        self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
6136    }
6137    #[inline(always)]
6138    fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6139        let (a0, a1) = self.split_i32x8(a);
6140        let (b0, b1) = self.split_i32x8(b);
6141        self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
6142    }
6143    #[inline(always)]
6144    fn interleave_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> (i32x8<Self>, i32x8<Self>) {
6145        let (a0, a1) = self.split_i32x8(a);
6146        let (b0, b1) = self.split_i32x8(b);
6147        let lo_lo = self.zip_low_i32x4(a0, b0);
6148        let lo_hi = self.zip_high_i32x4(a0, b0);
6149        let hi_lo = self.zip_low_i32x4(a1, b1);
6150        let hi_hi = self.zip_high_i32x4(a1, b1);
6151        (
6152            self.combine_i32x4(lo_lo, lo_hi),
6153            self.combine_i32x4(hi_lo, hi_hi),
6154        )
6155    }
6156    #[inline(always)]
6157    fn deinterleave_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> (i32x8<Self>, i32x8<Self>) {
6158        let (a0, a1) = self.split_i32x8(a);
6159        let (b0, b1) = self.split_i32x8(b);
6160        let lo_even = self.unzip_low_i32x4(a0, a1);
6161        let lo_odd = self.unzip_high_i32x4(a0, a1);
6162        let hi_even = self.unzip_low_i32x4(b0, b1);
6163        let hi_odd = self.unzip_high_i32x4(b0, b1);
6164        (
6165            self.combine_i32x4(lo_even, hi_even),
6166            self.combine_i32x4(lo_odd, hi_odd),
6167        )
6168    }
6169    #[inline(always)]
6170    fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
6171        let (a0, a1) = self.split_mask32x8(a);
6172        let (b0, b1) = self.split_i32x8(b);
6173        let (c0, c1) = self.split_i32x8(c);
6174        self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
6175    }
6176    #[inline(always)]
6177    fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6178        let (a0, a1) = self.split_i32x8(a);
6179        let (b0, b1) = self.split_i32x8(b);
6180        self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
6181    }
6182    #[inline(always)]
6183    fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
6184        let (a0, a1) = self.split_i32x8(a);
6185        let (b0, b1) = self.split_i32x8(b);
6186        self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
6187    }
6188    #[inline(always)]
6189    fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
6190        let mut result = [0; 16usize];
6191        result[0..8usize].copy_from_slice(&a.val.0);
6192        result[8usize..16usize].copy_from_slice(&b.val.0);
6193        result.simd_into(self)
6194    }
6195    #[inline(always)]
6196    fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
6197        let mut b0 = [0; 4usize];
6198        let mut b1 = [0; 4usize];
6199        b0.copy_from_slice(&a.val.0[0..4usize]);
6200        b1.copy_from_slice(&a.val.0[4usize..8usize]);
6201        (b0.simd_into(self), b1.simd_into(self))
6202    }
6203    #[inline(always)]
6204    fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
6205        let (a0, a1) = self.split_i32x8(a);
6206        self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
6207    }
6208    #[inline(always)]
6209    fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
6210        let (a0, a1) = self.split_i32x8(a);
6211        self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
6212    }
6213    #[inline(always)]
6214    fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
6215        let (a0, a1) = self.split_i32x8(a);
6216        self.combine_u32x4(
6217            self.reinterpret_u32_i32x4(a0),
6218            self.reinterpret_u32_i32x4(a1),
6219        )
6220    }
6221    #[inline(always)]
6222    fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
6223        let (a0, a1) = self.split_i32x8(a);
6224        self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
6225    }
6226    #[inline(always)]
6227    fn splat_u32x8(self, val: u32) -> u32x8<Self> {
6228        let half = self.splat_u32x4(val);
6229        self.combine_u32x4(half, half)
6230    }
6231    #[inline(always)]
6232    fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8<Self> {
6233        u32x8 {
6234            val: crate::support::Aligned256(val),
6235            simd: self,
6236        }
6237    }
6238    #[inline(always)]
6239    fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8<Self> {
6240        u32x8 {
6241            val: crate::support::Aligned256(*val),
6242            simd: self,
6243        }
6244    }
6245    #[inline(always)]
6246    fn as_array_u32x8(self, a: u32x8<Self>) -> [u32; 8usize] {
6247        a.val.0
6248    }
6249    #[inline(always)]
6250    fn as_array_ref_u32x8(self, a: &u32x8<Self>) -> &[u32; 8usize] {
6251        &a.val.0
6252    }
6253    #[inline(always)]
6254    fn as_array_mut_u32x8(self, a: &mut u32x8<Self>) -> &mut [u32; 8usize] {
6255        &mut a.val.0
6256    }
6257    #[inline(always)]
6258    fn store_array_u32x8(self, a: u32x8<Self>, dest: &mut [u32; 8usize]) -> () {
6259        *dest = a.val.0;
6260    }
6261    #[inline(always)]
6262    fn cvt_from_bytes_u32x8(self, a: u8x32<Self>) -> u32x8<Self> {
6263        unsafe {
6264            u32x8 {
6265                val: core::mem::transmute(a.val),
6266                simd: self,
6267            }
6268        }
6269    }
6270    #[inline(always)]
6271    fn cvt_to_bytes_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
6272        unsafe {
6273            u8x32 {
6274                val: core::mem::transmute(a.val),
6275                simd: self,
6276            }
6277        }
6278    }
6279    #[inline(always)]
6280    fn slide_u32x8<const SHIFT: usize>(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6281        let mut dest = [Default::default(); 8usize];
6282        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6283        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6284        dest.simd_into(self)
6285    }
6286    #[inline(always)]
6287    fn slide_within_blocks_u32x8<const SHIFT: usize>(
6288        self,
6289        a: u32x8<Self>,
6290        b: u32x8<Self>,
6291    ) -> u32x8<Self> {
6292        let (a0, a1) = self.split_u32x8(a);
6293        let (b0, b1) = self.split_u32x8(b);
6294        self.combine_u32x4(
6295            self.slide_within_blocks_u32x4::<SHIFT>(a0, b0),
6296            self.slide_within_blocks_u32x4::<SHIFT>(a1, b1),
6297        )
6298    }
6299    #[inline(always)]
6300    fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6301        let (a0, a1) = self.split_u32x8(a);
6302        let (b0, b1) = self.split_u32x8(b);
6303        self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
6304    }
6305    #[inline(always)]
6306    fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6307        let (a0, a1) = self.split_u32x8(a);
6308        let (b0, b1) = self.split_u32x8(b);
6309        self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
6310    }
6311    #[inline(always)]
6312    fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6313        let (a0, a1) = self.split_u32x8(a);
6314        let (b0, b1) = self.split_u32x8(b);
6315        self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
6316    }
6317    #[inline(always)]
6318    fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6319        let (a0, a1) = self.split_u32x8(a);
6320        let (b0, b1) = self.split_u32x8(b);
6321        self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
6322    }
6323    #[inline(always)]
6324    fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6325        let (a0, a1) = self.split_u32x8(a);
6326        let (b0, b1) = self.split_u32x8(b);
6327        self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
6328    }
6329    #[inline(always)]
6330    fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6331        let (a0, a1) = self.split_u32x8(a);
6332        let (b0, b1) = self.split_u32x8(b);
6333        self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
6334    }
6335    #[inline(always)]
6336    fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
6337        let (a0, a1) = self.split_u32x8(a);
6338        self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
6339    }
6340    #[inline(always)]
6341    fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
6342        let (a0, a1) = self.split_u32x8(a);
6343        self.combine_u32x4(self.shl_u32x4(a0, shift), self.shl_u32x4(a1, shift))
6344    }
6345    #[inline(always)]
6346    fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6347        let (a0, a1) = self.split_u32x8(a);
6348        let (b0, b1) = self.split_u32x8(b);
6349        self.combine_u32x4(self.shlv_u32x4(a0, b0), self.shlv_u32x4(a1, b1))
6350    }
6351    #[inline(always)]
6352    fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
6353        let (a0, a1) = self.split_u32x8(a);
6354        self.combine_u32x4(self.shr_u32x4(a0, shift), self.shr_u32x4(a1, shift))
6355    }
6356    #[inline(always)]
6357    fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6358        let (a0, a1) = self.split_u32x8(a);
6359        let (b0, b1) = self.split_u32x8(b);
6360        self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
6361    }
6362    #[inline(always)]
6363    fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6364        let (a0, a1) = self.split_u32x8(a);
6365        let (b0, b1) = self.split_u32x8(b);
6366        self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
6367    }
6368    #[inline(always)]
6369    fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6370        let (a0, a1) = self.split_u32x8(a);
6371        let (b0, b1) = self.split_u32x8(b);
6372        self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
6373    }
6374    #[inline(always)]
6375    fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6376        let (a0, a1) = self.split_u32x8(a);
6377        let (b0, b1) = self.split_u32x8(b);
6378        self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
6379    }
6380    #[inline(always)]
6381    fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6382        let (a0, a1) = self.split_u32x8(a);
6383        let (b0, b1) = self.split_u32x8(b);
6384        self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
6385    }
6386    #[inline(always)]
6387    fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
6388        let (a0, a1) = self.split_u32x8(a);
6389        let (b0, b1) = self.split_u32x8(b);
6390        self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
6391    }
6392    #[inline(always)]
6393    fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6394        let (a0, _) = self.split_u32x8(a);
6395        let (b0, _) = self.split_u32x8(b);
6396        self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
6397    }
6398    #[inline(always)]
6399    fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6400        let (_, a1) = self.split_u32x8(a);
6401        let (_, b1) = self.split_u32x8(b);
6402        self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
6403    }
6404    #[inline(always)]
6405    fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6406        let (a0, a1) = self.split_u32x8(a);
6407        let (b0, b1) = self.split_u32x8(b);
6408        self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
6409    }
6410    #[inline(always)]
6411    fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6412        let (a0, a1) = self.split_u32x8(a);
6413        let (b0, b1) = self.split_u32x8(b);
6414        self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
6415    }
6416    #[inline(always)]
6417    fn interleave_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> (u32x8<Self>, u32x8<Self>) {
6418        let (a0, a1) = self.split_u32x8(a);
6419        let (b0, b1) = self.split_u32x8(b);
6420        let lo_lo = self.zip_low_u32x4(a0, b0);
6421        let lo_hi = self.zip_high_u32x4(a0, b0);
6422        let hi_lo = self.zip_low_u32x4(a1, b1);
6423        let hi_hi = self.zip_high_u32x4(a1, b1);
6424        (
6425            self.combine_u32x4(lo_lo, lo_hi),
6426            self.combine_u32x4(hi_lo, hi_hi),
6427        )
6428    }
6429    #[inline(always)]
6430    fn deinterleave_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> (u32x8<Self>, u32x8<Self>) {
6431        let (a0, a1) = self.split_u32x8(a);
6432        let (b0, b1) = self.split_u32x8(b);
6433        let lo_even = self.unzip_low_u32x4(a0, a1);
6434        let lo_odd = self.unzip_high_u32x4(a0, a1);
6435        let hi_even = self.unzip_low_u32x4(b0, b1);
6436        let hi_odd = self.unzip_high_u32x4(b0, b1);
6437        (
6438            self.combine_u32x4(lo_even, hi_even),
6439            self.combine_u32x4(lo_odd, hi_odd),
6440        )
6441    }
6442    #[inline(always)]
6443    fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
6444        let (a0, a1) = self.split_mask32x8(a);
6445        let (b0, b1) = self.split_u32x8(b);
6446        let (c0, c1) = self.split_u32x8(c);
6447        self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
6448    }
6449    #[inline(always)]
6450    fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6451        let (a0, a1) = self.split_u32x8(a);
6452        let (b0, b1) = self.split_u32x8(b);
6453        self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
6454    }
6455    #[inline(always)]
6456    fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
6457        let (a0, a1) = self.split_u32x8(a);
6458        let (b0, b1) = self.split_u32x8(b);
6459        self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
6460    }
6461    #[inline(always)]
6462    fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
6463        let mut result = [0; 16usize];
6464        result[0..8usize].copy_from_slice(&a.val.0);
6465        result[8usize..16usize].copy_from_slice(&b.val.0);
6466        result.simd_into(self)
6467    }
6468    #[inline(always)]
6469    fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
6470        let mut b0 = [0; 4usize];
6471        let mut b1 = [0; 4usize];
6472        b0.copy_from_slice(&a.val.0[0..4usize]);
6473        b1.copy_from_slice(&a.val.0[4usize..8usize]);
6474        (b0.simd_into(self), b1.simd_into(self))
6475    }
6476    #[inline(always)]
6477    fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
6478        let (a0, a1) = self.split_u32x8(a);
6479        self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
6480    }
6481    #[inline(always)]
6482    fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
6483        let (a0, a1) = self.split_u32x8(a);
6484        self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
6485    }
6486    #[inline(always)]
6487    fn splat_mask32x8(self, val: i32) -> mask32x8<Self> {
6488        let half = self.splat_mask32x4(val);
6489        self.combine_mask32x4(half, half)
6490    }
6491    #[inline(always)]
6492    fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8<Self> {
6493        mask32x8 {
6494            val: crate::support::Aligned256(val),
6495            simd: self,
6496        }
6497    }
6498    #[inline(always)]
6499    fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8<Self> {
6500        mask32x8 {
6501            val: crate::support::Aligned256(*val),
6502            simd: self,
6503        }
6504    }
6505    #[inline(always)]
6506    fn as_array_mask32x8(self, a: mask32x8<Self>) -> [i32; 8usize] {
6507        a.val.0
6508    }
6509    #[inline(always)]
6510    fn as_array_ref_mask32x8(self, a: &mask32x8<Self>) -> &[i32; 8usize] {
6511        &a.val.0
6512    }
6513    #[inline(always)]
6514    fn as_array_mut_mask32x8(self, a: &mut mask32x8<Self>) -> &mut [i32; 8usize] {
6515        &mut a.val.0
6516    }
6517    #[inline(always)]
6518    fn store_array_mask32x8(self, a: mask32x8<Self>, dest: &mut [i32; 8usize]) -> () {
6519        *dest = a.val.0;
6520    }
6521    #[inline(always)]
6522    fn cvt_from_bytes_mask32x8(self, a: u8x32<Self>) -> mask32x8<Self> {
6523        unsafe {
6524            mask32x8 {
6525                val: core::mem::transmute(a.val),
6526                simd: self,
6527            }
6528        }
6529    }
6530    #[inline(always)]
6531    fn cvt_to_bytes_mask32x8(self, a: mask32x8<Self>) -> u8x32<Self> {
6532        unsafe {
6533            u8x32 {
6534                val: core::mem::transmute(a.val),
6535                simd: self,
6536            }
6537        }
6538    }
6539    #[inline(always)]
6540    fn slide_mask32x8<const SHIFT: usize>(
6541        self,
6542        a: mask32x8<Self>,
6543        b: mask32x8<Self>,
6544    ) -> mask32x8<Self> {
6545        let mut dest = [Default::default(); 8usize];
6546        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6547        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6548        dest.simd_into(self)
6549    }
6550    #[inline(always)]
6551    fn slide_within_blocks_mask32x8<const SHIFT: usize>(
6552        self,
6553        a: mask32x8<Self>,
6554        b: mask32x8<Self>,
6555    ) -> mask32x8<Self> {
6556        let (a0, a1) = self.split_mask32x8(a);
6557        let (b0, b1) = self.split_mask32x8(b);
6558        self.combine_mask32x4(
6559            self.slide_within_blocks_mask32x4::<SHIFT>(a0, b0),
6560            self.slide_within_blocks_mask32x4::<SHIFT>(a1, b1),
6561        )
6562    }
6563    #[inline(always)]
6564    fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6565        let (a0, a1) = self.split_mask32x8(a);
6566        let (b0, b1) = self.split_mask32x8(b);
6567        self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
6568    }
6569    #[inline(always)]
6570    fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6571        let (a0, a1) = self.split_mask32x8(a);
6572        let (b0, b1) = self.split_mask32x8(b);
6573        self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
6574    }
6575    #[inline(always)]
6576    fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6577        let (a0, a1) = self.split_mask32x8(a);
6578        let (b0, b1) = self.split_mask32x8(b);
6579        self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
6580    }
6581    #[inline(always)]
6582    fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
6583        let (a0, a1) = self.split_mask32x8(a);
6584        self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
6585    }
6586    #[inline(always)]
6587    fn select_mask32x8(
6588        self,
6589        a: mask32x8<Self>,
6590        b: mask32x8<Self>,
6591        c: mask32x8<Self>,
6592    ) -> mask32x8<Self> {
6593        let (a0, a1) = self.split_mask32x8(a);
6594        let (b0, b1) = self.split_mask32x8(b);
6595        let (c0, c1) = self.split_mask32x8(c);
6596        self.combine_mask32x4(
6597            self.select_mask32x4(a0, b0, c0),
6598            self.select_mask32x4(a1, b1, c1),
6599        )
6600    }
6601    #[inline(always)]
6602    fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
6603        let (a0, a1) = self.split_mask32x8(a);
6604        let (b0, b1) = self.split_mask32x8(b);
6605        self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
6606    }
6607    #[inline(always)]
6608    fn any_true_mask32x8(self, a: mask32x8<Self>) -> bool {
6609        let (a0, a1) = self.split_mask32x8(a);
6610        self.any_true_mask32x4(a0) || self.any_true_mask32x4(a1)
6611    }
6612    #[inline(always)]
6613    fn all_true_mask32x8(self, a: mask32x8<Self>) -> bool {
6614        let (a0, a1) = self.split_mask32x8(a);
6615        self.all_true_mask32x4(a0) && self.all_true_mask32x4(a1)
6616    }
6617    #[inline(always)]
6618    fn any_false_mask32x8(self, a: mask32x8<Self>) -> bool {
6619        let (a0, a1) = self.split_mask32x8(a);
6620        self.any_false_mask32x4(a0) || self.any_false_mask32x4(a1)
6621    }
6622    #[inline(always)]
6623    fn all_false_mask32x8(self, a: mask32x8<Self>) -> bool {
6624        let (a0, a1) = self.split_mask32x8(a);
6625        self.all_false_mask32x4(a0) && self.all_false_mask32x4(a1)
6626    }
6627    #[inline(always)]
6628    fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
6629        let mut result = [0; 16usize];
6630        result[0..8usize].copy_from_slice(&a.val.0);
6631        result[8usize..16usize].copy_from_slice(&b.val.0);
6632        result.simd_into(self)
6633    }
6634    #[inline(always)]
6635    fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
6636        let mut b0 = [0; 4usize];
6637        let mut b1 = [0; 4usize];
6638        b0.copy_from_slice(&a.val.0[0..4usize]);
6639        b1.copy_from_slice(&a.val.0[4usize..8usize]);
6640        (b0.simd_into(self), b1.simd_into(self))
6641    }
6642    #[inline(always)]
6643    fn splat_f64x4(self, val: f64) -> f64x4<Self> {
6644        let half = self.splat_f64x2(val);
6645        self.combine_f64x2(half, half)
6646    }
6647    #[inline(always)]
6648    fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4<Self> {
6649        f64x4 {
6650            val: crate::support::Aligned256(val),
6651            simd: self,
6652        }
6653    }
6654    #[inline(always)]
6655    fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4<Self> {
6656        f64x4 {
6657            val: crate::support::Aligned256(*val),
6658            simd: self,
6659        }
6660    }
6661    #[inline(always)]
6662    fn as_array_f64x4(self, a: f64x4<Self>) -> [f64; 4usize] {
6663        a.val.0
6664    }
6665    #[inline(always)]
6666    fn as_array_ref_f64x4(self, a: &f64x4<Self>) -> &[f64; 4usize] {
6667        &a.val.0
6668    }
6669    #[inline(always)]
6670    fn as_array_mut_f64x4(self, a: &mut f64x4<Self>) -> &mut [f64; 4usize] {
6671        &mut a.val.0
6672    }
6673    #[inline(always)]
6674    fn store_array_f64x4(self, a: f64x4<Self>, dest: &mut [f64; 4usize]) -> () {
6675        *dest = a.val.0;
6676    }
6677    #[inline(always)]
6678    fn cvt_from_bytes_f64x4(self, a: u8x32<Self>) -> f64x4<Self> {
6679        unsafe {
6680            f64x4 {
6681                val: core::mem::transmute(a.val),
6682                simd: self,
6683            }
6684        }
6685    }
6686    #[inline(always)]
6687    fn cvt_to_bytes_f64x4(self, a: f64x4<Self>) -> u8x32<Self> {
6688        unsafe {
6689            u8x32 {
6690                val: core::mem::transmute(a.val),
6691                simd: self,
6692            }
6693        }
6694    }
6695    #[inline(always)]
6696    fn slide_f64x4<const SHIFT: usize>(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6697        let mut dest = [Default::default(); 4usize];
6698        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
6699        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
6700        dest.simd_into(self)
6701    }
6702    #[inline(always)]
6703    fn slide_within_blocks_f64x4<const SHIFT: usize>(
6704        self,
6705        a: f64x4<Self>,
6706        b: f64x4<Self>,
6707    ) -> f64x4<Self> {
6708        let (a0, a1) = self.split_f64x4(a);
6709        let (b0, b1) = self.split_f64x4(b);
6710        self.combine_f64x2(
6711            self.slide_within_blocks_f64x2::<SHIFT>(a0, b0),
6712            self.slide_within_blocks_f64x2::<SHIFT>(a1, b1),
6713        )
6714    }
6715    #[inline(always)]
6716    fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6717        let (a0, a1) = self.split_f64x4(a);
6718        self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
6719    }
6720    #[inline(always)]
6721    fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6722        let (a0, a1) = self.split_f64x4(a);
6723        self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
6724    }
6725    #[inline(always)]
6726    fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6727        let (a0, a1) = self.split_f64x4(a);
6728        self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
6729    }
6730    #[inline(always)]
6731    fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6732        let (a0, a1) = self.split_f64x4(a);
6733        let (b0, b1) = self.split_f64x4(b);
6734        self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
6735    }
6736    #[inline(always)]
6737    fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6738        let (a0, a1) = self.split_f64x4(a);
6739        let (b0, b1) = self.split_f64x4(b);
6740        self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
6741    }
6742    #[inline(always)]
6743    fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6744        let (a0, a1) = self.split_f64x4(a);
6745        let (b0, b1) = self.split_f64x4(b);
6746        self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
6747    }
6748    #[inline(always)]
6749    fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6750        let (a0, a1) = self.split_f64x4(a);
6751        let (b0, b1) = self.split_f64x4(b);
6752        self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
6753    }
6754    #[inline(always)]
6755    fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6756        let (a0, a1) = self.split_f64x4(a);
6757        let (b0, b1) = self.split_f64x4(b);
6758        self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
6759    }
6760    #[inline(always)]
6761    fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6762        let (a0, a1) = self.split_f64x4(a);
6763        let (b0, b1) = self.split_f64x4(b);
6764        self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
6765    }
6766    #[inline(always)]
6767    fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6768        let (a0, a1) = self.split_f64x4(a);
6769        let (b0, b1) = self.split_f64x4(b);
6770        self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
6771    }
6772    #[inline(always)]
6773    fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6774        let (a0, a1) = self.split_f64x4(a);
6775        let (b0, b1) = self.split_f64x4(b);
6776        self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
6777    }
6778    #[inline(always)]
6779    fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6780        let (a0, a1) = self.split_f64x4(a);
6781        let (b0, b1) = self.split_f64x4(b);
6782        self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
6783    }
6784    #[inline(always)]
6785    fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
6786        let (a0, a1) = self.split_f64x4(a);
6787        let (b0, b1) = self.split_f64x4(b);
6788        self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
6789    }
6790    #[inline(always)]
6791    fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6792        let (a0, _) = self.split_f64x4(a);
6793        let (b0, _) = self.split_f64x4(b);
6794        self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
6795    }
6796    #[inline(always)]
6797    fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6798        let (_, a1) = self.split_f64x4(a);
6799        let (_, b1) = self.split_f64x4(b);
6800        self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
6801    }
6802    #[inline(always)]
6803    fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6804        let (a0, a1) = self.split_f64x4(a);
6805        let (b0, b1) = self.split_f64x4(b);
6806        self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
6807    }
6808    #[inline(always)]
6809    fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6810        let (a0, a1) = self.split_f64x4(a);
6811        let (b0, b1) = self.split_f64x4(b);
6812        self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
6813    }
6814    #[inline(always)]
6815    fn interleave_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> (f64x4<Self>, f64x4<Self>) {
6816        let (a0, a1) = self.split_f64x4(a);
6817        let (b0, b1) = self.split_f64x4(b);
6818        let lo_lo = self.zip_low_f64x2(a0, b0);
6819        let lo_hi = self.zip_high_f64x2(a0, b0);
6820        let hi_lo = self.zip_low_f64x2(a1, b1);
6821        let hi_hi = self.zip_high_f64x2(a1, b1);
6822        (
6823            self.combine_f64x2(lo_lo, lo_hi),
6824            self.combine_f64x2(hi_lo, hi_hi),
6825        )
6826    }
6827    #[inline(always)]
6828    fn deinterleave_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> (f64x4<Self>, f64x4<Self>) {
6829        let (a0, a1) = self.split_f64x4(a);
6830        let (b0, b1) = self.split_f64x4(b);
6831        let lo_even = self.unzip_low_f64x2(a0, a1);
6832        let lo_odd = self.unzip_high_f64x2(a0, a1);
6833        let hi_even = self.unzip_low_f64x2(b0, b1);
6834        let hi_odd = self.unzip_high_f64x2(b0, b1);
6835        (
6836            self.combine_f64x2(lo_even, hi_even),
6837            self.combine_f64x2(lo_odd, hi_odd),
6838        )
6839    }
6840    #[inline(always)]
6841    fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6842        let (a0, a1) = self.split_f64x4(a);
6843        let (b0, b1) = self.split_f64x4(b);
6844        self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
6845    }
6846    #[inline(always)]
6847    fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6848        let (a0, a1) = self.split_f64x4(a);
6849        let (b0, b1) = self.split_f64x4(b);
6850        self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
6851    }
6852    #[inline(always)]
6853    fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6854        let (a0, a1) = self.split_f64x4(a);
6855        let (b0, b1) = self.split_f64x4(b);
6856        self.combine_f64x2(
6857            self.max_precise_f64x2(a0, b0),
6858            self.max_precise_f64x2(a1, b1),
6859        )
6860    }
6861    #[inline(always)]
6862    fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
6863        let (a0, a1) = self.split_f64x4(a);
6864        let (b0, b1) = self.split_f64x4(b);
6865        self.combine_f64x2(
6866            self.min_precise_f64x2(a0, b0),
6867            self.min_precise_f64x2(a1, b1),
6868        )
6869    }
6870    #[inline(always)]
6871    fn mul_add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6872        let (a0, a1) = self.split_f64x4(a);
6873        let (b0, b1) = self.split_f64x4(b);
6874        let (c0, c1) = self.split_f64x4(c);
6875        self.combine_f64x2(
6876            self.mul_add_f64x2(a0, b0, c0),
6877            self.mul_add_f64x2(a1, b1, c1),
6878        )
6879    }
6880    #[inline(always)]
6881    fn mul_sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6882        let (a0, a1) = self.split_f64x4(a);
6883        let (b0, b1) = self.split_f64x4(b);
6884        let (c0, c1) = self.split_f64x4(c);
6885        self.combine_f64x2(
6886            self.mul_sub_f64x2(a0, b0, c0),
6887            self.mul_sub_f64x2(a1, b1, c1),
6888        )
6889    }
6890    #[inline(always)]
6891    fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6892        let (a0, a1) = self.split_f64x4(a);
6893        self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
6894    }
6895    #[inline(always)]
6896    fn ceil_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6897        let (a0, a1) = self.split_f64x4(a);
6898        self.combine_f64x2(self.ceil_f64x2(a0), self.ceil_f64x2(a1))
6899    }
6900    #[inline(always)]
6901    fn round_ties_even_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6902        let (a0, a1) = self.split_f64x4(a);
6903        self.combine_f64x2(
6904            self.round_ties_even_f64x2(a0),
6905            self.round_ties_even_f64x2(a1),
6906        )
6907    }
6908    #[inline(always)]
6909    fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6910        let (a0, a1) = self.split_f64x4(a);
6911        self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
6912    }
6913    #[inline(always)]
6914    fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
6915        let (a0, a1) = self.split_f64x4(a);
6916        self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
6917    }
6918    #[inline(always)]
6919    fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
6920        let (a0, a1) = self.split_mask64x4(a);
6921        let (b0, b1) = self.split_f64x4(b);
6922        let (c0, c1) = self.split_f64x4(c);
6923        self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
6924    }
6925    #[inline(always)]
6926    fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
6927        let mut result = [0.0; 8usize];
6928        result[0..4usize].copy_from_slice(&a.val.0);
6929        result[4usize..8usize].copy_from_slice(&b.val.0);
6930        result.simd_into(self)
6931    }
6932    #[inline(always)]
6933    fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
6934        let mut b0 = [0.0; 2usize];
6935        let mut b1 = [0.0; 2usize];
6936        b0.copy_from_slice(&a.val.0[0..2usize]);
6937        b1.copy_from_slice(&a.val.0[2usize..4usize]);
6938        (b0.simd_into(self), b1.simd_into(self))
6939    }
6940    #[inline(always)]
6941    fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
6942        let (a0, a1) = self.split_f64x4(a);
6943        self.combine_f32x4(
6944            self.reinterpret_f32_f64x2(a0),
6945            self.reinterpret_f32_f64x2(a1),
6946        )
6947    }
6948    #[inline(always)]
6949    fn splat_mask64x4(self, val: i64) -> mask64x4<Self> {
6950        let half = self.splat_mask64x2(val);
6951        self.combine_mask64x2(half, half)
6952    }
6953    #[inline(always)]
6954    fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4<Self> {
6955        mask64x4 {
6956            val: crate::support::Aligned256(val),
6957            simd: self,
6958        }
6959    }
6960    #[inline(always)]
6961    fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4<Self> {
6962        mask64x4 {
6963            val: crate::support::Aligned256(*val),
6964            simd: self,
6965        }
6966    }
6967    #[inline(always)]
6968    fn as_array_mask64x4(self, a: mask64x4<Self>) -> [i64; 4usize] {
6969        a.val.0
6970    }
6971    #[inline(always)]
6972    fn as_array_ref_mask64x4(self, a: &mask64x4<Self>) -> &[i64; 4usize] {
6973        &a.val.0
6974    }
6975    #[inline(always)]
6976    fn as_array_mut_mask64x4(self, a: &mut mask64x4<Self>) -> &mut [i64; 4usize] {
6977        &mut a.val.0
6978    }
6979    #[inline(always)]
6980    fn store_array_mask64x4(self, a: mask64x4<Self>, dest: &mut [i64; 4usize]) -> () {
6981        *dest = a.val.0;
6982    }
6983    #[inline(always)]
6984    fn cvt_from_bytes_mask64x4(self, a: u8x32<Self>) -> mask64x4<Self> {
6985        unsafe {
6986            mask64x4 {
6987                val: core::mem::transmute(a.val),
6988                simd: self,
6989            }
6990        }
6991    }
6992    #[inline(always)]
6993    fn cvt_to_bytes_mask64x4(self, a: mask64x4<Self>) -> u8x32<Self> {
6994        unsafe {
6995            u8x32 {
6996                val: core::mem::transmute(a.val),
6997                simd: self,
6998            }
6999        }
7000    }
7001    #[inline(always)]
7002    fn slide_mask64x4<const SHIFT: usize>(
7003        self,
7004        a: mask64x4<Self>,
7005        b: mask64x4<Self>,
7006    ) -> mask64x4<Self> {
7007        let mut dest = [Default::default(); 4usize];
7008        dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7009        dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7010        dest.simd_into(self)
7011    }
7012    #[inline(always)]
7013    fn slide_within_blocks_mask64x4<const SHIFT: usize>(
7014        self,
7015        a: mask64x4<Self>,
7016        b: mask64x4<Self>,
7017    ) -> mask64x4<Self> {
7018        let (a0, a1) = self.split_mask64x4(a);
7019        let (b0, b1) = self.split_mask64x4(b);
7020        self.combine_mask64x2(
7021            self.slide_within_blocks_mask64x2::<SHIFT>(a0, b0),
7022            self.slide_within_blocks_mask64x2::<SHIFT>(a1, b1),
7023        )
7024    }
7025    #[inline(always)]
7026    fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7027        let (a0, a1) = self.split_mask64x4(a);
7028        let (b0, b1) = self.split_mask64x4(b);
7029        self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
7030    }
7031    #[inline(always)]
7032    fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7033        let (a0, a1) = self.split_mask64x4(a);
7034        let (b0, b1) = self.split_mask64x4(b);
7035        self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
7036    }
7037    #[inline(always)]
7038    fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7039        let (a0, a1) = self.split_mask64x4(a);
7040        let (b0, b1) = self.split_mask64x4(b);
7041        self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
7042    }
7043    #[inline(always)]
7044    fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
7045        let (a0, a1) = self.split_mask64x4(a);
7046        self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
7047    }
7048    #[inline(always)]
7049    fn select_mask64x4(
7050        self,
7051        a: mask64x4<Self>,
7052        b: mask64x4<Self>,
7053        c: mask64x4<Self>,
7054    ) -> mask64x4<Self> {
7055        let (a0, a1) = self.split_mask64x4(a);
7056        let (b0, b1) = self.split_mask64x4(b);
7057        let (c0, c1) = self.split_mask64x4(c);
7058        self.combine_mask64x2(
7059            self.select_mask64x2(a0, b0, c0),
7060            self.select_mask64x2(a1, b1, c1),
7061        )
7062    }
7063    #[inline(always)]
7064    fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
7065        let (a0, a1) = self.split_mask64x4(a);
7066        let (b0, b1) = self.split_mask64x4(b);
7067        self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
7068    }
7069    #[inline(always)]
7070    fn any_true_mask64x4(self, a: mask64x4<Self>) -> bool {
7071        let (a0, a1) = self.split_mask64x4(a);
7072        self.any_true_mask64x2(a0) || self.any_true_mask64x2(a1)
7073    }
7074    #[inline(always)]
7075    fn all_true_mask64x4(self, a: mask64x4<Self>) -> bool {
7076        let (a0, a1) = self.split_mask64x4(a);
7077        self.all_true_mask64x2(a0) && self.all_true_mask64x2(a1)
7078    }
7079    #[inline(always)]
7080    fn any_false_mask64x4(self, a: mask64x4<Self>) -> bool {
7081        let (a0, a1) = self.split_mask64x4(a);
7082        self.any_false_mask64x2(a0) || self.any_false_mask64x2(a1)
7083    }
7084    #[inline(always)]
7085    fn all_false_mask64x4(self, a: mask64x4<Self>) -> bool {
7086        let (a0, a1) = self.split_mask64x4(a);
7087        self.all_false_mask64x2(a0) && self.all_false_mask64x2(a1)
7088    }
7089    #[inline(always)]
7090    fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
7091        let mut result = [0; 8usize];
7092        result[0..4usize].copy_from_slice(&a.val.0);
7093        result[4usize..8usize].copy_from_slice(&b.val.0);
7094        result.simd_into(self)
7095    }
7096    #[inline(always)]
7097    fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
7098        let mut b0 = [0; 2usize];
7099        let mut b1 = [0; 2usize];
7100        b0.copy_from_slice(&a.val.0[0..2usize]);
7101        b1.copy_from_slice(&a.val.0[2usize..4usize]);
7102        (b0.simd_into(self), b1.simd_into(self))
7103    }
7104    #[inline(always)]
7105    fn splat_f32x16(self, val: f32) -> f32x16<Self> {
7106        let half = self.splat_f32x8(val);
7107        self.combine_f32x8(half, half)
7108    }
7109    #[inline(always)]
7110    fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16<Self> {
7111        f32x16 {
7112            val: crate::support::Aligned512(val),
7113            simd: self,
7114        }
7115    }
7116    #[inline(always)]
7117    fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16<Self> {
7118        f32x16 {
7119            val: crate::support::Aligned512(*val),
7120            simd: self,
7121        }
7122    }
7123    #[inline(always)]
7124    fn as_array_f32x16(self, a: f32x16<Self>) -> [f32; 16usize] {
7125        a.val.0
7126    }
7127    #[inline(always)]
7128    fn as_array_ref_f32x16(self, a: &f32x16<Self>) -> &[f32; 16usize] {
7129        &a.val.0
7130    }
7131    #[inline(always)]
7132    fn as_array_mut_f32x16(self, a: &mut f32x16<Self>) -> &mut [f32; 16usize] {
7133        &mut a.val.0
7134    }
7135    #[inline(always)]
7136    fn store_array_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
7137        *dest = a.val.0;
7138    }
7139    #[inline(always)]
7140    fn cvt_from_bytes_f32x16(self, a: u8x64<Self>) -> f32x16<Self> {
7141        unsafe {
7142            f32x16 {
7143                val: core::mem::transmute(a.val),
7144                simd: self,
7145            }
7146        }
7147    }
7148    #[inline(always)]
7149    fn cvt_to_bytes_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
7150        unsafe {
7151            u8x64 {
7152                val: core::mem::transmute(a.val),
7153                simd: self,
7154            }
7155        }
7156    }
7157    #[inline(always)]
7158    fn slide_f32x16<const SHIFT: usize>(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7159        let mut dest = [Default::default(); 16usize];
7160        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7161        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7162        dest.simd_into(self)
7163    }
7164    #[inline(always)]
7165    fn slide_within_blocks_f32x16<const SHIFT: usize>(
7166        self,
7167        a: f32x16<Self>,
7168        b: f32x16<Self>,
7169    ) -> f32x16<Self> {
7170        let (a0, a1) = self.split_f32x16(a);
7171        let (b0, b1) = self.split_f32x16(b);
7172        self.combine_f32x8(
7173            self.slide_within_blocks_f32x8::<SHIFT>(a0, b0),
7174            self.slide_within_blocks_f32x8::<SHIFT>(a1, b1),
7175        )
7176    }
7177    #[inline(always)]
7178    fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7179        let (a0, a1) = self.split_f32x16(a);
7180        self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
7181    }
7182    #[inline(always)]
7183    fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7184        let (a0, a1) = self.split_f32x16(a);
7185        self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
7186    }
7187    #[inline(always)]
7188    fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7189        let (a0, a1) = self.split_f32x16(a);
7190        self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
7191    }
7192    #[inline(always)]
7193    fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7194        let (a0, a1) = self.split_f32x16(a);
7195        let (b0, b1) = self.split_f32x16(b);
7196        self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
7197    }
7198    #[inline(always)]
7199    fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7200        let (a0, a1) = self.split_f32x16(a);
7201        let (b0, b1) = self.split_f32x16(b);
7202        self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
7203    }
7204    #[inline(always)]
7205    fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7206        let (a0, a1) = self.split_f32x16(a);
7207        let (b0, b1) = self.split_f32x16(b);
7208        self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
7209    }
7210    #[inline(always)]
7211    fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7212        let (a0, a1) = self.split_f32x16(a);
7213        let (b0, b1) = self.split_f32x16(b);
7214        self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
7215    }
7216    #[inline(always)]
7217    fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7218        let (a0, a1) = self.split_f32x16(a);
7219        let (b0, b1) = self.split_f32x16(b);
7220        self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
7221    }
7222    #[inline(always)]
7223    fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7224        let (a0, a1) = self.split_f32x16(a);
7225        let (b0, b1) = self.split_f32x16(b);
7226        self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
7227    }
7228    #[inline(always)]
7229    fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7230        let (a0, a1) = self.split_f32x16(a);
7231        let (b0, b1) = self.split_f32x16(b);
7232        self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
7233    }
7234    #[inline(always)]
7235    fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7236        let (a0, a1) = self.split_f32x16(a);
7237        let (b0, b1) = self.split_f32x16(b);
7238        self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
7239    }
7240    #[inline(always)]
7241    fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7242        let (a0, a1) = self.split_f32x16(a);
7243        let (b0, b1) = self.split_f32x16(b);
7244        self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
7245    }
7246    #[inline(always)]
7247    fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
7248        let (a0, a1) = self.split_f32x16(a);
7249        let (b0, b1) = self.split_f32x16(b);
7250        self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
7251    }
7252    #[inline(always)]
7253    fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7254        let (a0, _) = self.split_f32x16(a);
7255        let (b0, _) = self.split_f32x16(b);
7256        self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
7257    }
7258    #[inline(always)]
7259    fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7260        let (_, a1) = self.split_f32x16(a);
7261        let (_, b1) = self.split_f32x16(b);
7262        self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
7263    }
7264    #[inline(always)]
7265    fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7266        let (a0, a1) = self.split_f32x16(a);
7267        let (b0, b1) = self.split_f32x16(b);
7268        self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
7269    }
7270    #[inline(always)]
7271    fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7272        let (a0, a1) = self.split_f32x16(a);
7273        let (b0, b1) = self.split_f32x16(b);
7274        self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
7275    }
7276    #[inline(always)]
7277    fn interleave_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> (f32x16<Self>, f32x16<Self>) {
7278        let (a0, a1) = self.split_f32x16(a);
7279        let (b0, b1) = self.split_f32x16(b);
7280        let lo_lo = self.zip_low_f32x8(a0, b0);
7281        let lo_hi = self.zip_high_f32x8(a0, b0);
7282        let hi_lo = self.zip_low_f32x8(a1, b1);
7283        let hi_hi = self.zip_high_f32x8(a1, b1);
7284        (
7285            self.combine_f32x8(lo_lo, lo_hi),
7286            self.combine_f32x8(hi_lo, hi_hi),
7287        )
7288    }
7289    #[inline(always)]
7290    fn deinterleave_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> (f32x16<Self>, f32x16<Self>) {
7291        let (a0, a1) = self.split_f32x16(a);
7292        let (b0, b1) = self.split_f32x16(b);
7293        let lo_even = self.unzip_low_f32x8(a0, a1);
7294        let lo_odd = self.unzip_high_f32x8(a0, a1);
7295        let hi_even = self.unzip_low_f32x8(b0, b1);
7296        let hi_odd = self.unzip_high_f32x8(b0, b1);
7297        (
7298            self.combine_f32x8(lo_even, hi_even),
7299            self.combine_f32x8(lo_odd, hi_odd),
7300        )
7301    }
7302    #[inline(always)]
7303    fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7304        let (a0, a1) = self.split_f32x16(a);
7305        let (b0, b1) = self.split_f32x16(b);
7306        self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
7307    }
7308    #[inline(always)]
7309    fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7310        let (a0, a1) = self.split_f32x16(a);
7311        let (b0, b1) = self.split_f32x16(b);
7312        self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
7313    }
7314    #[inline(always)]
7315    fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7316        let (a0, a1) = self.split_f32x16(a);
7317        let (b0, b1) = self.split_f32x16(b);
7318        self.combine_f32x8(
7319            self.max_precise_f32x8(a0, b0),
7320            self.max_precise_f32x8(a1, b1),
7321        )
7322    }
7323    #[inline(always)]
7324    fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
7325        let (a0, a1) = self.split_f32x16(a);
7326        let (b0, b1) = self.split_f32x16(b);
7327        self.combine_f32x8(
7328            self.min_precise_f32x8(a0, b0),
7329            self.min_precise_f32x8(a1, b1),
7330        )
7331    }
7332    #[inline(always)]
7333    fn mul_add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7334        let (a0, a1) = self.split_f32x16(a);
7335        let (b0, b1) = self.split_f32x16(b);
7336        let (c0, c1) = self.split_f32x16(c);
7337        self.combine_f32x8(
7338            self.mul_add_f32x8(a0, b0, c0),
7339            self.mul_add_f32x8(a1, b1, c1),
7340        )
7341    }
7342    #[inline(always)]
7343    fn mul_sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7344        let (a0, a1) = self.split_f32x16(a);
7345        let (b0, b1) = self.split_f32x16(b);
7346        let (c0, c1) = self.split_f32x16(c);
7347        self.combine_f32x8(
7348            self.mul_sub_f32x8(a0, b0, c0),
7349            self.mul_sub_f32x8(a1, b1, c1),
7350        )
7351    }
7352    #[inline(always)]
7353    fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7354        let (a0, a1) = self.split_f32x16(a);
7355        self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
7356    }
7357    #[inline(always)]
7358    fn ceil_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7359        let (a0, a1) = self.split_f32x16(a);
7360        self.combine_f32x8(self.ceil_f32x8(a0), self.ceil_f32x8(a1))
7361    }
7362    #[inline(always)]
7363    fn round_ties_even_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7364        let (a0, a1) = self.split_f32x16(a);
7365        self.combine_f32x8(
7366            self.round_ties_even_f32x8(a0),
7367            self.round_ties_even_f32x8(a1),
7368        )
7369    }
7370    #[inline(always)]
7371    fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7372        let (a0, a1) = self.split_f32x16(a);
7373        self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
7374    }
7375    #[inline(always)]
7376    fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
7377        let (a0, a1) = self.split_f32x16(a);
7378        self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
7379    }
7380    #[inline(always)]
7381    fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
7382        let (a0, a1) = self.split_mask32x16(a);
7383        let (b0, b1) = self.split_f32x16(b);
7384        let (c0, c1) = self.split_f32x16(c);
7385        self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
7386    }
7387    #[inline(always)]
7388    fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
7389        let mut b0 = [0.0; 8usize];
7390        let mut b1 = [0.0; 8usize];
7391        b0.copy_from_slice(&a.val.0[0..8usize]);
7392        b1.copy_from_slice(&a.val.0[8usize..16usize]);
7393        (b0.simd_into(self), b1.simd_into(self))
7394    }
7395    #[inline(always)]
7396    fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
7397        let (a0, a1) = self.split_f32x16(a);
7398        self.combine_f64x4(
7399            self.reinterpret_f64_f32x8(a0),
7400            self.reinterpret_f64_f32x8(a1),
7401        )
7402    }
7403    #[inline(always)]
7404    fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7405        let (a0, a1) = self.split_f32x16(a);
7406        self.combine_i32x8(
7407            self.reinterpret_i32_f32x8(a0),
7408            self.reinterpret_i32_f32x8(a1),
7409        )
7410    }
7411    #[inline(always)]
7412    fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
7413        [
7414            src[0usize],
7415            src[4usize],
7416            src[8usize],
7417            src[12usize],
7418            src[1usize],
7419            src[5usize],
7420            src[9usize],
7421            src[13usize],
7422            src[2usize],
7423            src[6usize],
7424            src[10usize],
7425            src[14usize],
7426            src[3usize],
7427            src[7usize],
7428            src[11usize],
7429            src[15usize],
7430        ]
7431        .simd_into(self)
7432    }
7433    #[inline(always)]
7434    fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
7435        *dest = [
7436            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
7437            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
7438            a[11usize], a[15usize],
7439        ];
7440    }
7441    #[inline(always)]
7442    fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
7443        let (a0, a1) = self.split_f32x16(a);
7444        self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
7445    }
7446    #[inline(always)]
7447    fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7448        let (a0, a1) = self.split_f32x16(a);
7449        self.combine_u32x8(
7450            self.reinterpret_u32_f32x8(a0),
7451            self.reinterpret_u32_f32x8(a1),
7452        )
7453    }
7454    #[inline(always)]
7455    fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7456        let (a0, a1) = self.split_f32x16(a);
7457        self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
7458    }
7459    #[inline(always)]
7460    fn cvt_u32_precise_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
7461        let (a0, a1) = self.split_f32x16(a);
7462        self.combine_u32x8(
7463            self.cvt_u32_precise_f32x8(a0),
7464            self.cvt_u32_precise_f32x8(a1),
7465        )
7466    }
7467    #[inline(always)]
7468    fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7469        let (a0, a1) = self.split_f32x16(a);
7470        self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
7471    }
7472    #[inline(always)]
7473    fn cvt_i32_precise_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
7474        let (a0, a1) = self.split_f32x16(a);
7475        self.combine_i32x8(
7476            self.cvt_i32_precise_f32x8(a0),
7477            self.cvt_i32_precise_f32x8(a1),
7478        )
7479    }
7480    #[inline(always)]
7481    fn splat_i8x64(self, val: i8) -> i8x64<Self> {
7482        let half = self.splat_i8x32(val);
7483        self.combine_i8x32(half, half)
7484    }
7485    #[inline(always)]
7486    fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64<Self> {
7487        i8x64 {
7488            val: crate::support::Aligned512(val),
7489            simd: self,
7490        }
7491    }
7492    #[inline(always)]
7493    fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64<Self> {
7494        i8x64 {
7495            val: crate::support::Aligned512(*val),
7496            simd: self,
7497        }
7498    }
7499    #[inline(always)]
7500    fn as_array_i8x64(self, a: i8x64<Self>) -> [i8; 64usize] {
7501        a.val.0
7502    }
7503    #[inline(always)]
7504    fn as_array_ref_i8x64(self, a: &i8x64<Self>) -> &[i8; 64usize] {
7505        &a.val.0
7506    }
7507    #[inline(always)]
7508    fn as_array_mut_i8x64(self, a: &mut i8x64<Self>) -> &mut [i8; 64usize] {
7509        &mut a.val.0
7510    }
7511    #[inline(always)]
7512    fn store_array_i8x64(self, a: i8x64<Self>, dest: &mut [i8; 64usize]) -> () {
7513        *dest = a.val.0;
7514    }
7515    #[inline(always)]
7516    fn cvt_from_bytes_i8x64(self, a: u8x64<Self>) -> i8x64<Self> {
7517        unsafe {
7518            i8x64 {
7519                val: core::mem::transmute(a.val),
7520                simd: self,
7521            }
7522        }
7523    }
7524    #[inline(always)]
7525    fn cvt_to_bytes_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
7526        unsafe {
7527            u8x64 {
7528                val: core::mem::transmute(a.val),
7529                simd: self,
7530            }
7531        }
7532    }
7533    #[inline(always)]
7534    fn slide_i8x64<const SHIFT: usize>(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7535        let mut dest = [Default::default(); 64usize];
7536        dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7537        dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7538        dest.simd_into(self)
7539    }
7540    #[inline(always)]
7541    fn slide_within_blocks_i8x64<const SHIFT: usize>(
7542        self,
7543        a: i8x64<Self>,
7544        b: i8x64<Self>,
7545    ) -> i8x64<Self> {
7546        let (a0, a1) = self.split_i8x64(a);
7547        let (b0, b1) = self.split_i8x64(b);
7548        self.combine_i8x32(
7549            self.slide_within_blocks_i8x32::<SHIFT>(a0, b0),
7550            self.slide_within_blocks_i8x32::<SHIFT>(a1, b1),
7551        )
7552    }
7553    #[inline(always)]
7554    fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7555        let (a0, a1) = self.split_i8x64(a);
7556        let (b0, b1) = self.split_i8x64(b);
7557        self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
7558    }
7559    #[inline(always)]
7560    fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7561        let (a0, a1) = self.split_i8x64(a);
7562        let (b0, b1) = self.split_i8x64(b);
7563        self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
7564    }
7565    #[inline(always)]
7566    fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7567        let (a0, a1) = self.split_i8x64(a);
7568        let (b0, b1) = self.split_i8x64(b);
7569        self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
7570    }
7571    #[inline(always)]
7572    fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7573        let (a0, a1) = self.split_i8x64(a);
7574        let (b0, b1) = self.split_i8x64(b);
7575        self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
7576    }
7577    #[inline(always)]
7578    fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7579        let (a0, a1) = self.split_i8x64(a);
7580        let (b0, b1) = self.split_i8x64(b);
7581        self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
7582    }
7583    #[inline(always)]
7584    fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7585        let (a0, a1) = self.split_i8x64(a);
7586        let (b0, b1) = self.split_i8x64(b);
7587        self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
7588    }
7589    #[inline(always)]
7590    fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
7591        let (a0, a1) = self.split_i8x64(a);
7592        self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
7593    }
7594    #[inline(always)]
7595    fn shl_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
7596        let (a0, a1) = self.split_i8x64(a);
7597        self.combine_i8x32(self.shl_i8x32(a0, shift), self.shl_i8x32(a1, shift))
7598    }
7599    #[inline(always)]
7600    fn shlv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7601        let (a0, a1) = self.split_i8x64(a);
7602        let (b0, b1) = self.split_i8x64(b);
7603        self.combine_i8x32(self.shlv_i8x32(a0, b0), self.shlv_i8x32(a1, b1))
7604    }
7605    #[inline(always)]
7606    fn shr_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
7607        let (a0, a1) = self.split_i8x64(a);
7608        self.combine_i8x32(self.shr_i8x32(a0, shift), self.shr_i8x32(a1, shift))
7609    }
7610    #[inline(always)]
7611    fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7612        let (a0, a1) = self.split_i8x64(a);
7613        let (b0, b1) = self.split_i8x64(b);
7614        self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
7615    }
7616    #[inline(always)]
7617    fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7618        let (a0, a1) = self.split_i8x64(a);
7619        let (b0, b1) = self.split_i8x64(b);
7620        self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
7621    }
7622    #[inline(always)]
7623    fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7624        let (a0, a1) = self.split_i8x64(a);
7625        let (b0, b1) = self.split_i8x64(b);
7626        self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
7627    }
7628    #[inline(always)]
7629    fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7630        let (a0, a1) = self.split_i8x64(a);
7631        let (b0, b1) = self.split_i8x64(b);
7632        self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
7633    }
7634    #[inline(always)]
7635    fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7636        let (a0, a1) = self.split_i8x64(a);
7637        let (b0, b1) = self.split_i8x64(b);
7638        self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
7639    }
7640    #[inline(always)]
7641    fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
7642        let (a0, a1) = self.split_i8x64(a);
7643        let (b0, b1) = self.split_i8x64(b);
7644        self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
7645    }
7646    #[inline(always)]
7647    fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7648        let (a0, _) = self.split_i8x64(a);
7649        let (b0, _) = self.split_i8x64(b);
7650        self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
7651    }
7652    #[inline(always)]
7653    fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7654        let (_, a1) = self.split_i8x64(a);
7655        let (_, b1) = self.split_i8x64(b);
7656        self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
7657    }
7658    #[inline(always)]
7659    fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7660        let (a0, a1) = self.split_i8x64(a);
7661        let (b0, b1) = self.split_i8x64(b);
7662        self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
7663    }
7664    #[inline(always)]
7665    fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7666        let (a0, a1) = self.split_i8x64(a);
7667        let (b0, b1) = self.split_i8x64(b);
7668        self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
7669    }
7670    #[inline(always)]
7671    fn interleave_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> (i8x64<Self>, i8x64<Self>) {
7672        let (a0, a1) = self.split_i8x64(a);
7673        let (b0, b1) = self.split_i8x64(b);
7674        let lo_lo = self.zip_low_i8x32(a0, b0);
7675        let lo_hi = self.zip_high_i8x32(a0, b0);
7676        let hi_lo = self.zip_low_i8x32(a1, b1);
7677        let hi_hi = self.zip_high_i8x32(a1, b1);
7678        (
7679            self.combine_i8x32(lo_lo, lo_hi),
7680            self.combine_i8x32(hi_lo, hi_hi),
7681        )
7682    }
7683    #[inline(always)]
7684    fn deinterleave_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> (i8x64<Self>, i8x64<Self>) {
7685        let (a0, a1) = self.split_i8x64(a);
7686        let (b0, b1) = self.split_i8x64(b);
7687        let lo_even = self.unzip_low_i8x32(a0, a1);
7688        let lo_odd = self.unzip_high_i8x32(a0, a1);
7689        let hi_even = self.unzip_low_i8x32(b0, b1);
7690        let hi_odd = self.unzip_high_i8x32(b0, b1);
7691        (
7692            self.combine_i8x32(lo_even, hi_even),
7693            self.combine_i8x32(lo_odd, hi_odd),
7694        )
7695    }
7696    #[inline(always)]
7697    fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
7698        let (a0, a1) = self.split_mask8x64(a);
7699        let (b0, b1) = self.split_i8x64(b);
7700        let (c0, c1) = self.split_i8x64(c);
7701        self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
7702    }
7703    #[inline(always)]
7704    fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7705        let (a0, a1) = self.split_i8x64(a);
7706        let (b0, b1) = self.split_i8x64(b);
7707        self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
7708    }
7709    #[inline(always)]
7710    fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
7711        let (a0, a1) = self.split_i8x64(a);
7712        let (b0, b1) = self.split_i8x64(b);
7713        self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
7714    }
7715    #[inline(always)]
7716    fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
7717        let mut b0 = [0; 32usize];
7718        let mut b1 = [0; 32usize];
7719        b0.copy_from_slice(&a.val.0[0..32usize]);
7720        b1.copy_from_slice(&a.val.0[32usize..64usize]);
7721        (b0.simd_into(self), b1.simd_into(self))
7722    }
7723    #[inline(always)]
7724    fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
7725        let (a0, a1) = self.split_i8x64(a);
7726        self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
7727    }
7728    #[inline(always)]
7729    fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
7730        let (a0, a1) = self.split_i8x64(a);
7731        self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
7732    }
7733    #[inline(always)]
7734    fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
7735        let (a0, a1) = self.split_i8x64(a);
7736        self.combine_u32x8(
7737            self.reinterpret_u32_i8x32(a0),
7738            self.reinterpret_u32_i8x32(a1),
7739        )
7740    }
7741    #[inline(always)]
7742    fn splat_u8x64(self, val: u8) -> u8x64<Self> {
7743        let half = self.splat_u8x32(val);
7744        self.combine_u8x32(half, half)
7745    }
7746    #[inline(always)]
7747    fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64<Self> {
7748        u8x64 {
7749            val: crate::support::Aligned512(val),
7750            simd: self,
7751        }
7752    }
7753    #[inline(always)]
7754    fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64<Self> {
7755        u8x64 {
7756            val: crate::support::Aligned512(*val),
7757            simd: self,
7758        }
7759    }
7760    #[inline(always)]
7761    fn as_array_u8x64(self, a: u8x64<Self>) -> [u8; 64usize] {
7762        a.val.0
7763    }
7764    #[inline(always)]
7765    fn as_array_ref_u8x64(self, a: &u8x64<Self>) -> &[u8; 64usize] {
7766        &a.val.0
7767    }
7768    #[inline(always)]
7769    fn as_array_mut_u8x64(self, a: &mut u8x64<Self>) -> &mut [u8; 64usize] {
7770        &mut a.val.0
7771    }
7772    #[inline(always)]
7773    fn store_array_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
7774        *dest = a.val.0;
7775    }
7776    #[inline(always)]
7777    fn cvt_from_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7778        unsafe {
7779            u8x64 {
7780                val: core::mem::transmute(a.val),
7781                simd: self,
7782            }
7783        }
7784    }
7785    #[inline(always)]
7786    fn cvt_to_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7787        unsafe {
7788            u8x64 {
7789                val: core::mem::transmute(a.val),
7790                simd: self,
7791            }
7792        }
7793    }
7794    #[inline(always)]
7795    fn slide_u8x64<const SHIFT: usize>(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7796        let mut dest = [Default::default(); 64usize];
7797        dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
7798        dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
7799        dest.simd_into(self)
7800    }
7801    #[inline(always)]
7802    fn slide_within_blocks_u8x64<const SHIFT: usize>(
7803        self,
7804        a: u8x64<Self>,
7805        b: u8x64<Self>,
7806    ) -> u8x64<Self> {
7807        let (a0, a1) = self.split_u8x64(a);
7808        let (b0, b1) = self.split_u8x64(b);
7809        self.combine_u8x32(
7810            self.slide_within_blocks_u8x32::<SHIFT>(a0, b0),
7811            self.slide_within_blocks_u8x32::<SHIFT>(a1, b1),
7812        )
7813    }
7814    #[inline(always)]
7815    fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7816        let (a0, a1) = self.split_u8x64(a);
7817        let (b0, b1) = self.split_u8x64(b);
7818        self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
7819    }
7820    #[inline(always)]
7821    fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7822        let (a0, a1) = self.split_u8x64(a);
7823        let (b0, b1) = self.split_u8x64(b);
7824        self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
7825    }
7826    #[inline(always)]
7827    fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7828        let (a0, a1) = self.split_u8x64(a);
7829        let (b0, b1) = self.split_u8x64(b);
7830        self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
7831    }
7832    #[inline(always)]
7833    fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7834        let (a0, a1) = self.split_u8x64(a);
7835        let (b0, b1) = self.split_u8x64(b);
7836        self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
7837    }
7838    #[inline(always)]
7839    fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7840        let (a0, a1) = self.split_u8x64(a);
7841        let (b0, b1) = self.split_u8x64(b);
7842        self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
7843    }
7844    #[inline(always)]
7845    fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7846        let (a0, a1) = self.split_u8x64(a);
7847        let (b0, b1) = self.split_u8x64(b);
7848        self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
7849    }
7850    #[inline(always)]
7851    fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
7852        let (a0, a1) = self.split_u8x64(a);
7853        self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
7854    }
7855    #[inline(always)]
7856    fn shl_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
7857        let (a0, a1) = self.split_u8x64(a);
7858        self.combine_u8x32(self.shl_u8x32(a0, shift), self.shl_u8x32(a1, shift))
7859    }
7860    #[inline(always)]
7861    fn shlv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7862        let (a0, a1) = self.split_u8x64(a);
7863        let (b0, b1) = self.split_u8x64(b);
7864        self.combine_u8x32(self.shlv_u8x32(a0, b0), self.shlv_u8x32(a1, b1))
7865    }
7866    #[inline(always)]
7867    fn shr_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
7868        let (a0, a1) = self.split_u8x64(a);
7869        self.combine_u8x32(self.shr_u8x32(a0, shift), self.shr_u8x32(a1, shift))
7870    }
7871    #[inline(always)]
7872    fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7873        let (a0, a1) = self.split_u8x64(a);
7874        let (b0, b1) = self.split_u8x64(b);
7875        self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
7876    }
7877    #[inline(always)]
7878    fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7879        let (a0, a1) = self.split_u8x64(a);
7880        let (b0, b1) = self.split_u8x64(b);
7881        self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
7882    }
7883    #[inline(always)]
7884    fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7885        let (a0, a1) = self.split_u8x64(a);
7886        let (b0, b1) = self.split_u8x64(b);
7887        self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
7888    }
7889    #[inline(always)]
7890    fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7891        let (a0, a1) = self.split_u8x64(a);
7892        let (b0, b1) = self.split_u8x64(b);
7893        self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
7894    }
7895    #[inline(always)]
7896    fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7897        let (a0, a1) = self.split_u8x64(a);
7898        let (b0, b1) = self.split_u8x64(b);
7899        self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
7900    }
7901    #[inline(always)]
7902    fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
7903        let (a0, a1) = self.split_u8x64(a);
7904        let (b0, b1) = self.split_u8x64(b);
7905        self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
7906    }
7907    #[inline(always)]
7908    fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7909        let (a0, _) = self.split_u8x64(a);
7910        let (b0, _) = self.split_u8x64(b);
7911        self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
7912    }
7913    #[inline(always)]
7914    fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7915        let (_, a1) = self.split_u8x64(a);
7916        let (_, b1) = self.split_u8x64(b);
7917        self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
7918    }
7919    #[inline(always)]
7920    fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7921        let (a0, a1) = self.split_u8x64(a);
7922        let (b0, b1) = self.split_u8x64(b);
7923        self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
7924    }
7925    #[inline(always)]
7926    fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7927        let (a0, a1) = self.split_u8x64(a);
7928        let (b0, b1) = self.split_u8x64(b);
7929        self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
7930    }
7931    #[inline(always)]
7932    fn interleave_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> (u8x64<Self>, u8x64<Self>) {
7933        let (a0, a1) = self.split_u8x64(a);
7934        let (b0, b1) = self.split_u8x64(b);
7935        let lo_lo = self.zip_low_u8x32(a0, b0);
7936        let lo_hi = self.zip_high_u8x32(a0, b0);
7937        let hi_lo = self.zip_low_u8x32(a1, b1);
7938        let hi_hi = self.zip_high_u8x32(a1, b1);
7939        (
7940            self.combine_u8x32(lo_lo, lo_hi),
7941            self.combine_u8x32(hi_lo, hi_hi),
7942        )
7943    }
7944    #[inline(always)]
7945    fn deinterleave_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> (u8x64<Self>, u8x64<Self>) {
7946        let (a0, a1) = self.split_u8x64(a);
7947        let (b0, b1) = self.split_u8x64(b);
7948        let lo_even = self.unzip_low_u8x32(a0, a1);
7949        let lo_odd = self.unzip_high_u8x32(a0, a1);
7950        let hi_even = self.unzip_low_u8x32(b0, b1);
7951        let hi_odd = self.unzip_high_u8x32(b0, b1);
7952        (
7953            self.combine_u8x32(lo_even, hi_even),
7954            self.combine_u8x32(lo_odd, hi_odd),
7955        )
7956    }
7957    #[inline(always)]
7958    fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
7959        let (a0, a1) = self.split_mask8x64(a);
7960        let (b0, b1) = self.split_u8x64(b);
7961        let (c0, c1) = self.split_u8x64(c);
7962        self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
7963    }
7964    #[inline(always)]
7965    fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7966        let (a0, a1) = self.split_u8x64(a);
7967        let (b0, b1) = self.split_u8x64(b);
7968        self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
7969    }
7970    #[inline(always)]
7971    fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
7972        let (a0, a1) = self.split_u8x64(a);
7973        let (b0, b1) = self.split_u8x64(b);
7974        self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
7975    }
7976    #[inline(always)]
7977    fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
7978        let mut b0 = [0; 32usize];
7979        let mut b1 = [0; 32usize];
7980        b0.copy_from_slice(&a.val.0[0..32usize]);
7981        b1.copy_from_slice(&a.val.0[32usize..64usize]);
7982        (b0.simd_into(self), b1.simd_into(self))
7983    }
7984    #[inline(always)]
7985    fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
7986        [
7987            src[0usize],
7988            src[4usize],
7989            src[8usize],
7990            src[12usize],
7991            src[16usize],
7992            src[20usize],
7993            src[24usize],
7994            src[28usize],
7995            src[32usize],
7996            src[36usize],
7997            src[40usize],
7998            src[44usize],
7999            src[48usize],
8000            src[52usize],
8001            src[56usize],
8002            src[60usize],
8003            src[1usize],
8004            src[5usize],
8005            src[9usize],
8006            src[13usize],
8007            src[17usize],
8008            src[21usize],
8009            src[25usize],
8010            src[29usize],
8011            src[33usize],
8012            src[37usize],
8013            src[41usize],
8014            src[45usize],
8015            src[49usize],
8016            src[53usize],
8017            src[57usize],
8018            src[61usize],
8019            src[2usize],
8020            src[6usize],
8021            src[10usize],
8022            src[14usize],
8023            src[18usize],
8024            src[22usize],
8025            src[26usize],
8026            src[30usize],
8027            src[34usize],
8028            src[38usize],
8029            src[42usize],
8030            src[46usize],
8031            src[50usize],
8032            src[54usize],
8033            src[58usize],
8034            src[62usize],
8035            src[3usize],
8036            src[7usize],
8037            src[11usize],
8038            src[15usize],
8039            src[19usize],
8040            src[23usize],
8041            src[27usize],
8042            src[31usize],
8043            src[35usize],
8044            src[39usize],
8045            src[43usize],
8046            src[47usize],
8047            src[51usize],
8048            src[55usize],
8049            src[59usize],
8050            src[63usize],
8051        ]
8052        .simd_into(self)
8053    }
8054    #[inline(always)]
8055    fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
8056        *dest = [
8057            a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
8058            a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
8059            a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
8060            a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
8061            a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
8062            a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
8063            a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
8064            a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
8065            a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
8066            a[63usize],
8067        ];
8068    }
8069    #[inline(always)]
8070    fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
8071        let (a0, a1) = self.split_u8x64(a);
8072        self.combine_u32x8(
8073            self.reinterpret_u32_u8x32(a0),
8074            self.reinterpret_u32_u8x32(a1),
8075        )
8076    }
8077    #[inline(always)]
8078    fn splat_mask8x64(self, val: i8) -> mask8x64<Self> {
8079        let half = self.splat_mask8x32(val);
8080        self.combine_mask8x32(half, half)
8081    }
8082    #[inline(always)]
8083    fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64<Self> {
8084        mask8x64 {
8085            val: crate::support::Aligned512(val),
8086            simd: self,
8087        }
8088    }
8089    #[inline(always)]
8090    fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64<Self> {
8091        mask8x64 {
8092            val: crate::support::Aligned512(*val),
8093            simd: self,
8094        }
8095    }
8096    #[inline(always)]
8097    fn as_array_mask8x64(self, a: mask8x64<Self>) -> [i8; 64usize] {
8098        a.val.0
8099    }
8100    #[inline(always)]
8101    fn as_array_ref_mask8x64(self, a: &mask8x64<Self>) -> &[i8; 64usize] {
8102        &a.val.0
8103    }
8104    #[inline(always)]
8105    fn as_array_mut_mask8x64(self, a: &mut mask8x64<Self>) -> &mut [i8; 64usize] {
8106        &mut a.val.0
8107    }
8108    #[inline(always)]
8109    fn store_array_mask8x64(self, a: mask8x64<Self>, dest: &mut [i8; 64usize]) -> () {
8110        *dest = a.val.0;
8111    }
8112    #[inline(always)]
8113    fn cvt_from_bytes_mask8x64(self, a: u8x64<Self>) -> mask8x64<Self> {
8114        unsafe {
8115            mask8x64 {
8116                val: core::mem::transmute(a.val),
8117                simd: self,
8118            }
8119        }
8120    }
8121    #[inline(always)]
8122    fn cvt_to_bytes_mask8x64(self, a: mask8x64<Self>) -> u8x64<Self> {
8123        unsafe {
8124            u8x64 {
8125                val: core::mem::transmute(a.val),
8126                simd: self,
8127            }
8128        }
8129    }
8130    #[inline(always)]
8131    fn slide_mask8x64<const SHIFT: usize>(
8132        self,
8133        a: mask8x64<Self>,
8134        b: mask8x64<Self>,
8135    ) -> mask8x64<Self> {
8136        let mut dest = [Default::default(); 64usize];
8137        dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8138        dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8139        dest.simd_into(self)
8140    }
8141    #[inline(always)]
8142    fn slide_within_blocks_mask8x64<const SHIFT: usize>(
8143        self,
8144        a: mask8x64<Self>,
8145        b: mask8x64<Self>,
8146    ) -> mask8x64<Self> {
8147        let (a0, a1) = self.split_mask8x64(a);
8148        let (b0, b1) = self.split_mask8x64(b);
8149        self.combine_mask8x32(
8150            self.slide_within_blocks_mask8x32::<SHIFT>(a0, b0),
8151            self.slide_within_blocks_mask8x32::<SHIFT>(a1, b1),
8152        )
8153    }
8154    #[inline(always)]
8155    fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8156        let (a0, a1) = self.split_mask8x64(a);
8157        let (b0, b1) = self.split_mask8x64(b);
8158        self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
8159    }
8160    #[inline(always)]
8161    fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8162        let (a0, a1) = self.split_mask8x64(a);
8163        let (b0, b1) = self.split_mask8x64(b);
8164        self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
8165    }
8166    #[inline(always)]
8167    fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8168        let (a0, a1) = self.split_mask8x64(a);
8169        let (b0, b1) = self.split_mask8x64(b);
8170        self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
8171    }
8172    #[inline(always)]
8173    fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
8174        let (a0, a1) = self.split_mask8x64(a);
8175        self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
8176    }
8177    #[inline(always)]
8178    fn select_mask8x64(
8179        self,
8180        a: mask8x64<Self>,
8181        b: mask8x64<Self>,
8182        c: mask8x64<Self>,
8183    ) -> mask8x64<Self> {
8184        let (a0, a1) = self.split_mask8x64(a);
8185        let (b0, b1) = self.split_mask8x64(b);
8186        let (c0, c1) = self.split_mask8x64(c);
8187        self.combine_mask8x32(
8188            self.select_mask8x32(a0, b0, c0),
8189            self.select_mask8x32(a1, b1, c1),
8190        )
8191    }
8192    #[inline(always)]
8193    fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
8194        let (a0, a1) = self.split_mask8x64(a);
8195        let (b0, b1) = self.split_mask8x64(b);
8196        self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
8197    }
8198    #[inline(always)]
8199    fn any_true_mask8x64(self, a: mask8x64<Self>) -> bool {
8200        let (a0, a1) = self.split_mask8x64(a);
8201        self.any_true_mask8x32(a0) || self.any_true_mask8x32(a1)
8202    }
8203    #[inline(always)]
8204    fn all_true_mask8x64(self, a: mask8x64<Self>) -> bool {
8205        let (a0, a1) = self.split_mask8x64(a);
8206        self.all_true_mask8x32(a0) && self.all_true_mask8x32(a1)
8207    }
8208    #[inline(always)]
8209    fn any_false_mask8x64(self, a: mask8x64<Self>) -> bool {
8210        let (a0, a1) = self.split_mask8x64(a);
8211        self.any_false_mask8x32(a0) || self.any_false_mask8x32(a1)
8212    }
8213    #[inline(always)]
8214    fn all_false_mask8x64(self, a: mask8x64<Self>) -> bool {
8215        let (a0, a1) = self.split_mask8x64(a);
8216        self.all_false_mask8x32(a0) && self.all_false_mask8x32(a1)
8217    }
8218    #[inline(always)]
8219    fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
8220        let mut b0 = [0; 32usize];
8221        let mut b1 = [0; 32usize];
8222        b0.copy_from_slice(&a.val.0[0..32usize]);
8223        b1.copy_from_slice(&a.val.0[32usize..64usize]);
8224        (b0.simd_into(self), b1.simd_into(self))
8225    }
8226    #[inline(always)]
8227    fn splat_i16x32(self, val: i16) -> i16x32<Self> {
8228        let half = self.splat_i16x16(val);
8229        self.combine_i16x16(half, half)
8230    }
8231    #[inline(always)]
8232    fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32<Self> {
8233        i16x32 {
8234            val: crate::support::Aligned512(val),
8235            simd: self,
8236        }
8237    }
8238    #[inline(always)]
8239    fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32<Self> {
8240        i16x32 {
8241            val: crate::support::Aligned512(*val),
8242            simd: self,
8243        }
8244    }
8245    #[inline(always)]
8246    fn as_array_i16x32(self, a: i16x32<Self>) -> [i16; 32usize] {
8247        a.val.0
8248    }
8249    #[inline(always)]
8250    fn as_array_ref_i16x32(self, a: &i16x32<Self>) -> &[i16; 32usize] {
8251        &a.val.0
8252    }
8253    #[inline(always)]
8254    fn as_array_mut_i16x32(self, a: &mut i16x32<Self>) -> &mut [i16; 32usize] {
8255        &mut a.val.0
8256    }
8257    #[inline(always)]
8258    fn store_array_i16x32(self, a: i16x32<Self>, dest: &mut [i16; 32usize]) -> () {
8259        *dest = a.val.0;
8260    }
8261    #[inline(always)]
8262    fn cvt_from_bytes_i16x32(self, a: u8x64<Self>) -> i16x32<Self> {
8263        unsafe {
8264            i16x32 {
8265                val: core::mem::transmute(a.val),
8266                simd: self,
8267            }
8268        }
8269    }
8270    #[inline(always)]
8271    fn cvt_to_bytes_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
8272        unsafe {
8273            u8x64 {
8274                val: core::mem::transmute(a.val),
8275                simd: self,
8276            }
8277        }
8278    }
8279    #[inline(always)]
8280    fn slide_i16x32<const SHIFT: usize>(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8281        let mut dest = [Default::default(); 32usize];
8282        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8283        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8284        dest.simd_into(self)
8285    }
8286    #[inline(always)]
8287    fn slide_within_blocks_i16x32<const SHIFT: usize>(
8288        self,
8289        a: i16x32<Self>,
8290        b: i16x32<Self>,
8291    ) -> i16x32<Self> {
8292        let (a0, a1) = self.split_i16x32(a);
8293        let (b0, b1) = self.split_i16x32(b);
8294        self.combine_i16x16(
8295            self.slide_within_blocks_i16x16::<SHIFT>(a0, b0),
8296            self.slide_within_blocks_i16x16::<SHIFT>(a1, b1),
8297        )
8298    }
8299    #[inline(always)]
8300    fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8301        let (a0, a1) = self.split_i16x32(a);
8302        let (b0, b1) = self.split_i16x32(b);
8303        self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
8304    }
8305    #[inline(always)]
8306    fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8307        let (a0, a1) = self.split_i16x32(a);
8308        let (b0, b1) = self.split_i16x32(b);
8309        self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
8310    }
8311    #[inline(always)]
8312    fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8313        let (a0, a1) = self.split_i16x32(a);
8314        let (b0, b1) = self.split_i16x32(b);
8315        self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
8316    }
8317    #[inline(always)]
8318    fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8319        let (a0, a1) = self.split_i16x32(a);
8320        let (b0, b1) = self.split_i16x32(b);
8321        self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
8322    }
8323    #[inline(always)]
8324    fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8325        let (a0, a1) = self.split_i16x32(a);
8326        let (b0, b1) = self.split_i16x32(b);
8327        self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
8328    }
8329    #[inline(always)]
8330    fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8331        let (a0, a1) = self.split_i16x32(a);
8332        let (b0, b1) = self.split_i16x32(b);
8333        self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
8334    }
8335    #[inline(always)]
8336    fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
8337        let (a0, a1) = self.split_i16x32(a);
8338        self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
8339    }
8340    #[inline(always)]
8341    fn shl_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
8342        let (a0, a1) = self.split_i16x32(a);
8343        self.combine_i16x16(self.shl_i16x16(a0, shift), self.shl_i16x16(a1, shift))
8344    }
8345    #[inline(always)]
8346    fn shlv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8347        let (a0, a1) = self.split_i16x32(a);
8348        let (b0, b1) = self.split_i16x32(b);
8349        self.combine_i16x16(self.shlv_i16x16(a0, b0), self.shlv_i16x16(a1, b1))
8350    }
8351    #[inline(always)]
8352    fn shr_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
8353        let (a0, a1) = self.split_i16x32(a);
8354        self.combine_i16x16(self.shr_i16x16(a0, shift), self.shr_i16x16(a1, shift))
8355    }
8356    #[inline(always)]
8357    fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8358        let (a0, a1) = self.split_i16x32(a);
8359        let (b0, b1) = self.split_i16x32(b);
8360        self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
8361    }
8362    #[inline(always)]
8363    fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8364        let (a0, a1) = self.split_i16x32(a);
8365        let (b0, b1) = self.split_i16x32(b);
8366        self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
8367    }
8368    #[inline(always)]
8369    fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8370        let (a0, a1) = self.split_i16x32(a);
8371        let (b0, b1) = self.split_i16x32(b);
8372        self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
8373    }
8374    #[inline(always)]
8375    fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8376        let (a0, a1) = self.split_i16x32(a);
8377        let (b0, b1) = self.split_i16x32(b);
8378        self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
8379    }
8380    #[inline(always)]
8381    fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8382        let (a0, a1) = self.split_i16x32(a);
8383        let (b0, b1) = self.split_i16x32(b);
8384        self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
8385    }
8386    #[inline(always)]
8387    fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
8388        let (a0, a1) = self.split_i16x32(a);
8389        let (b0, b1) = self.split_i16x32(b);
8390        self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
8391    }
8392    #[inline(always)]
8393    fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8394        let (a0, _) = self.split_i16x32(a);
8395        let (b0, _) = self.split_i16x32(b);
8396        self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
8397    }
8398    #[inline(always)]
8399    fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8400        let (_, a1) = self.split_i16x32(a);
8401        let (_, b1) = self.split_i16x32(b);
8402        self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
8403    }
8404    #[inline(always)]
8405    fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8406        let (a0, a1) = self.split_i16x32(a);
8407        let (b0, b1) = self.split_i16x32(b);
8408        self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
8409    }
8410    #[inline(always)]
8411    fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8412        let (a0, a1) = self.split_i16x32(a);
8413        let (b0, b1) = self.split_i16x32(b);
8414        self.combine_i16x16(
8415            self.unzip_high_i16x16(a0, a1),
8416            self.unzip_high_i16x16(b0, b1),
8417        )
8418    }
8419    #[inline(always)]
8420    fn interleave_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> (i16x32<Self>, i16x32<Self>) {
8421        let (a0, a1) = self.split_i16x32(a);
8422        let (b0, b1) = self.split_i16x32(b);
8423        let lo_lo = self.zip_low_i16x16(a0, b0);
8424        let lo_hi = self.zip_high_i16x16(a0, b0);
8425        let hi_lo = self.zip_low_i16x16(a1, b1);
8426        let hi_hi = self.zip_high_i16x16(a1, b1);
8427        (
8428            self.combine_i16x16(lo_lo, lo_hi),
8429            self.combine_i16x16(hi_lo, hi_hi),
8430        )
8431    }
8432    #[inline(always)]
8433    fn deinterleave_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> (i16x32<Self>, i16x32<Self>) {
8434        let (a0, a1) = self.split_i16x32(a);
8435        let (b0, b1) = self.split_i16x32(b);
8436        let lo_even = self.unzip_low_i16x16(a0, a1);
8437        let lo_odd = self.unzip_high_i16x16(a0, a1);
8438        let hi_even = self.unzip_low_i16x16(b0, b1);
8439        let hi_odd = self.unzip_high_i16x16(b0, b1);
8440        (
8441            self.combine_i16x16(lo_even, hi_even),
8442            self.combine_i16x16(lo_odd, hi_odd),
8443        )
8444    }
8445    #[inline(always)]
8446    fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
8447        let (a0, a1) = self.split_mask16x32(a);
8448        let (b0, b1) = self.split_i16x32(b);
8449        let (c0, c1) = self.split_i16x32(c);
8450        self.combine_i16x16(
8451            self.select_i16x16(a0, b0, c0),
8452            self.select_i16x16(a1, b1, c1),
8453        )
8454    }
8455    #[inline(always)]
8456    fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8457        let (a0, a1) = self.split_i16x32(a);
8458        let (b0, b1) = self.split_i16x32(b);
8459        self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
8460    }
8461    #[inline(always)]
8462    fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
8463        let (a0, a1) = self.split_i16x32(a);
8464        let (b0, b1) = self.split_i16x32(b);
8465        self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
8466    }
8467    #[inline(always)]
8468    fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
8469        let mut b0 = [0; 16usize];
8470        let mut b1 = [0; 16usize];
8471        b0.copy_from_slice(&a.val.0[0..16usize]);
8472        b1.copy_from_slice(&a.val.0[16usize..32usize]);
8473        (b0.simd_into(self), b1.simd_into(self))
8474    }
8475    #[inline(always)]
8476    fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
8477        let (a0, a1) = self.split_i16x32(a);
8478        self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
8479    }
8480    #[inline(always)]
8481    fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
8482        let (a0, a1) = self.split_i16x32(a);
8483        self.combine_u8x32(
8484            self.reinterpret_u8_i16x16(a0),
8485            self.reinterpret_u8_i16x16(a1),
8486        )
8487    }
8488    #[inline(always)]
8489    fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
8490        let (a0, a1) = self.split_i16x32(a);
8491        self.combine_u32x8(
8492            self.reinterpret_u32_i16x16(a0),
8493            self.reinterpret_u32_i16x16(a1),
8494        )
8495    }
8496    #[inline(always)]
8497    fn splat_u16x32(self, val: u16) -> u16x32<Self> {
8498        let half = self.splat_u16x16(val);
8499        self.combine_u16x16(half, half)
8500    }
8501    #[inline(always)]
8502    fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32<Self> {
8503        u16x32 {
8504            val: crate::support::Aligned512(val),
8505            simd: self,
8506        }
8507    }
8508    #[inline(always)]
8509    fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32<Self> {
8510        u16x32 {
8511            val: crate::support::Aligned512(*val),
8512            simd: self,
8513        }
8514    }
8515    #[inline(always)]
8516    fn as_array_u16x32(self, a: u16x32<Self>) -> [u16; 32usize] {
8517        a.val.0
8518    }
8519    #[inline(always)]
8520    fn as_array_ref_u16x32(self, a: &u16x32<Self>) -> &[u16; 32usize] {
8521        &a.val.0
8522    }
8523    #[inline(always)]
8524    fn as_array_mut_u16x32(self, a: &mut u16x32<Self>) -> &mut [u16; 32usize] {
8525        &mut a.val.0
8526    }
8527    #[inline(always)]
8528    fn store_array_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
8529        *dest = a.val.0;
8530    }
8531    #[inline(always)]
8532    fn cvt_from_bytes_u16x32(self, a: u8x64<Self>) -> u16x32<Self> {
8533        unsafe {
8534            u16x32 {
8535                val: core::mem::transmute(a.val),
8536                simd: self,
8537            }
8538        }
8539    }
8540    #[inline(always)]
8541    fn cvt_to_bytes_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
8542        unsafe {
8543            u8x64 {
8544                val: core::mem::transmute(a.val),
8545                simd: self,
8546            }
8547        }
8548    }
8549    #[inline(always)]
8550    fn slide_u16x32<const SHIFT: usize>(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8551        let mut dest = [Default::default(); 32usize];
8552        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8553        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8554        dest.simd_into(self)
8555    }
8556    #[inline(always)]
8557    fn slide_within_blocks_u16x32<const SHIFT: usize>(
8558        self,
8559        a: u16x32<Self>,
8560        b: u16x32<Self>,
8561    ) -> u16x32<Self> {
8562        let (a0, a1) = self.split_u16x32(a);
8563        let (b0, b1) = self.split_u16x32(b);
8564        self.combine_u16x16(
8565            self.slide_within_blocks_u16x16::<SHIFT>(a0, b0),
8566            self.slide_within_blocks_u16x16::<SHIFT>(a1, b1),
8567        )
8568    }
8569    #[inline(always)]
8570    fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8571        let (a0, a1) = self.split_u16x32(a);
8572        let (b0, b1) = self.split_u16x32(b);
8573        self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
8574    }
8575    #[inline(always)]
8576    fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8577        let (a0, a1) = self.split_u16x32(a);
8578        let (b0, b1) = self.split_u16x32(b);
8579        self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
8580    }
8581    #[inline(always)]
8582    fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8583        let (a0, a1) = self.split_u16x32(a);
8584        let (b0, b1) = self.split_u16x32(b);
8585        self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
8586    }
8587    #[inline(always)]
8588    fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8589        let (a0, a1) = self.split_u16x32(a);
8590        let (b0, b1) = self.split_u16x32(b);
8591        self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
8592    }
8593    #[inline(always)]
8594    fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8595        let (a0, a1) = self.split_u16x32(a);
8596        let (b0, b1) = self.split_u16x32(b);
8597        self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
8598    }
8599    #[inline(always)]
8600    fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8601        let (a0, a1) = self.split_u16x32(a);
8602        let (b0, b1) = self.split_u16x32(b);
8603        self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
8604    }
8605    #[inline(always)]
8606    fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
8607        let (a0, a1) = self.split_u16x32(a);
8608        self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
8609    }
8610    #[inline(always)]
8611    fn shl_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
8612        let (a0, a1) = self.split_u16x32(a);
8613        self.combine_u16x16(self.shl_u16x16(a0, shift), self.shl_u16x16(a1, shift))
8614    }
8615    #[inline(always)]
8616    fn shlv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8617        let (a0, a1) = self.split_u16x32(a);
8618        let (b0, b1) = self.split_u16x32(b);
8619        self.combine_u16x16(self.shlv_u16x16(a0, b0), self.shlv_u16x16(a1, b1))
8620    }
8621    #[inline(always)]
8622    fn shr_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
8623        let (a0, a1) = self.split_u16x32(a);
8624        self.combine_u16x16(self.shr_u16x16(a0, shift), self.shr_u16x16(a1, shift))
8625    }
8626    #[inline(always)]
8627    fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8628        let (a0, a1) = self.split_u16x32(a);
8629        let (b0, b1) = self.split_u16x32(b);
8630        self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
8631    }
8632    #[inline(always)]
8633    fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8634        let (a0, a1) = self.split_u16x32(a);
8635        let (b0, b1) = self.split_u16x32(b);
8636        self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
8637    }
8638    #[inline(always)]
8639    fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8640        let (a0, a1) = self.split_u16x32(a);
8641        let (b0, b1) = self.split_u16x32(b);
8642        self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
8643    }
8644    #[inline(always)]
8645    fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8646        let (a0, a1) = self.split_u16x32(a);
8647        let (b0, b1) = self.split_u16x32(b);
8648        self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
8649    }
8650    #[inline(always)]
8651    fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8652        let (a0, a1) = self.split_u16x32(a);
8653        let (b0, b1) = self.split_u16x32(b);
8654        self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
8655    }
8656    #[inline(always)]
8657    fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
8658        let (a0, a1) = self.split_u16x32(a);
8659        let (b0, b1) = self.split_u16x32(b);
8660        self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
8661    }
8662    #[inline(always)]
8663    fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8664        let (a0, _) = self.split_u16x32(a);
8665        let (b0, _) = self.split_u16x32(b);
8666        self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
8667    }
8668    #[inline(always)]
8669    fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8670        let (_, a1) = self.split_u16x32(a);
8671        let (_, b1) = self.split_u16x32(b);
8672        self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
8673    }
8674    #[inline(always)]
8675    fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8676        let (a0, a1) = self.split_u16x32(a);
8677        let (b0, b1) = self.split_u16x32(b);
8678        self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
8679    }
8680    #[inline(always)]
8681    fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8682        let (a0, a1) = self.split_u16x32(a);
8683        let (b0, b1) = self.split_u16x32(b);
8684        self.combine_u16x16(
8685            self.unzip_high_u16x16(a0, a1),
8686            self.unzip_high_u16x16(b0, b1),
8687        )
8688    }
8689    #[inline(always)]
8690    fn interleave_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> (u16x32<Self>, u16x32<Self>) {
8691        let (a0, a1) = self.split_u16x32(a);
8692        let (b0, b1) = self.split_u16x32(b);
8693        let lo_lo = self.zip_low_u16x16(a0, b0);
8694        let lo_hi = self.zip_high_u16x16(a0, b0);
8695        let hi_lo = self.zip_low_u16x16(a1, b1);
8696        let hi_hi = self.zip_high_u16x16(a1, b1);
8697        (
8698            self.combine_u16x16(lo_lo, lo_hi),
8699            self.combine_u16x16(hi_lo, hi_hi),
8700        )
8701    }
8702    #[inline(always)]
8703    fn deinterleave_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> (u16x32<Self>, u16x32<Self>) {
8704        let (a0, a1) = self.split_u16x32(a);
8705        let (b0, b1) = self.split_u16x32(b);
8706        let lo_even = self.unzip_low_u16x16(a0, a1);
8707        let lo_odd = self.unzip_high_u16x16(a0, a1);
8708        let hi_even = self.unzip_low_u16x16(b0, b1);
8709        let hi_odd = self.unzip_high_u16x16(b0, b1);
8710        (
8711            self.combine_u16x16(lo_even, hi_even),
8712            self.combine_u16x16(lo_odd, hi_odd),
8713        )
8714    }
8715    #[inline(always)]
8716    fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
8717        let (a0, a1) = self.split_mask16x32(a);
8718        let (b0, b1) = self.split_u16x32(b);
8719        let (c0, c1) = self.split_u16x32(c);
8720        self.combine_u16x16(
8721            self.select_u16x16(a0, b0, c0),
8722            self.select_u16x16(a1, b1, c1),
8723        )
8724    }
8725    #[inline(always)]
8726    fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8727        let (a0, a1) = self.split_u16x32(a);
8728        let (b0, b1) = self.split_u16x32(b);
8729        self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
8730    }
8731    #[inline(always)]
8732    fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
8733        let (a0, a1) = self.split_u16x32(a);
8734        let (b0, b1) = self.split_u16x32(b);
8735        self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
8736    }
8737    #[inline(always)]
8738    fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
8739        let mut b0 = [0; 16usize];
8740        let mut b1 = [0; 16usize];
8741        b0.copy_from_slice(&a.val.0[0..16usize]);
8742        b1.copy_from_slice(&a.val.0[16usize..32usize]);
8743        (b0.simd_into(self), b1.simd_into(self))
8744    }
8745    #[inline(always)]
8746    fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
8747        [
8748            src[0usize],
8749            src[4usize],
8750            src[8usize],
8751            src[12usize],
8752            src[16usize],
8753            src[20usize],
8754            src[24usize],
8755            src[28usize],
8756            src[1usize],
8757            src[5usize],
8758            src[9usize],
8759            src[13usize],
8760            src[17usize],
8761            src[21usize],
8762            src[25usize],
8763            src[29usize],
8764            src[2usize],
8765            src[6usize],
8766            src[10usize],
8767            src[14usize],
8768            src[18usize],
8769            src[22usize],
8770            src[26usize],
8771            src[30usize],
8772            src[3usize],
8773            src[7usize],
8774            src[11usize],
8775            src[15usize],
8776            src[19usize],
8777            src[23usize],
8778            src[27usize],
8779            src[31usize],
8780        ]
8781        .simd_into(self)
8782    }
8783    #[inline(always)]
8784    fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
8785        *dest = [
8786            a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
8787            a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
8788            a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
8789            a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
8790            a[7usize], a[15usize], a[23usize], a[31usize],
8791        ];
8792    }
8793    #[inline(always)]
8794    fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
8795        let (a0, a1) = self.split_u16x32(a);
8796        self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
8797    }
8798    #[inline(always)]
8799    fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
8800        let (a0, a1) = self.split_u16x32(a);
8801        self.combine_u8x32(
8802            self.reinterpret_u8_u16x16(a0),
8803            self.reinterpret_u8_u16x16(a1),
8804        )
8805    }
8806    #[inline(always)]
8807    fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
8808        let (a0, a1) = self.split_u16x32(a);
8809        self.combine_u32x8(
8810            self.reinterpret_u32_u16x16(a0),
8811            self.reinterpret_u32_u16x16(a1),
8812        )
8813    }
8814    #[inline(always)]
8815    fn splat_mask16x32(self, val: i16) -> mask16x32<Self> {
8816        let half = self.splat_mask16x16(val);
8817        self.combine_mask16x16(half, half)
8818    }
8819    #[inline(always)]
8820    fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32<Self> {
8821        mask16x32 {
8822            val: crate::support::Aligned512(val),
8823            simd: self,
8824        }
8825    }
8826    #[inline(always)]
8827    fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32<Self> {
8828        mask16x32 {
8829            val: crate::support::Aligned512(*val),
8830            simd: self,
8831        }
8832    }
8833    #[inline(always)]
8834    fn as_array_mask16x32(self, a: mask16x32<Self>) -> [i16; 32usize] {
8835        a.val.0
8836    }
8837    #[inline(always)]
8838    fn as_array_ref_mask16x32(self, a: &mask16x32<Self>) -> &[i16; 32usize] {
8839        &a.val.0
8840    }
8841    #[inline(always)]
8842    fn as_array_mut_mask16x32(self, a: &mut mask16x32<Self>) -> &mut [i16; 32usize] {
8843        &mut a.val.0
8844    }
8845    #[inline(always)]
8846    fn store_array_mask16x32(self, a: mask16x32<Self>, dest: &mut [i16; 32usize]) -> () {
8847        *dest = a.val.0;
8848    }
8849    #[inline(always)]
8850    fn cvt_from_bytes_mask16x32(self, a: u8x64<Self>) -> mask16x32<Self> {
8851        unsafe {
8852            mask16x32 {
8853                val: core::mem::transmute(a.val),
8854                simd: self,
8855            }
8856        }
8857    }
8858    #[inline(always)]
8859    fn cvt_to_bytes_mask16x32(self, a: mask16x32<Self>) -> u8x64<Self> {
8860        unsafe {
8861            u8x64 {
8862                val: core::mem::transmute(a.val),
8863                simd: self,
8864            }
8865        }
8866    }
8867    #[inline(always)]
8868    fn slide_mask16x32<const SHIFT: usize>(
8869        self,
8870        a: mask16x32<Self>,
8871        b: mask16x32<Self>,
8872    ) -> mask16x32<Self> {
8873        let mut dest = [Default::default(); 32usize];
8874        dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
8875        dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
8876        dest.simd_into(self)
8877    }
8878    #[inline(always)]
8879    fn slide_within_blocks_mask16x32<const SHIFT: usize>(
8880        self,
8881        a: mask16x32<Self>,
8882        b: mask16x32<Self>,
8883    ) -> mask16x32<Self> {
8884        let (a0, a1) = self.split_mask16x32(a);
8885        let (b0, b1) = self.split_mask16x32(b);
8886        self.combine_mask16x16(
8887            self.slide_within_blocks_mask16x16::<SHIFT>(a0, b0),
8888            self.slide_within_blocks_mask16x16::<SHIFT>(a1, b1),
8889        )
8890    }
8891    #[inline(always)]
8892    fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8893        let (a0, a1) = self.split_mask16x32(a);
8894        let (b0, b1) = self.split_mask16x32(b);
8895        self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
8896    }
8897    #[inline(always)]
8898    fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8899        let (a0, a1) = self.split_mask16x32(a);
8900        let (b0, b1) = self.split_mask16x32(b);
8901        self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
8902    }
8903    #[inline(always)]
8904    fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8905        let (a0, a1) = self.split_mask16x32(a);
8906        let (b0, b1) = self.split_mask16x32(b);
8907        self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
8908    }
8909    #[inline(always)]
8910    fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
8911        let (a0, a1) = self.split_mask16x32(a);
8912        self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
8913    }
8914    #[inline(always)]
8915    fn select_mask16x32(
8916        self,
8917        a: mask16x32<Self>,
8918        b: mask16x32<Self>,
8919        c: mask16x32<Self>,
8920    ) -> mask16x32<Self> {
8921        let (a0, a1) = self.split_mask16x32(a);
8922        let (b0, b1) = self.split_mask16x32(b);
8923        let (c0, c1) = self.split_mask16x32(c);
8924        self.combine_mask16x16(
8925            self.select_mask16x16(a0, b0, c0),
8926            self.select_mask16x16(a1, b1, c1),
8927        )
8928    }
8929    #[inline(always)]
8930    fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
8931        let (a0, a1) = self.split_mask16x32(a);
8932        let (b0, b1) = self.split_mask16x32(b);
8933        self.combine_mask16x16(
8934            self.simd_eq_mask16x16(a0, b0),
8935            self.simd_eq_mask16x16(a1, b1),
8936        )
8937    }
8938    #[inline(always)]
8939    fn any_true_mask16x32(self, a: mask16x32<Self>) -> bool {
8940        let (a0, a1) = self.split_mask16x32(a);
8941        self.any_true_mask16x16(a0) || self.any_true_mask16x16(a1)
8942    }
8943    #[inline(always)]
8944    fn all_true_mask16x32(self, a: mask16x32<Self>) -> bool {
8945        let (a0, a1) = self.split_mask16x32(a);
8946        self.all_true_mask16x16(a0) && self.all_true_mask16x16(a1)
8947    }
8948    #[inline(always)]
8949    fn any_false_mask16x32(self, a: mask16x32<Self>) -> bool {
8950        let (a0, a1) = self.split_mask16x32(a);
8951        self.any_false_mask16x16(a0) || self.any_false_mask16x16(a1)
8952    }
8953    #[inline(always)]
8954    fn all_false_mask16x32(self, a: mask16x32<Self>) -> bool {
8955        let (a0, a1) = self.split_mask16x32(a);
8956        self.all_false_mask16x16(a0) && self.all_false_mask16x16(a1)
8957    }
8958    #[inline(always)]
8959    fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
8960        let mut b0 = [0; 16usize];
8961        let mut b1 = [0; 16usize];
8962        b0.copy_from_slice(&a.val.0[0..16usize]);
8963        b1.copy_from_slice(&a.val.0[16usize..32usize]);
8964        (b0.simd_into(self), b1.simd_into(self))
8965    }
8966    #[inline(always)]
8967    fn splat_i32x16(self, val: i32) -> i32x16<Self> {
8968        let half = self.splat_i32x8(val);
8969        self.combine_i32x8(half, half)
8970    }
8971    #[inline(always)]
8972    fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16<Self> {
8973        i32x16 {
8974            val: crate::support::Aligned512(val),
8975            simd: self,
8976        }
8977    }
8978    #[inline(always)]
8979    fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16<Self> {
8980        i32x16 {
8981            val: crate::support::Aligned512(*val),
8982            simd: self,
8983        }
8984    }
8985    #[inline(always)]
8986    fn as_array_i32x16(self, a: i32x16<Self>) -> [i32; 16usize] {
8987        a.val.0
8988    }
8989    #[inline(always)]
8990    fn as_array_ref_i32x16(self, a: &i32x16<Self>) -> &[i32; 16usize] {
8991        &a.val.0
8992    }
8993    #[inline(always)]
8994    fn as_array_mut_i32x16(self, a: &mut i32x16<Self>) -> &mut [i32; 16usize] {
8995        &mut a.val.0
8996    }
8997    #[inline(always)]
8998    fn store_array_i32x16(self, a: i32x16<Self>, dest: &mut [i32; 16usize]) -> () {
8999        *dest = a.val.0;
9000    }
9001    #[inline(always)]
9002    fn cvt_from_bytes_i32x16(self, a: u8x64<Self>) -> i32x16<Self> {
9003        unsafe {
9004            i32x16 {
9005                val: core::mem::transmute(a.val),
9006                simd: self,
9007            }
9008        }
9009    }
9010    #[inline(always)]
9011    fn cvt_to_bytes_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
9012        unsafe {
9013            u8x64 {
9014                val: core::mem::transmute(a.val),
9015                simd: self,
9016            }
9017        }
9018    }
9019    #[inline(always)]
9020    fn slide_i32x16<const SHIFT: usize>(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9021        let mut dest = [Default::default(); 16usize];
9022        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9023        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9024        dest.simd_into(self)
9025    }
9026    #[inline(always)]
9027    fn slide_within_blocks_i32x16<const SHIFT: usize>(
9028        self,
9029        a: i32x16<Self>,
9030        b: i32x16<Self>,
9031    ) -> i32x16<Self> {
9032        let (a0, a1) = self.split_i32x16(a);
9033        let (b0, b1) = self.split_i32x16(b);
9034        self.combine_i32x8(
9035            self.slide_within_blocks_i32x8::<SHIFT>(a0, b0),
9036            self.slide_within_blocks_i32x8::<SHIFT>(a1, b1),
9037        )
9038    }
9039    #[inline(always)]
9040    fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9041        let (a0, a1) = self.split_i32x16(a);
9042        let (b0, b1) = self.split_i32x16(b);
9043        self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
9044    }
9045    #[inline(always)]
9046    fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9047        let (a0, a1) = self.split_i32x16(a);
9048        let (b0, b1) = self.split_i32x16(b);
9049        self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
9050    }
9051    #[inline(always)]
9052    fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9053        let (a0, a1) = self.split_i32x16(a);
9054        let (b0, b1) = self.split_i32x16(b);
9055        self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
9056    }
9057    #[inline(always)]
9058    fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9059        let (a0, a1) = self.split_i32x16(a);
9060        let (b0, b1) = self.split_i32x16(b);
9061        self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
9062    }
9063    #[inline(always)]
9064    fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9065        let (a0, a1) = self.split_i32x16(a);
9066        let (b0, b1) = self.split_i32x16(b);
9067        self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
9068    }
9069    #[inline(always)]
9070    fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9071        let (a0, a1) = self.split_i32x16(a);
9072        let (b0, b1) = self.split_i32x16(b);
9073        self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
9074    }
9075    #[inline(always)]
9076    fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
9077        let (a0, a1) = self.split_i32x16(a);
9078        self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
9079    }
9080    #[inline(always)]
9081    fn shl_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
9082        let (a0, a1) = self.split_i32x16(a);
9083        self.combine_i32x8(self.shl_i32x8(a0, shift), self.shl_i32x8(a1, shift))
9084    }
9085    #[inline(always)]
9086    fn shlv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9087        let (a0, a1) = self.split_i32x16(a);
9088        let (b0, b1) = self.split_i32x16(b);
9089        self.combine_i32x8(self.shlv_i32x8(a0, b0), self.shlv_i32x8(a1, b1))
9090    }
9091    #[inline(always)]
9092    fn shr_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
9093        let (a0, a1) = self.split_i32x16(a);
9094        self.combine_i32x8(self.shr_i32x8(a0, shift), self.shr_i32x8(a1, shift))
9095    }
9096    #[inline(always)]
9097    fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9098        let (a0, a1) = self.split_i32x16(a);
9099        let (b0, b1) = self.split_i32x16(b);
9100        self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
9101    }
9102    #[inline(always)]
9103    fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9104        let (a0, a1) = self.split_i32x16(a);
9105        let (b0, b1) = self.split_i32x16(b);
9106        self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
9107    }
9108    #[inline(always)]
9109    fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9110        let (a0, a1) = self.split_i32x16(a);
9111        let (b0, b1) = self.split_i32x16(b);
9112        self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
9113    }
9114    #[inline(always)]
9115    fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9116        let (a0, a1) = self.split_i32x16(a);
9117        let (b0, b1) = self.split_i32x16(b);
9118        self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
9119    }
9120    #[inline(always)]
9121    fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9122        let (a0, a1) = self.split_i32x16(a);
9123        let (b0, b1) = self.split_i32x16(b);
9124        self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
9125    }
9126    #[inline(always)]
9127    fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
9128        let (a0, a1) = self.split_i32x16(a);
9129        let (b0, b1) = self.split_i32x16(b);
9130        self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
9131    }
9132    #[inline(always)]
9133    fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9134        let (a0, _) = self.split_i32x16(a);
9135        let (b0, _) = self.split_i32x16(b);
9136        self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
9137    }
9138    #[inline(always)]
9139    fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9140        let (_, a1) = self.split_i32x16(a);
9141        let (_, b1) = self.split_i32x16(b);
9142        self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
9143    }
9144    #[inline(always)]
9145    fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9146        let (a0, a1) = self.split_i32x16(a);
9147        let (b0, b1) = self.split_i32x16(b);
9148        self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
9149    }
9150    #[inline(always)]
9151    fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9152        let (a0, a1) = self.split_i32x16(a);
9153        let (b0, b1) = self.split_i32x16(b);
9154        self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
9155    }
9156    #[inline(always)]
9157    fn interleave_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> (i32x16<Self>, i32x16<Self>) {
9158        let (a0, a1) = self.split_i32x16(a);
9159        let (b0, b1) = self.split_i32x16(b);
9160        let lo_lo = self.zip_low_i32x8(a0, b0);
9161        let lo_hi = self.zip_high_i32x8(a0, b0);
9162        let hi_lo = self.zip_low_i32x8(a1, b1);
9163        let hi_hi = self.zip_high_i32x8(a1, b1);
9164        (
9165            self.combine_i32x8(lo_lo, lo_hi),
9166            self.combine_i32x8(hi_lo, hi_hi),
9167        )
9168    }
9169    #[inline(always)]
9170    fn deinterleave_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> (i32x16<Self>, i32x16<Self>) {
9171        let (a0, a1) = self.split_i32x16(a);
9172        let (b0, b1) = self.split_i32x16(b);
9173        let lo_even = self.unzip_low_i32x8(a0, a1);
9174        let lo_odd = self.unzip_high_i32x8(a0, a1);
9175        let hi_even = self.unzip_low_i32x8(b0, b1);
9176        let hi_odd = self.unzip_high_i32x8(b0, b1);
9177        (
9178            self.combine_i32x8(lo_even, hi_even),
9179            self.combine_i32x8(lo_odd, hi_odd),
9180        )
9181    }
9182    #[inline(always)]
9183    fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
9184        let (a0, a1) = self.split_mask32x16(a);
9185        let (b0, b1) = self.split_i32x16(b);
9186        let (c0, c1) = self.split_i32x16(c);
9187        self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
9188    }
9189    #[inline(always)]
9190    fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9191        let (a0, a1) = self.split_i32x16(a);
9192        let (b0, b1) = self.split_i32x16(b);
9193        self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
9194    }
9195    #[inline(always)]
9196    fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
9197        let (a0, a1) = self.split_i32x16(a);
9198        let (b0, b1) = self.split_i32x16(b);
9199        self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
9200    }
9201    #[inline(always)]
9202    fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
9203        let mut b0 = [0; 8usize];
9204        let mut b1 = [0; 8usize];
9205        b0.copy_from_slice(&a.val.0[0..8usize]);
9206        b1.copy_from_slice(&a.val.0[8usize..16usize]);
9207        (b0.simd_into(self), b1.simd_into(self))
9208    }
9209    #[inline(always)]
9210    fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
9211        let (a0, a1) = self.split_i32x16(a);
9212        self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
9213    }
9214    #[inline(always)]
9215    fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
9216        let (a0, a1) = self.split_i32x16(a);
9217        self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
9218    }
9219    #[inline(always)]
9220    fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
9221        let (a0, a1) = self.split_i32x16(a);
9222        self.combine_u32x8(
9223            self.reinterpret_u32_i32x8(a0),
9224            self.reinterpret_u32_i32x8(a1),
9225        )
9226    }
9227    #[inline(always)]
9228    fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
9229        let (a0, a1) = self.split_i32x16(a);
9230        self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
9231    }
9232    #[inline(always)]
9233    fn splat_u32x16(self, val: u32) -> u32x16<Self> {
9234        let half = self.splat_u32x8(val);
9235        self.combine_u32x8(half, half)
9236    }
9237    #[inline(always)]
9238    fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16<Self> {
9239        u32x16 {
9240            val: crate::support::Aligned512(val),
9241            simd: self,
9242        }
9243    }
9244    #[inline(always)]
9245    fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16<Self> {
9246        u32x16 {
9247            val: crate::support::Aligned512(*val),
9248            simd: self,
9249        }
9250    }
9251    #[inline(always)]
9252    fn as_array_u32x16(self, a: u32x16<Self>) -> [u32; 16usize] {
9253        a.val.0
9254    }
9255    #[inline(always)]
9256    fn as_array_ref_u32x16(self, a: &u32x16<Self>) -> &[u32; 16usize] {
9257        &a.val.0
9258    }
9259    #[inline(always)]
9260    fn as_array_mut_u32x16(self, a: &mut u32x16<Self>) -> &mut [u32; 16usize] {
9261        &mut a.val.0
9262    }
9263    #[inline(always)]
9264    fn store_array_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
9265        *dest = a.val.0;
9266    }
9267    #[inline(always)]
9268    fn cvt_from_bytes_u32x16(self, a: u8x64<Self>) -> u32x16<Self> {
9269        unsafe {
9270            u32x16 {
9271                val: core::mem::transmute(a.val),
9272                simd: self,
9273            }
9274        }
9275    }
9276    #[inline(always)]
9277    fn cvt_to_bytes_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
9278        unsafe {
9279            u8x64 {
9280                val: core::mem::transmute(a.val),
9281                simd: self,
9282            }
9283        }
9284    }
9285    #[inline(always)]
9286    fn slide_u32x16<const SHIFT: usize>(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9287        let mut dest = [Default::default(); 16usize];
9288        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9289        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9290        dest.simd_into(self)
9291    }
9292    #[inline(always)]
9293    fn slide_within_blocks_u32x16<const SHIFT: usize>(
9294        self,
9295        a: u32x16<Self>,
9296        b: u32x16<Self>,
9297    ) -> u32x16<Self> {
9298        let (a0, a1) = self.split_u32x16(a);
9299        let (b0, b1) = self.split_u32x16(b);
9300        self.combine_u32x8(
9301            self.slide_within_blocks_u32x8::<SHIFT>(a0, b0),
9302            self.slide_within_blocks_u32x8::<SHIFT>(a1, b1),
9303        )
9304    }
9305    #[inline(always)]
9306    fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9307        let (a0, a1) = self.split_u32x16(a);
9308        let (b0, b1) = self.split_u32x16(b);
9309        self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
9310    }
9311    #[inline(always)]
9312    fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9313        let (a0, a1) = self.split_u32x16(a);
9314        let (b0, b1) = self.split_u32x16(b);
9315        self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
9316    }
9317    #[inline(always)]
9318    fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9319        let (a0, a1) = self.split_u32x16(a);
9320        let (b0, b1) = self.split_u32x16(b);
9321        self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
9322    }
9323    #[inline(always)]
9324    fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9325        let (a0, a1) = self.split_u32x16(a);
9326        let (b0, b1) = self.split_u32x16(b);
9327        self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
9328    }
9329    #[inline(always)]
9330    fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9331        let (a0, a1) = self.split_u32x16(a);
9332        let (b0, b1) = self.split_u32x16(b);
9333        self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
9334    }
9335    #[inline(always)]
9336    fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9337        let (a0, a1) = self.split_u32x16(a);
9338        let (b0, b1) = self.split_u32x16(b);
9339        self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
9340    }
9341    #[inline(always)]
9342    fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
9343        let (a0, a1) = self.split_u32x16(a);
9344        self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
9345    }
9346    #[inline(always)]
9347    fn shl_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
9348        let (a0, a1) = self.split_u32x16(a);
9349        self.combine_u32x8(self.shl_u32x8(a0, shift), self.shl_u32x8(a1, shift))
9350    }
9351    #[inline(always)]
9352    fn shlv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9353        let (a0, a1) = self.split_u32x16(a);
9354        let (b0, b1) = self.split_u32x16(b);
9355        self.combine_u32x8(self.shlv_u32x8(a0, b0), self.shlv_u32x8(a1, b1))
9356    }
9357    #[inline(always)]
9358    fn shr_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
9359        let (a0, a1) = self.split_u32x16(a);
9360        self.combine_u32x8(self.shr_u32x8(a0, shift), self.shr_u32x8(a1, shift))
9361    }
9362    #[inline(always)]
9363    fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9364        let (a0, a1) = self.split_u32x16(a);
9365        let (b0, b1) = self.split_u32x16(b);
9366        self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
9367    }
9368    #[inline(always)]
9369    fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9370        let (a0, a1) = self.split_u32x16(a);
9371        let (b0, b1) = self.split_u32x16(b);
9372        self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
9373    }
9374    #[inline(always)]
9375    fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9376        let (a0, a1) = self.split_u32x16(a);
9377        let (b0, b1) = self.split_u32x16(b);
9378        self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
9379    }
9380    #[inline(always)]
9381    fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9382        let (a0, a1) = self.split_u32x16(a);
9383        let (b0, b1) = self.split_u32x16(b);
9384        self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
9385    }
9386    #[inline(always)]
9387    fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9388        let (a0, a1) = self.split_u32x16(a);
9389        let (b0, b1) = self.split_u32x16(b);
9390        self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
9391    }
9392    #[inline(always)]
9393    fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
9394        let (a0, a1) = self.split_u32x16(a);
9395        let (b0, b1) = self.split_u32x16(b);
9396        self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
9397    }
9398    #[inline(always)]
9399    fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9400        let (a0, _) = self.split_u32x16(a);
9401        let (b0, _) = self.split_u32x16(b);
9402        self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
9403    }
9404    #[inline(always)]
9405    fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9406        let (_, a1) = self.split_u32x16(a);
9407        let (_, b1) = self.split_u32x16(b);
9408        self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
9409    }
9410    #[inline(always)]
9411    fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9412        let (a0, a1) = self.split_u32x16(a);
9413        let (b0, b1) = self.split_u32x16(b);
9414        self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
9415    }
9416    #[inline(always)]
9417    fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9418        let (a0, a1) = self.split_u32x16(a);
9419        let (b0, b1) = self.split_u32x16(b);
9420        self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
9421    }
9422    #[inline(always)]
9423    fn interleave_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> (u32x16<Self>, u32x16<Self>) {
9424        let (a0, a1) = self.split_u32x16(a);
9425        let (b0, b1) = self.split_u32x16(b);
9426        let lo_lo = self.zip_low_u32x8(a0, b0);
9427        let lo_hi = self.zip_high_u32x8(a0, b0);
9428        let hi_lo = self.zip_low_u32x8(a1, b1);
9429        let hi_hi = self.zip_high_u32x8(a1, b1);
9430        (
9431            self.combine_u32x8(lo_lo, lo_hi),
9432            self.combine_u32x8(hi_lo, hi_hi),
9433        )
9434    }
9435    #[inline(always)]
9436    fn deinterleave_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> (u32x16<Self>, u32x16<Self>) {
9437        let (a0, a1) = self.split_u32x16(a);
9438        let (b0, b1) = self.split_u32x16(b);
9439        let lo_even = self.unzip_low_u32x8(a0, a1);
9440        let lo_odd = self.unzip_high_u32x8(a0, a1);
9441        let hi_even = self.unzip_low_u32x8(b0, b1);
9442        let hi_odd = self.unzip_high_u32x8(b0, b1);
9443        (
9444            self.combine_u32x8(lo_even, hi_even),
9445            self.combine_u32x8(lo_odd, hi_odd),
9446        )
9447    }
9448    #[inline(always)]
9449    fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
9450        let (a0, a1) = self.split_mask32x16(a);
9451        let (b0, b1) = self.split_u32x16(b);
9452        let (c0, c1) = self.split_u32x16(c);
9453        self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
9454    }
9455    #[inline(always)]
9456    fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9457        let (a0, a1) = self.split_u32x16(a);
9458        let (b0, b1) = self.split_u32x16(b);
9459        self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
9460    }
9461    #[inline(always)]
9462    fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
9463        let (a0, a1) = self.split_u32x16(a);
9464        let (b0, b1) = self.split_u32x16(b);
9465        self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
9466    }
9467    #[inline(always)]
9468    fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
9469        let mut b0 = [0; 8usize];
9470        let mut b1 = [0; 8usize];
9471        b0.copy_from_slice(&a.val.0[0..8usize]);
9472        b1.copy_from_slice(&a.val.0[8usize..16usize]);
9473        (b0.simd_into(self), b1.simd_into(self))
9474    }
9475    #[inline(always)]
9476    fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
9477        [
9478            src[0usize],
9479            src[4usize],
9480            src[8usize],
9481            src[12usize],
9482            src[1usize],
9483            src[5usize],
9484            src[9usize],
9485            src[13usize],
9486            src[2usize],
9487            src[6usize],
9488            src[10usize],
9489            src[14usize],
9490            src[3usize],
9491            src[7usize],
9492            src[11usize],
9493            src[15usize],
9494        ]
9495        .simd_into(self)
9496    }
9497    #[inline(always)]
9498    fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
9499        *dest = [
9500            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
9501            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
9502            a[11usize], a[15usize],
9503        ];
9504    }
9505    #[inline(always)]
9506    fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
9507        let (a0, a1) = self.split_u32x16(a);
9508        self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
9509    }
9510    #[inline(always)]
9511    fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
9512        let (a0, a1) = self.split_u32x16(a);
9513        self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
9514    }
9515    #[inline(always)]
9516    fn splat_mask32x16(self, val: i32) -> mask32x16<Self> {
9517        let half = self.splat_mask32x8(val);
9518        self.combine_mask32x8(half, half)
9519    }
9520    #[inline(always)]
9521    fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16<Self> {
9522        mask32x16 {
9523            val: crate::support::Aligned512(val),
9524            simd: self,
9525        }
9526    }
9527    #[inline(always)]
9528    fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16<Self> {
9529        mask32x16 {
9530            val: crate::support::Aligned512(*val),
9531            simd: self,
9532        }
9533    }
9534    #[inline(always)]
9535    fn as_array_mask32x16(self, a: mask32x16<Self>) -> [i32; 16usize] {
9536        a.val.0
9537    }
9538    #[inline(always)]
9539    fn as_array_ref_mask32x16(self, a: &mask32x16<Self>) -> &[i32; 16usize] {
9540        &a.val.0
9541    }
9542    #[inline(always)]
9543    fn as_array_mut_mask32x16(self, a: &mut mask32x16<Self>) -> &mut [i32; 16usize] {
9544        &mut a.val.0
9545    }
9546    #[inline(always)]
9547    fn store_array_mask32x16(self, a: mask32x16<Self>, dest: &mut [i32; 16usize]) -> () {
9548        *dest = a.val.0;
9549    }
9550    #[inline(always)]
9551    fn cvt_from_bytes_mask32x16(self, a: u8x64<Self>) -> mask32x16<Self> {
9552        unsafe {
9553            mask32x16 {
9554                val: core::mem::transmute(a.val),
9555                simd: self,
9556            }
9557        }
9558    }
9559    #[inline(always)]
9560    fn cvt_to_bytes_mask32x16(self, a: mask32x16<Self>) -> u8x64<Self> {
9561        unsafe {
9562            u8x64 {
9563                val: core::mem::transmute(a.val),
9564                simd: self,
9565            }
9566        }
9567    }
9568    #[inline(always)]
9569    fn slide_mask32x16<const SHIFT: usize>(
9570        self,
9571        a: mask32x16<Self>,
9572        b: mask32x16<Self>,
9573    ) -> mask32x16<Self> {
9574        let mut dest = [Default::default(); 16usize];
9575        dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9576        dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9577        dest.simd_into(self)
9578    }
9579    #[inline(always)]
9580    fn slide_within_blocks_mask32x16<const SHIFT: usize>(
9581        self,
9582        a: mask32x16<Self>,
9583        b: mask32x16<Self>,
9584    ) -> mask32x16<Self> {
9585        let (a0, a1) = self.split_mask32x16(a);
9586        let (b0, b1) = self.split_mask32x16(b);
9587        self.combine_mask32x8(
9588            self.slide_within_blocks_mask32x8::<SHIFT>(a0, b0),
9589            self.slide_within_blocks_mask32x8::<SHIFT>(a1, b1),
9590        )
9591    }
9592    #[inline(always)]
9593    fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9594        let (a0, a1) = self.split_mask32x16(a);
9595        let (b0, b1) = self.split_mask32x16(b);
9596        self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
9597    }
9598    #[inline(always)]
9599    fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9600        let (a0, a1) = self.split_mask32x16(a);
9601        let (b0, b1) = self.split_mask32x16(b);
9602        self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
9603    }
9604    #[inline(always)]
9605    fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9606        let (a0, a1) = self.split_mask32x16(a);
9607        let (b0, b1) = self.split_mask32x16(b);
9608        self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
9609    }
9610    #[inline(always)]
9611    fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
9612        let (a0, a1) = self.split_mask32x16(a);
9613        self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
9614    }
9615    #[inline(always)]
9616    fn select_mask32x16(
9617        self,
9618        a: mask32x16<Self>,
9619        b: mask32x16<Self>,
9620        c: mask32x16<Self>,
9621    ) -> mask32x16<Self> {
9622        let (a0, a1) = self.split_mask32x16(a);
9623        let (b0, b1) = self.split_mask32x16(b);
9624        let (c0, c1) = self.split_mask32x16(c);
9625        self.combine_mask32x8(
9626            self.select_mask32x8(a0, b0, c0),
9627            self.select_mask32x8(a1, b1, c1),
9628        )
9629    }
9630    #[inline(always)]
9631    fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
9632        let (a0, a1) = self.split_mask32x16(a);
9633        let (b0, b1) = self.split_mask32x16(b);
9634        self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
9635    }
9636    #[inline(always)]
9637    fn any_true_mask32x16(self, a: mask32x16<Self>) -> bool {
9638        let (a0, a1) = self.split_mask32x16(a);
9639        self.any_true_mask32x8(a0) || self.any_true_mask32x8(a1)
9640    }
9641    #[inline(always)]
9642    fn all_true_mask32x16(self, a: mask32x16<Self>) -> bool {
9643        let (a0, a1) = self.split_mask32x16(a);
9644        self.all_true_mask32x8(a0) && self.all_true_mask32x8(a1)
9645    }
9646    #[inline(always)]
9647    fn any_false_mask32x16(self, a: mask32x16<Self>) -> bool {
9648        let (a0, a1) = self.split_mask32x16(a);
9649        self.any_false_mask32x8(a0) || self.any_false_mask32x8(a1)
9650    }
9651    #[inline(always)]
9652    fn all_false_mask32x16(self, a: mask32x16<Self>) -> bool {
9653        let (a0, a1) = self.split_mask32x16(a);
9654        self.all_false_mask32x8(a0) && self.all_false_mask32x8(a1)
9655    }
9656    #[inline(always)]
9657    fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
9658        let mut b0 = [0; 8usize];
9659        let mut b1 = [0; 8usize];
9660        b0.copy_from_slice(&a.val.0[0..8usize]);
9661        b1.copy_from_slice(&a.val.0[8usize..16usize]);
9662        (b0.simd_into(self), b1.simd_into(self))
9663    }
9664    #[inline(always)]
9665    fn splat_f64x8(self, val: f64) -> f64x8<Self> {
9666        let half = self.splat_f64x4(val);
9667        self.combine_f64x4(half, half)
9668    }
9669    #[inline(always)]
9670    fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8<Self> {
9671        f64x8 {
9672            val: crate::support::Aligned512(val),
9673            simd: self,
9674        }
9675    }
9676    #[inline(always)]
9677    fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8<Self> {
9678        f64x8 {
9679            val: crate::support::Aligned512(*val),
9680            simd: self,
9681        }
9682    }
9683    #[inline(always)]
9684    fn as_array_f64x8(self, a: f64x8<Self>) -> [f64; 8usize] {
9685        a.val.0
9686    }
9687    #[inline(always)]
9688    fn as_array_ref_f64x8(self, a: &f64x8<Self>) -> &[f64; 8usize] {
9689        &a.val.0
9690    }
9691    #[inline(always)]
9692    fn as_array_mut_f64x8(self, a: &mut f64x8<Self>) -> &mut [f64; 8usize] {
9693        &mut a.val.0
9694    }
9695    #[inline(always)]
9696    fn store_array_f64x8(self, a: f64x8<Self>, dest: &mut [f64; 8usize]) -> () {
9697        *dest = a.val.0;
9698    }
9699    #[inline(always)]
9700    fn cvt_from_bytes_f64x8(self, a: u8x64<Self>) -> f64x8<Self> {
9701        unsafe {
9702            f64x8 {
9703                val: core::mem::transmute(a.val),
9704                simd: self,
9705            }
9706        }
9707    }
9708    #[inline(always)]
9709    fn cvt_to_bytes_f64x8(self, a: f64x8<Self>) -> u8x64<Self> {
9710        unsafe {
9711            u8x64 {
9712                val: core::mem::transmute(a.val),
9713                simd: self,
9714            }
9715        }
9716    }
9717    #[inline(always)]
9718    fn slide_f64x8<const SHIFT: usize>(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9719        let mut dest = [Default::default(); 8usize];
9720        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
9721        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
9722        dest.simd_into(self)
9723    }
9724    #[inline(always)]
9725    fn slide_within_blocks_f64x8<const SHIFT: usize>(
9726        self,
9727        a: f64x8<Self>,
9728        b: f64x8<Self>,
9729    ) -> f64x8<Self> {
9730        let (a0, a1) = self.split_f64x8(a);
9731        let (b0, b1) = self.split_f64x8(b);
9732        self.combine_f64x4(
9733            self.slide_within_blocks_f64x4::<SHIFT>(a0, b0),
9734            self.slide_within_blocks_f64x4::<SHIFT>(a1, b1),
9735        )
9736    }
9737    #[inline(always)]
9738    fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9739        let (a0, a1) = self.split_f64x8(a);
9740        self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
9741    }
9742    #[inline(always)]
9743    fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9744        let (a0, a1) = self.split_f64x8(a);
9745        self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
9746    }
9747    #[inline(always)]
9748    fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9749        let (a0, a1) = self.split_f64x8(a);
9750        self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
9751    }
9752    #[inline(always)]
9753    fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9754        let (a0, a1) = self.split_f64x8(a);
9755        let (b0, b1) = self.split_f64x8(b);
9756        self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
9757    }
9758    #[inline(always)]
9759    fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9760        let (a0, a1) = self.split_f64x8(a);
9761        let (b0, b1) = self.split_f64x8(b);
9762        self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
9763    }
9764    #[inline(always)]
9765    fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9766        let (a0, a1) = self.split_f64x8(a);
9767        let (b0, b1) = self.split_f64x8(b);
9768        self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
9769    }
9770    #[inline(always)]
9771    fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9772        let (a0, a1) = self.split_f64x8(a);
9773        let (b0, b1) = self.split_f64x8(b);
9774        self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
9775    }
9776    #[inline(always)]
9777    fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9778        let (a0, a1) = self.split_f64x8(a);
9779        let (b0, b1) = self.split_f64x8(b);
9780        self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
9781    }
9782    #[inline(always)]
9783    fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9784        let (a0, a1) = self.split_f64x8(a);
9785        let (b0, b1) = self.split_f64x8(b);
9786        self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
9787    }
9788    #[inline(always)]
9789    fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9790        let (a0, a1) = self.split_f64x8(a);
9791        let (b0, b1) = self.split_f64x8(b);
9792        self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
9793    }
9794    #[inline(always)]
9795    fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9796        let (a0, a1) = self.split_f64x8(a);
9797        let (b0, b1) = self.split_f64x8(b);
9798        self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
9799    }
9800    #[inline(always)]
9801    fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9802        let (a0, a1) = self.split_f64x8(a);
9803        let (b0, b1) = self.split_f64x8(b);
9804        self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
9805    }
9806    #[inline(always)]
9807    fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
9808        let (a0, a1) = self.split_f64x8(a);
9809        let (b0, b1) = self.split_f64x8(b);
9810        self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
9811    }
9812    #[inline(always)]
9813    fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9814        let (a0, _) = self.split_f64x8(a);
9815        let (b0, _) = self.split_f64x8(b);
9816        self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
9817    }
9818    #[inline(always)]
9819    fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9820        let (_, a1) = self.split_f64x8(a);
9821        let (_, b1) = self.split_f64x8(b);
9822        self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
9823    }
9824    #[inline(always)]
9825    fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9826        let (a0, a1) = self.split_f64x8(a);
9827        let (b0, b1) = self.split_f64x8(b);
9828        self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
9829    }
9830    #[inline(always)]
9831    fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9832        let (a0, a1) = self.split_f64x8(a);
9833        let (b0, b1) = self.split_f64x8(b);
9834        self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
9835    }
9836    #[inline(always)]
9837    fn interleave_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> (f64x8<Self>, f64x8<Self>) {
9838        let (a0, a1) = self.split_f64x8(a);
9839        let (b0, b1) = self.split_f64x8(b);
9840        let lo_lo = self.zip_low_f64x4(a0, b0);
9841        let lo_hi = self.zip_high_f64x4(a0, b0);
9842        let hi_lo = self.zip_low_f64x4(a1, b1);
9843        let hi_hi = self.zip_high_f64x4(a1, b1);
9844        (
9845            self.combine_f64x4(lo_lo, lo_hi),
9846            self.combine_f64x4(hi_lo, hi_hi),
9847        )
9848    }
9849    #[inline(always)]
9850    fn deinterleave_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> (f64x8<Self>, f64x8<Self>) {
9851        let (a0, a1) = self.split_f64x8(a);
9852        let (b0, b1) = self.split_f64x8(b);
9853        let lo_even = self.unzip_low_f64x4(a0, a1);
9854        let lo_odd = self.unzip_high_f64x4(a0, a1);
9855        let hi_even = self.unzip_low_f64x4(b0, b1);
9856        let hi_odd = self.unzip_high_f64x4(b0, b1);
9857        (
9858            self.combine_f64x4(lo_even, hi_even),
9859            self.combine_f64x4(lo_odd, hi_odd),
9860        )
9861    }
9862    #[inline(always)]
9863    fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9864        let (a0, a1) = self.split_f64x8(a);
9865        let (b0, b1) = self.split_f64x8(b);
9866        self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
9867    }
9868    #[inline(always)]
9869    fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9870        let (a0, a1) = self.split_f64x8(a);
9871        let (b0, b1) = self.split_f64x8(b);
9872        self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
9873    }
9874    #[inline(always)]
9875    fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9876        let (a0, a1) = self.split_f64x8(a);
9877        let (b0, b1) = self.split_f64x8(b);
9878        self.combine_f64x4(
9879            self.max_precise_f64x4(a0, b0),
9880            self.max_precise_f64x4(a1, b1),
9881        )
9882    }
9883    #[inline(always)]
9884    fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
9885        let (a0, a1) = self.split_f64x8(a);
9886        let (b0, b1) = self.split_f64x8(b);
9887        self.combine_f64x4(
9888            self.min_precise_f64x4(a0, b0),
9889            self.min_precise_f64x4(a1, b1),
9890        )
9891    }
9892    #[inline(always)]
9893    fn mul_add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9894        let (a0, a1) = self.split_f64x8(a);
9895        let (b0, b1) = self.split_f64x8(b);
9896        let (c0, c1) = self.split_f64x8(c);
9897        self.combine_f64x4(
9898            self.mul_add_f64x4(a0, b0, c0),
9899            self.mul_add_f64x4(a1, b1, c1),
9900        )
9901    }
9902    #[inline(always)]
9903    fn mul_sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9904        let (a0, a1) = self.split_f64x8(a);
9905        let (b0, b1) = self.split_f64x8(b);
9906        let (c0, c1) = self.split_f64x8(c);
9907        self.combine_f64x4(
9908            self.mul_sub_f64x4(a0, b0, c0),
9909            self.mul_sub_f64x4(a1, b1, c1),
9910        )
9911    }
9912    #[inline(always)]
9913    fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9914        let (a0, a1) = self.split_f64x8(a);
9915        self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
9916    }
9917    #[inline(always)]
9918    fn ceil_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9919        let (a0, a1) = self.split_f64x8(a);
9920        self.combine_f64x4(self.ceil_f64x4(a0), self.ceil_f64x4(a1))
9921    }
9922    #[inline(always)]
9923    fn round_ties_even_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9924        let (a0, a1) = self.split_f64x8(a);
9925        self.combine_f64x4(
9926            self.round_ties_even_f64x4(a0),
9927            self.round_ties_even_f64x4(a1),
9928        )
9929    }
9930    #[inline(always)]
9931    fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9932        let (a0, a1) = self.split_f64x8(a);
9933        self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
9934    }
9935    #[inline(always)]
9936    fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
9937        let (a0, a1) = self.split_f64x8(a);
9938        self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
9939    }
9940    #[inline(always)]
9941    fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
9942        let (a0, a1) = self.split_mask64x8(a);
9943        let (b0, b1) = self.split_f64x8(b);
9944        let (c0, c1) = self.split_f64x8(c);
9945        self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
9946    }
9947    #[inline(always)]
9948    fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
9949        let mut b0 = [0.0; 4usize];
9950        let mut b1 = [0.0; 4usize];
9951        b0.copy_from_slice(&a.val.0[0..4usize]);
9952        b1.copy_from_slice(&a.val.0[4usize..8usize]);
9953        (b0.simd_into(self), b1.simd_into(self))
9954    }
9955    #[inline(always)]
9956    fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
9957        let (a0, a1) = self.split_f64x8(a);
9958        self.combine_f32x8(
9959            self.reinterpret_f32_f64x4(a0),
9960            self.reinterpret_f32_f64x4(a1),
9961        )
9962    }
9963    #[inline(always)]
9964    fn splat_mask64x8(self, val: i64) -> mask64x8<Self> {
9965        let half = self.splat_mask64x4(val);
9966        self.combine_mask64x4(half, half)
9967    }
9968    #[inline(always)]
9969    fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8<Self> {
9970        mask64x8 {
9971            val: crate::support::Aligned512(val),
9972            simd: self,
9973        }
9974    }
9975    #[inline(always)]
9976    fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8<Self> {
9977        mask64x8 {
9978            val: crate::support::Aligned512(*val),
9979            simd: self,
9980        }
9981    }
9982    #[inline(always)]
9983    fn as_array_mask64x8(self, a: mask64x8<Self>) -> [i64; 8usize] {
9984        a.val.0
9985    }
9986    #[inline(always)]
9987    fn as_array_ref_mask64x8(self, a: &mask64x8<Self>) -> &[i64; 8usize] {
9988        &a.val.0
9989    }
9990    #[inline(always)]
9991    fn as_array_mut_mask64x8(self, a: &mut mask64x8<Self>) -> &mut [i64; 8usize] {
9992        &mut a.val.0
9993    }
9994    #[inline(always)]
9995    fn store_array_mask64x8(self, a: mask64x8<Self>, dest: &mut [i64; 8usize]) -> () {
9996        *dest = a.val.0;
9997    }
9998    #[inline(always)]
9999    fn cvt_from_bytes_mask64x8(self, a: u8x64<Self>) -> mask64x8<Self> {
10000        unsafe {
10001            mask64x8 {
10002                val: core::mem::transmute(a.val),
10003                simd: self,
10004            }
10005        }
10006    }
10007    #[inline(always)]
10008    fn cvt_to_bytes_mask64x8(self, a: mask64x8<Self>) -> u8x64<Self> {
10009        unsafe {
10010            u8x64 {
10011                val: core::mem::transmute(a.val),
10012                simd: self,
10013            }
10014        }
10015    }
10016    #[inline(always)]
10017    fn slide_mask64x8<const SHIFT: usize>(
10018        self,
10019        a: mask64x8<Self>,
10020        b: mask64x8<Self>,
10021    ) -> mask64x8<Self> {
10022        let mut dest = [Default::default(); 8usize];
10023        dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
10024        dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
10025        dest.simd_into(self)
10026    }
10027    #[inline(always)]
10028    fn slide_within_blocks_mask64x8<const SHIFT: usize>(
10029        self,
10030        a: mask64x8<Self>,
10031        b: mask64x8<Self>,
10032    ) -> mask64x8<Self> {
10033        let (a0, a1) = self.split_mask64x8(a);
10034        let (b0, b1) = self.split_mask64x8(b);
10035        self.combine_mask64x4(
10036            self.slide_within_blocks_mask64x4::<SHIFT>(a0, b0),
10037            self.slide_within_blocks_mask64x4::<SHIFT>(a1, b1),
10038        )
10039    }
10040    #[inline(always)]
10041    fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10042        let (a0, a1) = self.split_mask64x8(a);
10043        let (b0, b1) = self.split_mask64x8(b);
10044        self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
10045    }
10046    #[inline(always)]
10047    fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10048        let (a0, a1) = self.split_mask64x8(a);
10049        let (b0, b1) = self.split_mask64x8(b);
10050        self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
10051    }
10052    #[inline(always)]
10053    fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10054        let (a0, a1) = self.split_mask64x8(a);
10055        let (b0, b1) = self.split_mask64x8(b);
10056        self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
10057    }
10058    #[inline(always)]
10059    fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
10060        let (a0, a1) = self.split_mask64x8(a);
10061        self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
10062    }
10063    #[inline(always)]
10064    fn select_mask64x8(
10065        self,
10066        a: mask64x8<Self>,
10067        b: mask64x8<Self>,
10068        c: mask64x8<Self>,
10069    ) -> mask64x8<Self> {
10070        let (a0, a1) = self.split_mask64x8(a);
10071        let (b0, b1) = self.split_mask64x8(b);
10072        let (c0, c1) = self.split_mask64x8(c);
10073        self.combine_mask64x4(
10074            self.select_mask64x4(a0, b0, c0),
10075            self.select_mask64x4(a1, b1, c1),
10076        )
10077    }
10078    #[inline(always)]
10079    fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
10080        let (a0, a1) = self.split_mask64x8(a);
10081        let (b0, b1) = self.split_mask64x8(b);
10082        self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
10083    }
10084    #[inline(always)]
10085    fn any_true_mask64x8(self, a: mask64x8<Self>) -> bool {
10086        let (a0, a1) = self.split_mask64x8(a);
10087        self.any_true_mask64x4(a0) || self.any_true_mask64x4(a1)
10088    }
10089    #[inline(always)]
10090    fn all_true_mask64x8(self, a: mask64x8<Self>) -> bool {
10091        let (a0, a1) = self.split_mask64x8(a);
10092        self.all_true_mask64x4(a0) && self.all_true_mask64x4(a1)
10093    }
10094    #[inline(always)]
10095    fn any_false_mask64x8(self, a: mask64x8<Self>) -> bool {
10096        let (a0, a1) = self.split_mask64x8(a);
10097        self.any_false_mask64x4(a0) || self.any_false_mask64x4(a1)
10098    }
10099    #[inline(always)]
10100    fn all_false_mask64x8(self, a: mask64x8<Self>) -> bool {
10101        let (a0, a1) = self.split_mask64x8(a);
10102        self.all_false_mask64x4(a0) && self.all_false_mask64x4(a1)
10103    }
10104    #[inline(always)]
10105    fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
10106        let mut b0 = [0; 4usize];
10107        let mut b1 = [0; 4usize];
10108        b0.copy_from_slice(&a.val.0[0..4usize]);
10109        b1.copy_from_slice(&a.val.0[4usize..8usize]);
10110        (b0.simd_into(self), b1.simd_into(self))
10111    }
10112}