fearless_simd/generated/fallback.rs

// Copyright 2025 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

// This file is autogenerated by fearless_simd_gen

use crate::{Level, Simd, SimdInto, seal::Seal};
use crate::{
    f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
    i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
    mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
    u32x4, u32x8, u32x16,
};
use core::ops::*;
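// When the `libm` feature is enabled without `std`, the inherent
// `floor`/`fract`/`sqrt`/`trunc` methods on `f32`/`f64` are unavailable, so this
// private shim routes them to `libm`.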
#[cfg(all(feature = "libm", not(feature = "std")))]
trait FloatExt {
    fn floor(self) -> Self;
    fn fract(self) -> Self;
    fn sqrt(self) -> Self;
    fn trunc(self) -> Self;
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f32 {
    #[inline(always)]
    fn floor(self) -> f32 {
        libm::floorf(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f32 {
        libm::sqrtf(self)
    }
    #[inline(always)]
    fn fract(self) -> f32 {
        self - self.trunc()
    }
    #[inline(always)]
    fn trunc(self) -> f32 {
        libm::truncf(self)
    }
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f64 {
    #[inline(always)]
    fn floor(self) -> f64 {
        libm::floor(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f64 {
        libm::sqrt(self)
    }
    #[inline(always)]
    fn fract(self) -> f64 {
        self - self.trunc()
    }
    #[inline(always)]
    fn trunc(self) -> f64 {
        libm::trunc(self)
    }
}
#[doc = r#" The SIMD token for the "fallback" level."#]
#[derive(Clone, Copy, Debug)]
pub struct Fallback {
    pub fallback: crate::core_arch::fallback::Fallback,
}
impl Fallback {
    #[inline]
    pub const fn new() -> Self {
        Fallback {
            fallback: crate::core_arch::fallback::Fallback::new(),
        }
    }
}
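// A minimal usage sketch (illustrative only, not part of the generated file;
// it assumes `Fallback`, `Simd`, and `SimdInto` are re-exported from the crate
// root, matching the `use` statements above):
//
//     use fearless_simd::{Fallback, Simd, SimdInto};
//
//     let simd = Fallback::new();
//     let a = [1.0f32, 2.0, 3.0, 4.0].simd_into(simd);
//     let b = simd.splat_f32x4(0.5);
//     // Every lane-wise operation below is plain scalar Rust, one lane at a time.
//     let sum = simd.add_f32x4(a, b);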
impl Seal for Fallback {}
impl Simd for Fallback {
    type f32s = f32x4<Self>;
    type u8s = u8x16<Self>;
    type i8s = i8x16<Self>;
    type u16s = u16x8<Self>;
    type i16s = i16x8<Self>;
    type u32s = u32x4<Self>;
    type i32s = i32x4<Self>;
    type mask8s = mask8x16<Self>;
    type mask16s = mask16x8<Self>;
    type mask32s = mask32x4<Self>;
    #[inline(always)]
    fn level(self) -> Level {
        Level::Fallback(self)
    }
    #[inline]
    fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
        f()
    }
    #[inline(always)]
    fn splat_f32x4(self, val: f32) -> f32x4<Self> {
        [val; 4usize].simd_into(self)
    }
    #[inline(always)]
    fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::abs(a[0usize]),
            f32::abs(a[1usize]),
            f32::abs(a[2usize]),
            f32::abs(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::neg(a[0usize]),
            f32::neg(a[1usize]),
            f32::neg(a[2usize]),
            f32::neg(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::sqrt(a[0usize]),
            f32::sqrt(a[1usize]),
            f32::sqrt(a[2usize]),
            f32::sqrt(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::add(a[0usize], &b[0usize]),
            f32::add(a[1usize], &b[1usize]),
            f32::add(a[2usize], &b[2usize]),
            f32::add(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::sub(a[0usize], &b[0usize]),
            f32::sub(a[1usize], &b[1usize]),
            f32::sub(a[2usize], &b[2usize]),
            f32::sub(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::mul(a[0usize], &b[0usize]),
            f32::mul(a[1usize], &b[1usize]),
            f32::mul(a[2usize], &b[2usize]),
            f32::mul(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::div(a[0usize], &b[0usize]),
            f32::div(a[1usize], &b[1usize]),
            f32::div(a[2usize], &b[2usize]),
            f32::div(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::copysign(a[0usize], b[0usize]),
            f32::copysign(a[1usize], b[1usize]),
            f32::copysign(a[2usize], b[2usize]),
            f32::copysign(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
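    // Comparison lanes use the usual SIMD mask convention: `-(cmp as i32)` (or
    // the narrower integer equivalent) yields 0 for false and -1, i.e. all bits
    // set, for true.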
    #[inline(always)]
    fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::eq(&a[0usize], &b[0usize]) as i32),
            -(f32::eq(&a[1usize], &b[1usize]) as i32),
            -(f32::eq(&a[2usize], &b[2usize]) as i32),
            -(f32::eq(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::lt(&a[0usize], &b[0usize]) as i32),
            -(f32::lt(&a[1usize], &b[1usize]) as i32),
            -(f32::lt(&a[2usize], &b[2usize]) as i32),
            -(f32::lt(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::le(&a[0usize], &b[0usize]) as i32),
            -(f32::le(&a[1usize], &b[1usize]) as i32),
            -(f32::le(&a[2usize], &b[2usize]) as i32),
            -(f32::le(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::ge(&a[0usize], &b[0usize]) as i32),
            -(f32::ge(&a[1usize], &b[1usize]) as i32),
            -(f32::ge(&a[2usize], &b[2usize]) as i32),
            -(f32::ge(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::gt(&a[0usize], &b[0usize]) as i32),
            -(f32::gt(&a[1usize], &b[1usize]) as i32),
            -(f32::gt(&a[2usize], &b[2usize]) as i32),
            -(f32::gt(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
    }
    #[inline(always)]
    fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
    }
    #[inline(always)]
    fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
    }
    #[inline(always)]
    fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
    }
    #[inline(always)]
    fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::max(a[0usize], b[0usize]),
            f32::max(a[1usize], b[1usize]),
            f32::max(a[2usize], b[2usize]),
            f32::max(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::max(a[0usize], b[0usize]),
            f32::max(a[1usize], b[1usize]),
            f32::max(a[2usize], b[2usize]),
            f32::max(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::min(a[0usize], b[0usize]),
            f32::min(a[1usize], b[1usize]),
            f32::min(a[2usize], b[2usize]),
            f32::min(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::min(a[0usize], b[0usize]),
            f32::min(a[1usize], b[1usize]),
            f32::min(a[2usize], b[2usize]),
            f32::min(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
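    // Note: the fallback `madd`/`msub` are not fused; they lower to a separate
    // multiply and add, so rounding can differ from FMA-capable levels.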
    #[inline(always)]
    fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        a.mul(b).add(c)
    }
    #[inline(always)]
    fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        a.mul(b).sub(c)
    }
    #[inline(always)]
    fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::floor(a[0usize]),
            f32::floor(a[1usize]),
            f32::floor(a[2usize]),
            f32::floor(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::fract(a[0usize]),
            f32::fract(a[1usize]),
            f32::fract(a[2usize]),
            f32::fract(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::trunc(a[0usize]),
            f32::trunc(a[1usize]),
            f32::trunc(a[2usize]),
            f32::trunc(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        [
            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
        let mut result = [0.0; 8usize];
        result[0..4usize].copy_from_slice(&a.val);
        result[4usize..8usize].copy_from_slice(&b.val);
        result.simd_into(self)
    }
    #[inline(always)]
    fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
        f64x2 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        i32x4 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
        u8x16 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        u32x4 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        [
            a[0usize] as u32,
            a[1usize] as u32,
            a[2usize] as u32,
            a[3usize] as u32,
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        [
            a[0usize] as i32,
            a[1usize] as i32,
            a[2usize] as i32,
            a[3usize] as i32,
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn splat_i8x16(self, val: i8) -> i8x16<Self> {
        [val; 16usize].simd_into(self)
    }
    #[inline(always)]
    fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
        [
            i8::not(a[0usize]),
            i8::not(a[1usize]),
            i8::not(a[2usize]),
            i8::not(a[3usize]),
            i8::not(a[4usize]),
            i8::not(a[5usize]),
            i8::not(a[6usize]),
            i8::not(a[7usize]),
            i8::not(a[8usize]),
            i8::not(a[9usize]),
            i8::not(a[10usize]),
            i8::not(a[11usize]),
            i8::not(a[12usize]),
            i8::not(a[13usize]),
            i8::not(a[14usize]),
            i8::not(a[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::wrapping_add(a[0usize], b[0usize]),
            i8::wrapping_add(a[1usize], b[1usize]),
            i8::wrapping_add(a[2usize], b[2usize]),
            i8::wrapping_add(a[3usize], b[3usize]),
            i8::wrapping_add(a[4usize], b[4usize]),
            i8::wrapping_add(a[5usize], b[5usize]),
            i8::wrapping_add(a[6usize], b[6usize]),
            i8::wrapping_add(a[7usize], b[7usize]),
            i8::wrapping_add(a[8usize], b[8usize]),
            i8::wrapping_add(a[9usize], b[9usize]),
            i8::wrapping_add(a[10usize], b[10usize]),
            i8::wrapping_add(a[11usize], b[11usize]),
            i8::wrapping_add(a[12usize], b[12usize]),
            i8::wrapping_add(a[13usize], b[13usize]),
            i8::wrapping_add(a[14usize], b[14usize]),
            i8::wrapping_add(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::wrapping_sub(a[0usize], b[0usize]),
            i8::wrapping_sub(a[1usize], b[1usize]),
            i8::wrapping_sub(a[2usize], b[2usize]),
            i8::wrapping_sub(a[3usize], b[3usize]),
            i8::wrapping_sub(a[4usize], b[4usize]),
            i8::wrapping_sub(a[5usize], b[5usize]),
            i8::wrapping_sub(a[6usize], b[6usize]),
            i8::wrapping_sub(a[7usize], b[7usize]),
            i8::wrapping_sub(a[8usize], b[8usize]),
            i8::wrapping_sub(a[9usize], b[9usize]),
            i8::wrapping_sub(a[10usize], b[10usize]),
            i8::wrapping_sub(a[11usize], b[11usize]),
            i8::wrapping_sub(a[12usize], b[12usize]),
            i8::wrapping_sub(a[13usize], b[13usize]),
            i8::wrapping_sub(a[14usize], b[14usize]),
            i8::wrapping_sub(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::wrapping_mul(a[0usize], b[0usize]),
            i8::wrapping_mul(a[1usize], b[1usize]),
            i8::wrapping_mul(a[2usize], b[2usize]),
            i8::wrapping_mul(a[3usize], b[3usize]),
            i8::wrapping_mul(a[4usize], b[4usize]),
            i8::wrapping_mul(a[5usize], b[5usize]),
            i8::wrapping_mul(a[6usize], b[6usize]),
            i8::wrapping_mul(a[7usize], b[7usize]),
            i8::wrapping_mul(a[8usize], b[8usize]),
            i8::wrapping_mul(a[9usize], b[9usize]),
            i8::wrapping_mul(a[10usize], b[10usize]),
            i8::wrapping_mul(a[11usize], b[11usize]),
            i8::wrapping_mul(a[12usize], b[12usize]),
            i8::wrapping_mul(a[13usize], b[13usize]),
            i8::wrapping_mul(a[14usize], b[14usize]),
            i8::wrapping_mul(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::bitand(a[0usize], &b[0usize]),
            i8::bitand(a[1usize], &b[1usize]),
            i8::bitand(a[2usize], &b[2usize]),
            i8::bitand(a[3usize], &b[3usize]),
            i8::bitand(a[4usize], &b[4usize]),
            i8::bitand(a[5usize], &b[5usize]),
            i8::bitand(a[6usize], &b[6usize]),
            i8::bitand(a[7usize], &b[7usize]),
            i8::bitand(a[8usize], &b[8usize]),
            i8::bitand(a[9usize], &b[9usize]),
            i8::bitand(a[10usize], &b[10usize]),
            i8::bitand(a[11usize], &b[11usize]),
            i8::bitand(a[12usize], &b[12usize]),
            i8::bitand(a[13usize], &b[13usize]),
            i8::bitand(a[14usize], &b[14usize]),
            i8::bitand(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::bitor(a[0usize], &b[0usize]),
            i8::bitor(a[1usize], &b[1usize]),
            i8::bitor(a[2usize], &b[2usize]),
            i8::bitor(a[3usize], &b[3usize]),
            i8::bitor(a[4usize], &b[4usize]),
            i8::bitor(a[5usize], &b[5usize]),
            i8::bitor(a[6usize], &b[6usize]),
            i8::bitor(a[7usize], &b[7usize]),
            i8::bitor(a[8usize], &b[8usize]),
            i8::bitor(a[9usize], &b[9usize]),
            i8::bitor(a[10usize], &b[10usize]),
            i8::bitor(a[11usize], &b[11usize]),
            i8::bitor(a[12usize], &b[12usize]),
            i8::bitor(a[13usize], &b[13usize]),
            i8::bitor(a[14usize], &b[14usize]),
            i8::bitor(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::bitxor(a[0usize], &b[0usize]),
            i8::bitxor(a[1usize], &b[1usize]),
            i8::bitxor(a[2usize], &b[2usize]),
            i8::bitxor(a[3usize], &b[3usize]),
            i8::bitxor(a[4usize], &b[4usize]),
            i8::bitxor(a[5usize], &b[5usize]),
            i8::bitxor(a[6usize], &b[6usize]),
            i8::bitxor(a[7usize], &b[7usize]),
            i8::bitxor(a[8usize], &b[8usize]),
            i8::bitxor(a[9usize], &b[9usize]),
            i8::bitxor(a[10usize], &b[10usize]),
            i8::bitxor(a[11usize], &b[11usize]),
            i8::bitxor(a[12usize], &b[12usize]),
            i8::bitxor(a[13usize], &b[13usize]),
            i8::bitxor(a[14usize], &b[14usize]),
            i8::bitxor(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
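    // `shr_*`/`shl_*` shift every lane by the same scalar amount, while `shrv_*`
    // shifts each lane by the corresponding lane of `b`.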
    #[inline(always)]
    fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
        [
            i8::shr(a[0usize], shift as i8),
            i8::shr(a[1usize], shift as i8),
            i8::shr(a[2usize], shift as i8),
            i8::shr(a[3usize], shift as i8),
            i8::shr(a[4usize], shift as i8),
            i8::shr(a[5usize], shift as i8),
            i8::shr(a[6usize], shift as i8),
            i8::shr(a[7usize], shift as i8),
            i8::shr(a[8usize], shift as i8),
            i8::shr(a[9usize], shift as i8),
            i8::shr(a[10usize], shift as i8),
            i8::shr(a[11usize], shift as i8),
            i8::shr(a[12usize], shift as i8),
            i8::shr(a[13usize], shift as i8),
            i8::shr(a[14usize], shift as i8),
            i8::shr(a[15usize], shift as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::shr(a[0usize], &b[0usize]),
            i8::shr(a[1usize], &b[1usize]),
            i8::shr(a[2usize], &b[2usize]),
            i8::shr(a[3usize], &b[3usize]),
            i8::shr(a[4usize], &b[4usize]),
            i8::shr(a[5usize], &b[5usize]),
            i8::shr(a[6usize], &b[6usize]),
            i8::shr(a[7usize], &b[7usize]),
            i8::shr(a[8usize], &b[8usize]),
            i8::shr(a[9usize], &b[9usize]),
            i8::shr(a[10usize], &b[10usize]),
            i8::shr(a[11usize], &b[11usize]),
            i8::shr(a[12usize], &b[12usize]),
            i8::shr(a[13usize], &b[13usize]),
            i8::shr(a[14usize], &b[14usize]),
            i8::shr(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
        [
            i8::shl(a[0usize], shift as i8),
            i8::shl(a[1usize], shift as i8),
            i8::shl(a[2usize], shift as i8),
            i8::shl(a[3usize], shift as i8),
            i8::shl(a[4usize], shift as i8),
            i8::shl(a[5usize], shift as i8),
            i8::shl(a[6usize], shift as i8),
            i8::shl(a[7usize], shift as i8),
            i8::shl(a[8usize], shift as i8),
            i8::shl(a[9usize], shift as i8),
            i8::shl(a[10usize], shift as i8),
            i8::shl(a[11usize], shift as i8),
            i8::shl(a[12usize], shift as i8),
            i8::shl(a[13usize], shift as i8),
            i8::shl(a[14usize], shift as i8),
            i8::shl(a[15usize], shift as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
        [
            -(i8::eq(&a[0usize], &b[0usize]) as i8),
            -(i8::eq(&a[1usize], &b[1usize]) as i8),
            -(i8::eq(&a[2usize], &b[2usize]) as i8),
            -(i8::eq(&a[3usize], &b[3usize]) as i8),
            -(i8::eq(&a[4usize], &b[4usize]) as i8),
            -(i8::eq(&a[5usize], &b[5usize]) as i8),
            -(i8::eq(&a[6usize], &b[6usize]) as i8),
            -(i8::eq(&a[7usize], &b[7usize]) as i8),
            -(i8::eq(&a[8usize], &b[8usize]) as i8),
            -(i8::eq(&a[9usize], &b[9usize]) as i8),
            -(i8::eq(&a[10usize], &b[10usize]) as i8),
            -(i8::eq(&a[11usize], &b[11usize]) as i8),
            -(i8::eq(&a[12usize], &b[12usize]) as i8),
            -(i8::eq(&a[13usize], &b[13usize]) as i8),
            -(i8::eq(&a[14usize], &b[14usize]) as i8),
            -(i8::eq(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
        [
            -(i8::lt(&a[0usize], &b[0usize]) as i8),
            -(i8::lt(&a[1usize], &b[1usize]) as i8),
            -(i8::lt(&a[2usize], &b[2usize]) as i8),
            -(i8::lt(&a[3usize], &b[3usize]) as i8),
            -(i8::lt(&a[4usize], &b[4usize]) as i8),
            -(i8::lt(&a[5usize], &b[5usize]) as i8),
            -(i8::lt(&a[6usize], &b[6usize]) as i8),
            -(i8::lt(&a[7usize], &b[7usize]) as i8),
            -(i8::lt(&a[8usize], &b[8usize]) as i8),
            -(i8::lt(&a[9usize], &b[9usize]) as i8),
            -(i8::lt(&a[10usize], &b[10usize]) as i8),
            -(i8::lt(&a[11usize], &b[11usize]) as i8),
            -(i8::lt(&a[12usize], &b[12usize]) as i8),
            -(i8::lt(&a[13usize], &b[13usize]) as i8),
            -(i8::lt(&a[14usize], &b[14usize]) as i8),
            -(i8::lt(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
        [
            -(i8::le(&a[0usize], &b[0usize]) as i8),
            -(i8::le(&a[1usize], &b[1usize]) as i8),
            -(i8::le(&a[2usize], &b[2usize]) as i8),
            -(i8::le(&a[3usize], &b[3usize]) as i8),
            -(i8::le(&a[4usize], &b[4usize]) as i8),
            -(i8::le(&a[5usize], &b[5usize]) as i8),
            -(i8::le(&a[6usize], &b[6usize]) as i8),
            -(i8::le(&a[7usize], &b[7usize]) as i8),
            -(i8::le(&a[8usize], &b[8usize]) as i8),
            -(i8::le(&a[9usize], &b[9usize]) as i8),
            -(i8::le(&a[10usize], &b[10usize]) as i8),
            -(i8::le(&a[11usize], &b[11usize]) as i8),
            -(i8::le(&a[12usize], &b[12usize]) as i8),
            -(i8::le(&a[13usize], &b[13usize]) as i8),
            -(i8::le(&a[14usize], &b[14usize]) as i8),
            -(i8::le(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
        [
            -(i8::ge(&a[0usize], &b[0usize]) as i8),
            -(i8::ge(&a[1usize], &b[1usize]) as i8),
            -(i8::ge(&a[2usize], &b[2usize]) as i8),
            -(i8::ge(&a[3usize], &b[3usize]) as i8),
            -(i8::ge(&a[4usize], &b[4usize]) as i8),
            -(i8::ge(&a[5usize], &b[5usize]) as i8),
            -(i8::ge(&a[6usize], &b[6usize]) as i8),
            -(i8::ge(&a[7usize], &b[7usize]) as i8),
            -(i8::ge(&a[8usize], &b[8usize]) as i8),
            -(i8::ge(&a[9usize], &b[9usize]) as i8),
            -(i8::ge(&a[10usize], &b[10usize]) as i8),
            -(i8::ge(&a[11usize], &b[11usize]) as i8),
            -(i8::ge(&a[12usize], &b[12usize]) as i8),
            -(i8::ge(&a[13usize], &b[13usize]) as i8),
            -(i8::ge(&a[14usize], &b[14usize]) as i8),
            -(i8::ge(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
        [
            -(i8::gt(&a[0usize], &b[0usize]) as i8),
            -(i8::gt(&a[1usize], &b[1usize]) as i8),
            -(i8::gt(&a[2usize], &b[2usize]) as i8),
            -(i8::gt(&a[3usize], &b[3usize]) as i8),
            -(i8::gt(&a[4usize], &b[4usize]) as i8),
            -(i8::gt(&a[5usize], &b[5usize]) as i8),
            -(i8::gt(&a[6usize], &b[6usize]) as i8),
            -(i8::gt(&a[7usize], &b[7usize]) as i8),
            -(i8::gt(&a[8usize], &b[8usize]) as i8),
            -(i8::gt(&a[9usize], &b[9usize]) as i8),
            -(i8::gt(&a[10usize], &b[10usize]) as i8),
            -(i8::gt(&a[11usize], &b[11usize]) as i8),
            -(i8::gt(&a[12usize], &b[12usize]) as i8),
            -(i8::gt(&a[13usize], &b[13usize]) as i8),
            -(i8::gt(&a[14usize], &b[14usize]) as i8),
            -(i8::gt(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
            a[15usize], b[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
            b[12usize], b[14usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
            b[13usize], b[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
        [
            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
            if a[10usize] != 0 {
                b[10usize]
            } else {
                c[10usize]
            },
            if a[11usize] != 0 {
                b[11usize]
            } else {
                c[11usize]
            },
            if a[12usize] != 0 {
                b[12usize]
            } else {
                c[12usize]
            },
            if a[13usize] != 0 {
                b[13usize]
            } else {
                c[13usize]
            },
            if a[14usize] != 0 {
                b[14usize]
            } else {
                c[14usize]
            },
            if a[15usize] != 0 {
                b[15usize]
            } else {
                c[15usize]
            },
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::min(a[0usize], b[0usize]),
            i8::min(a[1usize], b[1usize]),
            i8::min(a[2usize], b[2usize]),
            i8::min(a[3usize], b[3usize]),
            i8::min(a[4usize], b[4usize]),
            i8::min(a[5usize], b[5usize]),
            i8::min(a[6usize], b[6usize]),
            i8::min(a[7usize], b[7usize]),
            i8::min(a[8usize], b[8usize]),
            i8::min(a[9usize], b[9usize]),
            i8::min(a[10usize], b[10usize]),
            i8::min(a[11usize], b[11usize]),
            i8::min(a[12usize], b[12usize]),
            i8::min(a[13usize], b[13usize]),
            i8::min(a[14usize], b[14usize]),
            i8::min(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
        [
            i8::max(a[0usize], b[0usize]),
            i8::max(a[1usize], b[1usize]),
            i8::max(a[2usize], b[2usize]),
            i8::max(a[3usize], b[3usize]),
            i8::max(a[4usize], b[4usize]),
            i8::max(a[5usize], b[5usize]),
            i8::max(a[6usize], b[6usize]),
            i8::max(a[7usize], b[7usize]),
            i8::max(a[8usize], b[8usize]),
            i8::max(a[9usize], b[9usize]),
            i8::max(a[10usize], b[10usize]),
            i8::max(a[11usize], b[11usize]),
            i8::max(a[12usize], b[12usize]),
            i8::max(a[13usize], b[13usize]),
            i8::max(a[14usize], b[14usize]),
            i8::max(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
        let mut result = [0; 32usize];
        result[0..16usize].copy_from_slice(&a.val);
        result[16usize..32usize].copy_from_slice(&b.val);
        result.simd_into(self)
    }
    #[inline(always)]
    fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
        [
            i8::neg(a[0usize]),
            i8::neg(a[1usize]),
            i8::neg(a[2usize]),
            i8::neg(a[3usize]),
            i8::neg(a[4usize]),
            i8::neg(a[5usize]),
            i8::neg(a[6usize]),
            i8::neg(a[7usize]),
            i8::neg(a[8usize]),
            i8::neg(a[9usize]),
            i8::neg(a[10usize]),
            i8::neg(a[11usize]),
            i8::neg(a[12usize]),
            i8::neg(a[13usize]),
            i8::neg(a[14usize]),
            i8::neg(a[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
        u8x16 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
        u32x4 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn splat_u8x16(self, val: u8) -> u8x16<Self> {
        [val; 16usize].simd_into(self)
    }
    #[inline(always)]
    fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
        [
            u8::not(a[0usize]),
            u8::not(a[1usize]),
            u8::not(a[2usize]),
            u8::not(a[3usize]),
            u8::not(a[4usize]),
            u8::not(a[5usize]),
            u8::not(a[6usize]),
            u8::not(a[7usize]),
            u8::not(a[8usize]),
            u8::not(a[9usize]),
            u8::not(a[10usize]),
            u8::not(a[11usize]),
            u8::not(a[12usize]),
            u8::not(a[13usize]),
            u8::not(a[14usize]),
            u8::not(a[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::wrapping_add(a[0usize], b[0usize]),
            u8::wrapping_add(a[1usize], b[1usize]),
            u8::wrapping_add(a[2usize], b[2usize]),
            u8::wrapping_add(a[3usize], b[3usize]),
            u8::wrapping_add(a[4usize], b[4usize]),
            u8::wrapping_add(a[5usize], b[5usize]),
            u8::wrapping_add(a[6usize], b[6usize]),
            u8::wrapping_add(a[7usize], b[7usize]),
            u8::wrapping_add(a[8usize], b[8usize]),
            u8::wrapping_add(a[9usize], b[9usize]),
            u8::wrapping_add(a[10usize], b[10usize]),
            u8::wrapping_add(a[11usize], b[11usize]),
            u8::wrapping_add(a[12usize], b[12usize]),
            u8::wrapping_add(a[13usize], b[13usize]),
            u8::wrapping_add(a[14usize], b[14usize]),
            u8::wrapping_add(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::wrapping_sub(a[0usize], b[0usize]),
            u8::wrapping_sub(a[1usize], b[1usize]),
            u8::wrapping_sub(a[2usize], b[2usize]),
            u8::wrapping_sub(a[3usize], b[3usize]),
            u8::wrapping_sub(a[4usize], b[4usize]),
            u8::wrapping_sub(a[5usize], b[5usize]),
            u8::wrapping_sub(a[6usize], b[6usize]),
            u8::wrapping_sub(a[7usize], b[7usize]),
            u8::wrapping_sub(a[8usize], b[8usize]),
            u8::wrapping_sub(a[9usize], b[9usize]),
            u8::wrapping_sub(a[10usize], b[10usize]),
            u8::wrapping_sub(a[11usize], b[11usize]),
            u8::wrapping_sub(a[12usize], b[12usize]),
            u8::wrapping_sub(a[13usize], b[13usize]),
            u8::wrapping_sub(a[14usize], b[14usize]),
            u8::wrapping_sub(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::wrapping_mul(a[0usize], b[0usize]),
            u8::wrapping_mul(a[1usize], b[1usize]),
            u8::wrapping_mul(a[2usize], b[2usize]),
            u8::wrapping_mul(a[3usize], b[3usize]),
            u8::wrapping_mul(a[4usize], b[4usize]),
            u8::wrapping_mul(a[5usize], b[5usize]),
            u8::wrapping_mul(a[6usize], b[6usize]),
            u8::wrapping_mul(a[7usize], b[7usize]),
            u8::wrapping_mul(a[8usize], b[8usize]),
            u8::wrapping_mul(a[9usize], b[9usize]),
            u8::wrapping_mul(a[10usize], b[10usize]),
            u8::wrapping_mul(a[11usize], b[11usize]),
            u8::wrapping_mul(a[12usize], b[12usize]),
            u8::wrapping_mul(a[13usize], b[13usize]),
            u8::wrapping_mul(a[14usize], b[14usize]),
            u8::wrapping_mul(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::bitand(a[0usize], &b[0usize]),
            u8::bitand(a[1usize], &b[1usize]),
            u8::bitand(a[2usize], &b[2usize]),
            u8::bitand(a[3usize], &b[3usize]),
            u8::bitand(a[4usize], &b[4usize]),
            u8::bitand(a[5usize], &b[5usize]),
            u8::bitand(a[6usize], &b[6usize]),
            u8::bitand(a[7usize], &b[7usize]),
            u8::bitand(a[8usize], &b[8usize]),
            u8::bitand(a[9usize], &b[9usize]),
            u8::bitand(a[10usize], &b[10usize]),
            u8::bitand(a[11usize], &b[11usize]),
            u8::bitand(a[12usize], &b[12usize]),
            u8::bitand(a[13usize], &b[13usize]),
            u8::bitand(a[14usize], &b[14usize]),
            u8::bitand(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::bitor(a[0usize], &b[0usize]),
            u8::bitor(a[1usize], &b[1usize]),
            u8::bitor(a[2usize], &b[2usize]),
            u8::bitor(a[3usize], &b[3usize]),
            u8::bitor(a[4usize], &b[4usize]),
            u8::bitor(a[5usize], &b[5usize]),
            u8::bitor(a[6usize], &b[6usize]),
            u8::bitor(a[7usize], &b[7usize]),
            u8::bitor(a[8usize], &b[8usize]),
            u8::bitor(a[9usize], &b[9usize]),
            u8::bitor(a[10usize], &b[10usize]),
            u8::bitor(a[11usize], &b[11usize]),
            u8::bitor(a[12usize], &b[12usize]),
            u8::bitor(a[13usize], &b[13usize]),
            u8::bitor(a[14usize], &b[14usize]),
            u8::bitor(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::bitxor(a[0usize], &b[0usize]),
            u8::bitxor(a[1usize], &b[1usize]),
            u8::bitxor(a[2usize], &b[2usize]),
            u8::bitxor(a[3usize], &b[3usize]),
            u8::bitxor(a[4usize], &b[4usize]),
            u8::bitxor(a[5usize], &b[5usize]),
            u8::bitxor(a[6usize], &b[6usize]),
            u8::bitxor(a[7usize], &b[7usize]),
            u8::bitxor(a[8usize], &b[8usize]),
            u8::bitxor(a[9usize], &b[9usize]),
            u8::bitxor(a[10usize], &b[10usize]),
            u8::bitxor(a[11usize], &b[11usize]),
            u8::bitxor(a[12usize], &b[12usize]),
            u8::bitxor(a[13usize], &b[13usize]),
            u8::bitxor(a[14usize], &b[14usize]),
            u8::bitxor(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
        [
            u8::shr(a[0usize], shift as u8),
            u8::shr(a[1usize], shift as u8),
            u8::shr(a[2usize], shift as u8),
            u8::shr(a[3usize], shift as u8),
            u8::shr(a[4usize], shift as u8),
            u8::shr(a[5usize], shift as u8),
            u8::shr(a[6usize], shift as u8),
            u8::shr(a[7usize], shift as u8),
            u8::shr(a[8usize], shift as u8),
            u8::shr(a[9usize], shift as u8),
            u8::shr(a[10usize], shift as u8),
            u8::shr(a[11usize], shift as u8),
            u8::shr(a[12usize], shift as u8),
            u8::shr(a[13usize], shift as u8),
            u8::shr(a[14usize], shift as u8),
            u8::shr(a[15usize], shift as u8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::shr(a[0usize], &b[0usize]),
            u8::shr(a[1usize], &b[1usize]),
            u8::shr(a[2usize], &b[2usize]),
            u8::shr(a[3usize], &b[3usize]),
            u8::shr(a[4usize], &b[4usize]),
            u8::shr(a[5usize], &b[5usize]),
            u8::shr(a[6usize], &b[6usize]),
            u8::shr(a[7usize], &b[7usize]),
            u8::shr(a[8usize], &b[8usize]),
            u8::shr(a[9usize], &b[9usize]),
            u8::shr(a[10usize], &b[10usize]),
            u8::shr(a[11usize], &b[11usize]),
            u8::shr(a[12usize], &b[12usize]),
            u8::shr(a[13usize], &b[13usize]),
            u8::shr(a[14usize], &b[14usize]),
            u8::shr(a[15usize], &b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
        [
            u8::shl(a[0usize], shift as u8),
            u8::shl(a[1usize], shift as u8),
            u8::shl(a[2usize], shift as u8),
            u8::shl(a[3usize], shift as u8),
            u8::shl(a[4usize], shift as u8),
            u8::shl(a[5usize], shift as u8),
            u8::shl(a[6usize], shift as u8),
            u8::shl(a[7usize], shift as u8),
            u8::shl(a[8usize], shift as u8),
            u8::shl(a[9usize], shift as u8),
            u8::shl(a[10usize], shift as u8),
            u8::shl(a[11usize], shift as u8),
            u8::shl(a[12usize], shift as u8),
            u8::shl(a[13usize], shift as u8),
            u8::shl(a[14usize], shift as u8),
            u8::shl(a[15usize], shift as u8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
        [
            -(u8::eq(&a[0usize], &b[0usize]) as i8),
            -(u8::eq(&a[1usize], &b[1usize]) as i8),
            -(u8::eq(&a[2usize], &b[2usize]) as i8),
            -(u8::eq(&a[3usize], &b[3usize]) as i8),
            -(u8::eq(&a[4usize], &b[4usize]) as i8),
            -(u8::eq(&a[5usize], &b[5usize]) as i8),
            -(u8::eq(&a[6usize], &b[6usize]) as i8),
            -(u8::eq(&a[7usize], &b[7usize]) as i8),
            -(u8::eq(&a[8usize], &b[8usize]) as i8),
            -(u8::eq(&a[9usize], &b[9usize]) as i8),
            -(u8::eq(&a[10usize], &b[10usize]) as i8),
            -(u8::eq(&a[11usize], &b[11usize]) as i8),
            -(u8::eq(&a[12usize], &b[12usize]) as i8),
            -(u8::eq(&a[13usize], &b[13usize]) as i8),
            -(u8::eq(&a[14usize], &b[14usize]) as i8),
            -(u8::eq(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
        [
            -(u8::lt(&a[0usize], &b[0usize]) as i8),
            -(u8::lt(&a[1usize], &b[1usize]) as i8),
            -(u8::lt(&a[2usize], &b[2usize]) as i8),
            -(u8::lt(&a[3usize], &b[3usize]) as i8),
            -(u8::lt(&a[4usize], &b[4usize]) as i8),
            -(u8::lt(&a[5usize], &b[5usize]) as i8),
            -(u8::lt(&a[6usize], &b[6usize]) as i8),
            -(u8::lt(&a[7usize], &b[7usize]) as i8),
            -(u8::lt(&a[8usize], &b[8usize]) as i8),
            -(u8::lt(&a[9usize], &b[9usize]) as i8),
            -(u8::lt(&a[10usize], &b[10usize]) as i8),
            -(u8::lt(&a[11usize], &b[11usize]) as i8),
            -(u8::lt(&a[12usize], &b[12usize]) as i8),
            -(u8::lt(&a[13usize], &b[13usize]) as i8),
            -(u8::lt(&a[14usize], &b[14usize]) as i8),
            -(u8::lt(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
        [
            -(u8::le(&a[0usize], &b[0usize]) as i8),
            -(u8::le(&a[1usize], &b[1usize]) as i8),
            -(u8::le(&a[2usize], &b[2usize]) as i8),
            -(u8::le(&a[3usize], &b[3usize]) as i8),
            -(u8::le(&a[4usize], &b[4usize]) as i8),
            -(u8::le(&a[5usize], &b[5usize]) as i8),
            -(u8::le(&a[6usize], &b[6usize]) as i8),
            -(u8::le(&a[7usize], &b[7usize]) as i8),
            -(u8::le(&a[8usize], &b[8usize]) as i8),
            -(u8::le(&a[9usize], &b[9usize]) as i8),
            -(u8::le(&a[10usize], &b[10usize]) as i8),
            -(u8::le(&a[11usize], &b[11usize]) as i8),
            -(u8::le(&a[12usize], &b[12usize]) as i8),
            -(u8::le(&a[13usize], &b[13usize]) as i8),
            -(u8::le(&a[14usize], &b[14usize]) as i8),
            -(u8::le(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
        [
            -(u8::ge(&a[0usize], &b[0usize]) as i8),
            -(u8::ge(&a[1usize], &b[1usize]) as i8),
            -(u8::ge(&a[2usize], &b[2usize]) as i8),
            -(u8::ge(&a[3usize], &b[3usize]) as i8),
            -(u8::ge(&a[4usize], &b[4usize]) as i8),
            -(u8::ge(&a[5usize], &b[5usize]) as i8),
            -(u8::ge(&a[6usize], &b[6usize]) as i8),
            -(u8::ge(&a[7usize], &b[7usize]) as i8),
            -(u8::ge(&a[8usize], &b[8usize]) as i8),
            -(u8::ge(&a[9usize], &b[9usize]) as i8),
            -(u8::ge(&a[10usize], &b[10usize]) as i8),
            -(u8::ge(&a[11usize], &b[11usize]) as i8),
            -(u8::ge(&a[12usize], &b[12usize]) as i8),
            -(u8::ge(&a[13usize], &b[13usize]) as i8),
            -(u8::ge(&a[14usize], &b[14usize]) as i8),
            -(u8::ge(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
        [
            -(u8::gt(&a[0usize], &b[0usize]) as i8),
            -(u8::gt(&a[1usize], &b[1usize]) as i8),
            -(u8::gt(&a[2usize], &b[2usize]) as i8),
            -(u8::gt(&a[3usize], &b[3usize]) as i8),
            -(u8::gt(&a[4usize], &b[4usize]) as i8),
            -(u8::gt(&a[5usize], &b[5usize]) as i8),
            -(u8::gt(&a[6usize], &b[6usize]) as i8),
            -(u8::gt(&a[7usize], &b[7usize]) as i8),
            -(u8::gt(&a[8usize], &b[8usize]) as i8),
            -(u8::gt(&a[9usize], &b[9usize]) as i8),
            -(u8::gt(&a[10usize], &b[10usize]) as i8),
            -(u8::gt(&a[11usize], &b[11usize]) as i8),
            -(u8::gt(&a[12usize], &b[12usize]) as i8),
            -(u8::gt(&a[13usize], &b[13usize]) as i8),
            -(u8::gt(&a[14usize], &b[14usize]) as i8),
            -(u8::gt(&a[15usize], &b[15usize]) as i8),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
            a[15usize], b[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
            b[12usize], b[14usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
            b[13usize], b[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
        [
            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
            if a[10usize] != 0 {
                b[10usize]
            } else {
                c[10usize]
            },
            if a[11usize] != 0 {
                b[11usize]
            } else {
                c[11usize]
            },
            if a[12usize] != 0 {
                b[12usize]
            } else {
                c[12usize]
            },
            if a[13usize] != 0 {
                b[13usize]
            } else {
                c[13usize]
            },
            if a[14usize] != 0 {
                b[14usize]
            } else {
                c[14usize]
            },
            if a[15usize] != 0 {
                b[15usize]
            } else {
                c[15usize]
            },
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::min(a[0usize], b[0usize]),
            u8::min(a[1usize], b[1usize]),
            u8::min(a[2usize], b[2usize]),
            u8::min(a[3usize], b[3usize]),
            u8::min(a[4usize], b[4usize]),
            u8::min(a[5usize], b[5usize]),
            u8::min(a[6usize], b[6usize]),
            u8::min(a[7usize], b[7usize]),
            u8::min(a[8usize], b[8usize]),
            u8::min(a[9usize], b[9usize]),
            u8::min(a[10usize], b[10usize]),
            u8::min(a[11usize], b[11usize]),
            u8::min(a[12usize], b[12usize]),
            u8::min(a[13usize], b[13usize]),
            u8::min(a[14usize], b[14usize]),
            u8::min(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
        [
            u8::max(a[0usize], b[0usize]),
            u8::max(a[1usize], b[1usize]),
            u8::max(a[2usize], b[2usize]),
            u8::max(a[3usize], b[3usize]),
            u8::max(a[4usize], b[4usize]),
            u8::max(a[5usize], b[5usize]),
            u8::max(a[6usize], b[6usize]),
            u8::max(a[7usize], b[7usize]),
            u8::max(a[8usize], b[8usize]),
            u8::max(a[9usize], b[9usize]),
            u8::max(a[10usize], b[10usize]),
            u8::max(a[11usize], b[11usize]),
            u8::max(a[12usize], b[12usize]),
            u8::max(a[13usize], b[13usize]),
            u8::max(a[14usize], b[14usize]),
            u8::max(a[15usize], b[15usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
        let mut result = [0; 32usize];
        result[0..16usize].copy_from_slice(&a.val);
        result[16usize..32usize].copy_from_slice(&b.val);
        result.simd_into(self)
    }
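    // `widen_u8x16` zero-extends each 8-bit lane to a 16-bit lane.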
1353    #[inline(always)]
1354    fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1355        [
1356            a[0usize] as u16,
1357            a[1usize] as u16,
1358            a[2usize] as u16,
1359            a[3usize] as u16,
1360            a[4usize] as u16,
1361            a[5usize] as u16,
1362            a[6usize] as u16,
1363            a[7usize] as u16,
1364            a[8usize] as u16,
1365            a[9usize] as u16,
1366            a[10usize] as u16,
1367            a[11usize] as u16,
1368            a[12usize] as u16,
1369            a[13usize] as u16,
1370            a[14usize] as u16,
1371            a[15usize] as u16,
1372        ]
1373        .simd_into(self)
1374    }
1375    #[inline(always)]
1376    fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1377        u32x4 {
1378            val: bytemuck::cast(a.val),
1379            simd: a.simd,
1380        }
1381    }
1382    #[inline(always)]
1383    fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1384        [val; 16usize].simd_into(self)
1385    }
1386    #[inline(always)]
1387    fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1388        [
1389            i8::not(a[0usize]),
1390            i8::not(a[1usize]),
1391            i8::not(a[2usize]),
1392            i8::not(a[3usize]),
1393            i8::not(a[4usize]),
1394            i8::not(a[5usize]),
1395            i8::not(a[6usize]),
1396            i8::not(a[7usize]),
1397            i8::not(a[8usize]),
1398            i8::not(a[9usize]),
1399            i8::not(a[10usize]),
1400            i8::not(a[11usize]),
1401            i8::not(a[12usize]),
1402            i8::not(a[13usize]),
1403            i8::not(a[14usize]),
1404            i8::not(a[15usize]),
1405        ]
1406        .simd_into(self)
1407    }
1408    #[inline(always)]
1409    fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1410        [
1411            i8::bitand(a[0usize], &b[0usize]),
1412            i8::bitand(a[1usize], &b[1usize]),
1413            i8::bitand(a[2usize], &b[2usize]),
1414            i8::bitand(a[3usize], &b[3usize]),
1415            i8::bitand(a[4usize], &b[4usize]),
1416            i8::bitand(a[5usize], &b[5usize]),
1417            i8::bitand(a[6usize], &b[6usize]),
1418            i8::bitand(a[7usize], &b[7usize]),
1419            i8::bitand(a[8usize], &b[8usize]),
1420            i8::bitand(a[9usize], &b[9usize]),
1421            i8::bitand(a[10usize], &b[10usize]),
1422            i8::bitand(a[11usize], &b[11usize]),
1423            i8::bitand(a[12usize], &b[12usize]),
1424            i8::bitand(a[13usize], &b[13usize]),
1425            i8::bitand(a[14usize], &b[14usize]),
1426            i8::bitand(a[15usize], &b[15usize]),
1427        ]
1428        .simd_into(self)
1429    }
1430    #[inline(always)]
1431    fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1432        [
1433            i8::bitor(a[0usize], &b[0usize]),
1434            i8::bitor(a[1usize], &b[1usize]),
1435            i8::bitor(a[2usize], &b[2usize]),
1436            i8::bitor(a[3usize], &b[3usize]),
1437            i8::bitor(a[4usize], &b[4usize]),
1438            i8::bitor(a[5usize], &b[5usize]),
1439            i8::bitor(a[6usize], &b[6usize]),
1440            i8::bitor(a[7usize], &b[7usize]),
1441            i8::bitor(a[8usize], &b[8usize]),
1442            i8::bitor(a[9usize], &b[9usize]),
1443            i8::bitor(a[10usize], &b[10usize]),
1444            i8::bitor(a[11usize], &b[11usize]),
1445            i8::bitor(a[12usize], &b[12usize]),
1446            i8::bitor(a[13usize], &b[13usize]),
1447            i8::bitor(a[14usize], &b[14usize]),
1448            i8::bitor(a[15usize], &b[15usize]),
1449        ]
1450        .simd_into(self)
1451    }
1452    #[inline(always)]
1453    fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1454        [
1455            i8::bitxor(a[0usize], &b[0usize]),
1456            i8::bitxor(a[1usize], &b[1usize]),
1457            i8::bitxor(a[2usize], &b[2usize]),
1458            i8::bitxor(a[3usize], &b[3usize]),
1459            i8::bitxor(a[4usize], &b[4usize]),
1460            i8::bitxor(a[5usize], &b[5usize]),
1461            i8::bitxor(a[6usize], &b[6usize]),
1462            i8::bitxor(a[7usize], &b[7usize]),
1463            i8::bitxor(a[8usize], &b[8usize]),
1464            i8::bitxor(a[9usize], &b[9usize]),
1465            i8::bitxor(a[10usize], &b[10usize]),
1466            i8::bitxor(a[11usize], &b[11usize]),
1467            i8::bitxor(a[12usize], &b[12usize]),
1468            i8::bitxor(a[13usize], &b[13usize]),
1469            i8::bitxor(a[14usize], &b[14usize]),
1470            i8::bitxor(a[15usize], &b[15usize]),
1471        ]
1472        .simd_into(self)
1473    }
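    // `select_*` is a lane-wise blend: wherever the mask lane in `a` is
    // non-zero the result lane is taken from `b`, otherwise from `c`.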
1474    #[inline(always)]
1475    fn select_mask8x16(
1476        self,
1477        a: mask8x16<Self>,
1478        b: mask8x16<Self>,
1479        c: mask8x16<Self>,
1480    ) -> mask8x16<Self> {
1481        [
1482            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1483            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1484            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1485            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1486            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1487            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1488            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1489            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1490            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1491            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1492            if a[10usize] != 0 {
1493                b[10usize]
1494            } else {
1495                c[10usize]
1496            },
1497            if a[11usize] != 0 {
1498                b[11usize]
1499            } else {
1500                c[11usize]
1501            },
1502            if a[12usize] != 0 {
1503                b[12usize]
1504            } else {
1505                c[12usize]
1506            },
1507            if a[13usize] != 0 {
1508                b[13usize]
1509            } else {
1510                c[13usize]
1511            },
1512            if a[14usize] != 0 {
1513                b[14usize]
1514            } else {
1515                c[14usize]
1516            },
1517            if a[15usize] != 0 {
1518                b[15usize]
1519            } else {
1520                c[15usize]
1521            },
1522        ]
1523        .simd_into(self)
1524    }
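    // Comparisons build each mask lane from a `bool` via `-(cmp as iN)`:
    // `true` becomes 1 and then -1 (all bits set), `false` stays 0.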
1525    #[inline(always)]
1526    fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1527        [
1528            -(i8::eq(&a[0usize], &b[0usize]) as i8),
1529            -(i8::eq(&a[1usize], &b[1usize]) as i8),
1530            -(i8::eq(&a[2usize], &b[2usize]) as i8),
1531            -(i8::eq(&a[3usize], &b[3usize]) as i8),
1532            -(i8::eq(&a[4usize], &b[4usize]) as i8),
1533            -(i8::eq(&a[5usize], &b[5usize]) as i8),
1534            -(i8::eq(&a[6usize], &b[6usize]) as i8),
1535            -(i8::eq(&a[7usize], &b[7usize]) as i8),
1536            -(i8::eq(&a[8usize], &b[8usize]) as i8),
1537            -(i8::eq(&a[9usize], &b[9usize]) as i8),
1538            -(i8::eq(&a[10usize], &b[10usize]) as i8),
1539            -(i8::eq(&a[11usize], &b[11usize]) as i8),
1540            -(i8::eq(&a[12usize], &b[12usize]) as i8),
1541            -(i8::eq(&a[13usize], &b[13usize]) as i8),
1542            -(i8::eq(&a[14usize], &b[14usize]) as i8),
1543            -(i8::eq(&a[15usize], &b[15usize]) as i8),
1544        ]
1545        .simd_into(self)
1546    }
1547    #[inline(always)]
1548    fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
1549        let mut result = [0; 32usize];
1550        result[0..16usize].copy_from_slice(&a.val);
1551        result[16usize..32usize].copy_from_slice(&b.val);
1552        result.simd_into(self)
1553    }
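    // i16x8 arithmetic is element-wise scalar code. Add/sub/mul use the
    // `wrapping_*` forms so that lane overflow wraps instead of panicking in
    // debug builds, matching the wrap-around behaviour expected of packed
    // integer SIMD.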
1554    #[inline(always)]
1555    fn splat_i16x8(self, val: i16) -> i16x8<Self> {
1556        [val; 8usize].simd_into(self)
1557    }
1558    #[inline(always)]
1559    fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1560        [
1561            i16::not(a[0usize]),
1562            i16::not(a[1usize]),
1563            i16::not(a[2usize]),
1564            i16::not(a[3usize]),
1565            i16::not(a[4usize]),
1566            i16::not(a[5usize]),
1567            i16::not(a[6usize]),
1568            i16::not(a[7usize]),
1569        ]
1570        .simd_into(self)
1571    }
1572    #[inline(always)]
1573    fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1574        [
1575            i16::wrapping_add(a[0usize], b[0usize]),
1576            i16::wrapping_add(a[1usize], b[1usize]),
1577            i16::wrapping_add(a[2usize], b[2usize]),
1578            i16::wrapping_add(a[3usize], b[3usize]),
1579            i16::wrapping_add(a[4usize], b[4usize]),
1580            i16::wrapping_add(a[5usize], b[5usize]),
1581            i16::wrapping_add(a[6usize], b[6usize]),
1582            i16::wrapping_add(a[7usize], b[7usize]),
1583        ]
1584        .simd_into(self)
1585    }
1586    #[inline(always)]
1587    fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1588        [
1589            i16::wrapping_sub(a[0usize], b[0usize]),
1590            i16::wrapping_sub(a[1usize], b[1usize]),
1591            i16::wrapping_sub(a[2usize], b[2usize]),
1592            i16::wrapping_sub(a[3usize], b[3usize]),
1593            i16::wrapping_sub(a[4usize], b[4usize]),
1594            i16::wrapping_sub(a[5usize], b[5usize]),
1595            i16::wrapping_sub(a[6usize], b[6usize]),
1596            i16::wrapping_sub(a[7usize], b[7usize]),
1597        ]
1598        .simd_into(self)
1599    }
1600    #[inline(always)]
1601    fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1602        [
1603            i16::wrapping_mul(a[0usize], b[0usize]),
1604            i16::wrapping_mul(a[1usize], b[1usize]),
1605            i16::wrapping_mul(a[2usize], b[2usize]),
1606            i16::wrapping_mul(a[3usize], b[3usize]),
1607            i16::wrapping_mul(a[4usize], b[4usize]),
1608            i16::wrapping_mul(a[5usize], b[5usize]),
1609            i16::wrapping_mul(a[6usize], b[6usize]),
1610            i16::wrapping_mul(a[7usize], b[7usize]),
1611        ]
1612        .simd_into(self)
1613    }
1614    #[inline(always)]
1615    fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1616        [
1617            i16::bitand(a[0usize], &b[0usize]),
1618            i16::bitand(a[1usize], &b[1usize]),
1619            i16::bitand(a[2usize], &b[2usize]),
1620            i16::bitand(a[3usize], &b[3usize]),
1621            i16::bitand(a[4usize], &b[4usize]),
1622            i16::bitand(a[5usize], &b[5usize]),
1623            i16::bitand(a[6usize], &b[6usize]),
1624            i16::bitand(a[7usize], &b[7usize]),
1625        ]
1626        .simd_into(self)
1627    }
1628    #[inline(always)]
1629    fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1630        [
1631            i16::bitor(a[0usize], &b[0usize]),
1632            i16::bitor(a[1usize], &b[1usize]),
1633            i16::bitor(a[2usize], &b[2usize]),
1634            i16::bitor(a[3usize], &b[3usize]),
1635            i16::bitor(a[4usize], &b[4usize]),
1636            i16::bitor(a[5usize], &b[5usize]),
1637            i16::bitor(a[6usize], &b[6usize]),
1638            i16::bitor(a[7usize], &b[7usize]),
1639        ]
1640        .simd_into(self)
1641    }
1642    #[inline(always)]
1643    fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1644        [
1645            i16::bitxor(a[0usize], &b[0usize]),
1646            i16::bitxor(a[1usize], &b[1usize]),
1647            i16::bitxor(a[2usize], &b[2usize]),
1648            i16::bitxor(a[3usize], &b[3usize]),
1649            i16::bitxor(a[4usize], &b[4usize]),
1650            i16::bitxor(a[5usize], &b[5usize]),
1651            i16::bitxor(a[6usize], &b[6usize]),
1652            i16::bitxor(a[7usize], &b[7usize]),
1653        ]
1654        .simd_into(self)
1655    }
1656    #[inline(always)]
1657    fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1658        [
1659            i16::shr(a[0usize], shift as i16),
1660            i16::shr(a[1usize], shift as i16),
1661            i16::shr(a[2usize], shift as i16),
1662            i16::shr(a[3usize], shift as i16),
1663            i16::shr(a[4usize], shift as i16),
1664            i16::shr(a[5usize], shift as i16),
1665            i16::shr(a[6usize], shift as i16),
1666            i16::shr(a[7usize], shift as i16),
1667        ]
1668        .simd_into(self)
1669    }
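    // `shr_*`/`shl_*` shift every lane by the same scalar amount, while the
    // `shrv_*` variant below shifts each lane by the corresponding lane of `b`.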
1670    #[inline(always)]
1671    fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1672        [
1673            i16::shr(a[0usize], &b[0usize]),
1674            i16::shr(a[1usize], &b[1usize]),
1675            i16::shr(a[2usize], &b[2usize]),
1676            i16::shr(a[3usize], &b[3usize]),
1677            i16::shr(a[4usize], &b[4usize]),
1678            i16::shr(a[5usize], &b[5usize]),
1679            i16::shr(a[6usize], &b[6usize]),
1680            i16::shr(a[7usize], &b[7usize]),
1681        ]
1682        .simd_into(self)
1683    }
1684    #[inline(always)]
1685    fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1686        [
1687            i16::shl(a[0usize], shift as i16),
1688            i16::shl(a[1usize], shift as i16),
1689            i16::shl(a[2usize], shift as i16),
1690            i16::shl(a[3usize], shift as i16),
1691            i16::shl(a[4usize], shift as i16),
1692            i16::shl(a[5usize], shift as i16),
1693            i16::shl(a[6usize], shift as i16),
1694            i16::shl(a[7usize], shift as i16),
1695        ]
1696        .simd_into(self)
1697    }
1698    #[inline(always)]
1699    fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1700        [
1701            -(i16::eq(&a[0usize], &b[0usize]) as i16),
1702            -(i16::eq(&a[1usize], &b[1usize]) as i16),
1703            -(i16::eq(&a[2usize], &b[2usize]) as i16),
1704            -(i16::eq(&a[3usize], &b[3usize]) as i16),
1705            -(i16::eq(&a[4usize], &b[4usize]) as i16),
1706            -(i16::eq(&a[5usize], &b[5usize]) as i16),
1707            -(i16::eq(&a[6usize], &b[6usize]) as i16),
1708            -(i16::eq(&a[7usize], &b[7usize]) as i16),
1709        ]
1710        .simd_into(self)
1711    }
1712    #[inline(always)]
1713    fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1714        [
1715            -(i16::lt(&a[0usize], &b[0usize]) as i16),
1716            -(i16::lt(&a[1usize], &b[1usize]) as i16),
1717            -(i16::lt(&a[2usize], &b[2usize]) as i16),
1718            -(i16::lt(&a[3usize], &b[3usize]) as i16),
1719            -(i16::lt(&a[4usize], &b[4usize]) as i16),
1720            -(i16::lt(&a[5usize], &b[5usize]) as i16),
1721            -(i16::lt(&a[6usize], &b[6usize]) as i16),
1722            -(i16::lt(&a[7usize], &b[7usize]) as i16),
1723        ]
1724        .simd_into(self)
1725    }
1726    #[inline(always)]
1727    fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1728        [
1729            -(i16::le(&a[0usize], &b[0usize]) as i16),
1730            -(i16::le(&a[1usize], &b[1usize]) as i16),
1731            -(i16::le(&a[2usize], &b[2usize]) as i16),
1732            -(i16::le(&a[3usize], &b[3usize]) as i16),
1733            -(i16::le(&a[4usize], &b[4usize]) as i16),
1734            -(i16::le(&a[5usize], &b[5usize]) as i16),
1735            -(i16::le(&a[6usize], &b[6usize]) as i16),
1736            -(i16::le(&a[7usize], &b[7usize]) as i16),
1737        ]
1738        .simd_into(self)
1739    }
1740    #[inline(always)]
1741    fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1742        [
1743            -(i16::ge(&a[0usize], &b[0usize]) as i16),
1744            -(i16::ge(&a[1usize], &b[1usize]) as i16),
1745            -(i16::ge(&a[2usize], &b[2usize]) as i16),
1746            -(i16::ge(&a[3usize], &b[3usize]) as i16),
1747            -(i16::ge(&a[4usize], &b[4usize]) as i16),
1748            -(i16::ge(&a[5usize], &b[5usize]) as i16),
1749            -(i16::ge(&a[6usize], &b[6usize]) as i16),
1750            -(i16::ge(&a[7usize], &b[7usize]) as i16),
1751        ]
1752        .simd_into(self)
1753    }
1754    #[inline(always)]
1755    fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1756        [
1757            -(i16::gt(&a[0usize], &b[0usize]) as i16),
1758            -(i16::gt(&a[1usize], &b[1usize]) as i16),
1759            -(i16::gt(&a[2usize], &b[2usize]) as i16),
1760            -(i16::gt(&a[3usize], &b[3usize]) as i16),
1761            -(i16::gt(&a[4usize], &b[4usize]) as i16),
1762            -(i16::gt(&a[5usize], &b[5usize]) as i16),
1763            -(i16::gt(&a[6usize], &b[6usize]) as i16),
1764            -(i16::gt(&a[7usize], &b[7usize]) as i16),
1765        ]
1766        .simd_into(self)
1767    }
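    // `zip_low`/`zip_high` interleave the low or high halves of `a` and `b`
    // (a0, b0, a1, b1, ...); `unzip_low`/`unzip_high` do the inverse and
    // gather the even or odd lanes of the concatenation of `a` and `b`.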
1768    #[inline(always)]
1769    fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1770        [
1771            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1772        ]
1773        .simd_into(self)
1774    }
1775    #[inline(always)]
1776    fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1777        [
1778            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1779        ]
1780        .simd_into(self)
1781    }
1782    #[inline(always)]
1783    fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1784        [
1785            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1786        ]
1787        .simd_into(self)
1788    }
1789    #[inline(always)]
1790    fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1791        [
1792            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1793        ]
1794        .simd_into(self)
1795    }
1796    #[inline(always)]
1797    fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
1798        [
1799            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1800            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1801            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1802            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1803            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1804            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1805            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1806            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1807        ]
1808        .simd_into(self)
1809    }
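    // Usage sketch (not part of the generated code): combining a comparison
    // with a select gives conditional per-lane updates, e.g. clamping
    // negative i16 lanes to zero:
    //     let m = simd.simd_lt_i16x8(x, simd.splat_i16x8(0));
    //     let y = simd.select_i16x8(m, simd.splat_i16x8(0), x);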
1810    #[inline(always)]
1811    fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1812        [
1813            i16::min(a[0usize], b[0usize]),
1814            i16::min(a[1usize], b[1usize]),
1815            i16::min(a[2usize], b[2usize]),
1816            i16::min(a[3usize], b[3usize]),
1817            i16::min(a[4usize], b[4usize]),
1818            i16::min(a[5usize], b[5usize]),
1819            i16::min(a[6usize], b[6usize]),
1820            i16::min(a[7usize], b[7usize]),
1821        ]
1822        .simd_into(self)
1823    }
1824    #[inline(always)]
1825    fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1826        [
1827            i16::max(a[0usize], b[0usize]),
1828            i16::max(a[1usize], b[1usize]),
1829            i16::max(a[2usize], b[2usize]),
1830            i16::max(a[3usize], b[3usize]),
1831            i16::max(a[4usize], b[4usize]),
1832            i16::max(a[5usize], b[5usize]),
1833            i16::max(a[6usize], b[6usize]),
1834            i16::max(a[7usize], b[7usize]),
1835        ]
1836        .simd_into(self)
1837    }
1838    #[inline(always)]
1839    fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
1840        let mut result = [0; 16usize];
1841        result[0..8usize].copy_from_slice(&a.val);
1842        result[8usize..16usize].copy_from_slice(&b.val);
1843        result.simd_into(self)
1844    }
1845    #[inline(always)]
1846    fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1847        [
1848            i16::neg(a[0usize]),
1849            i16::neg(a[1usize]),
1850            i16::neg(a[2usize]),
1851            i16::neg(a[3usize]),
1852            i16::neg(a[4usize]),
1853            i16::neg(a[5usize]),
1854            i16::neg(a[6usize]),
1855            i16::neg(a[7usize]),
1856        ]
1857        .simd_into(self)
1858    }
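    // `reinterpret_*` is a bit-cast via `bytemuck::cast`: the 128-bit payload
    // is unchanged and only the lane type is reinterpreted; no per-lane
    // conversion takes place.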
1859    #[inline(always)]
1860    fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
1861        u8x16 {
1862            val: bytemuck::cast(a.val),
1863            simd: a.simd,
1864        }
1865    }
1866    #[inline(always)]
1867    fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
1868        u32x4 {
1869            val: bytemuck::cast(a.val),
1870            simd: a.simd,
1871        }
1872    }
1873    #[inline(always)]
1874    fn splat_u16x8(self, val: u16) -> u16x8<Self> {
1875        [val; 8usize].simd_into(self)
1876    }
1877    #[inline(always)]
1878    fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
1879        [
1880            u16::not(a[0usize]),
1881            u16::not(a[1usize]),
1882            u16::not(a[2usize]),
1883            u16::not(a[3usize]),
1884            u16::not(a[4usize]),
1885            u16::not(a[5usize]),
1886            u16::not(a[6usize]),
1887            u16::not(a[7usize]),
1888        ]
1889        .simd_into(self)
1890    }
1891    #[inline(always)]
1892    fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1893        [
1894            u16::wrapping_add(a[0usize], b[0usize]),
1895            u16::wrapping_add(a[1usize], b[1usize]),
1896            u16::wrapping_add(a[2usize], b[2usize]),
1897            u16::wrapping_add(a[3usize], b[3usize]),
1898            u16::wrapping_add(a[4usize], b[4usize]),
1899            u16::wrapping_add(a[5usize], b[5usize]),
1900            u16::wrapping_add(a[6usize], b[6usize]),
1901            u16::wrapping_add(a[7usize], b[7usize]),
1902        ]
1903        .simd_into(self)
1904    }
1905    #[inline(always)]
1906    fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1907        [
1908            u16::wrapping_sub(a[0usize], b[0usize]),
1909            u16::wrapping_sub(a[1usize], b[1usize]),
1910            u16::wrapping_sub(a[2usize], b[2usize]),
1911            u16::wrapping_sub(a[3usize], b[3usize]),
1912            u16::wrapping_sub(a[4usize], b[4usize]),
1913            u16::wrapping_sub(a[5usize], b[5usize]),
1914            u16::wrapping_sub(a[6usize], b[6usize]),
1915            u16::wrapping_sub(a[7usize], b[7usize]),
1916        ]
1917        .simd_into(self)
1918    }
1919    #[inline(always)]
1920    fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1921        [
1922            u16::wrapping_mul(a[0usize], b[0usize]),
1923            u16::wrapping_mul(a[1usize], b[1usize]),
1924            u16::wrapping_mul(a[2usize], b[2usize]),
1925            u16::wrapping_mul(a[3usize], b[3usize]),
1926            u16::wrapping_mul(a[4usize], b[4usize]),
1927            u16::wrapping_mul(a[5usize], b[5usize]),
1928            u16::wrapping_mul(a[6usize], b[6usize]),
1929            u16::wrapping_mul(a[7usize], b[7usize]),
1930        ]
1931        .simd_into(self)
1932    }
1933    #[inline(always)]
1934    fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1935        [
1936            u16::bitand(a[0usize], &b[0usize]),
1937            u16::bitand(a[1usize], &b[1usize]),
1938            u16::bitand(a[2usize], &b[2usize]),
1939            u16::bitand(a[3usize], &b[3usize]),
1940            u16::bitand(a[4usize], &b[4usize]),
1941            u16::bitand(a[5usize], &b[5usize]),
1942            u16::bitand(a[6usize], &b[6usize]),
1943            u16::bitand(a[7usize], &b[7usize]),
1944        ]
1945        .simd_into(self)
1946    }
1947    #[inline(always)]
1948    fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1949        [
1950            u16::bitor(a[0usize], &b[0usize]),
1951            u16::bitor(a[1usize], &b[1usize]),
1952            u16::bitor(a[2usize], &b[2usize]),
1953            u16::bitor(a[3usize], &b[3usize]),
1954            u16::bitor(a[4usize], &b[4usize]),
1955            u16::bitor(a[5usize], &b[5usize]),
1956            u16::bitor(a[6usize], &b[6usize]),
1957            u16::bitor(a[7usize], &b[7usize]),
1958        ]
1959        .simd_into(self)
1960    }
1961    #[inline(always)]
1962    fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1963        [
1964            u16::bitxor(a[0usize], &b[0usize]),
1965            u16::bitxor(a[1usize], &b[1usize]),
1966            u16::bitxor(a[2usize], &b[2usize]),
1967            u16::bitxor(a[3usize], &b[3usize]),
1968            u16::bitxor(a[4usize], &b[4usize]),
1969            u16::bitxor(a[5usize], &b[5usize]),
1970            u16::bitxor(a[6usize], &b[6usize]),
1971            u16::bitxor(a[7usize], &b[7usize]),
1972        ]
1973        .simd_into(self)
1974    }
1975    #[inline(always)]
1976    fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
1977        [
1978            u16::shr(a[0usize], shift as u16),
1979            u16::shr(a[1usize], shift as u16),
1980            u16::shr(a[2usize], shift as u16),
1981            u16::shr(a[3usize], shift as u16),
1982            u16::shr(a[4usize], shift as u16),
1983            u16::shr(a[5usize], shift as u16),
1984            u16::shr(a[6usize], shift as u16),
1985            u16::shr(a[7usize], shift as u16),
1986        ]
1987        .simd_into(self)
1988    }
1989    #[inline(always)]
1990    fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1991        [
1992            u16::shr(a[0usize], &b[0usize]),
1993            u16::shr(a[1usize], &b[1usize]),
1994            u16::shr(a[2usize], &b[2usize]),
1995            u16::shr(a[3usize], &b[3usize]),
1996            u16::shr(a[4usize], &b[4usize]),
1997            u16::shr(a[5usize], &b[5usize]),
1998            u16::shr(a[6usize], &b[6usize]),
1999            u16::shr(a[7usize], &b[7usize]),
2000        ]
2001        .simd_into(self)
2002    }
2003    #[inline(always)]
2004    fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2005        [
2006            u16::shl(a[0usize], shift as u16),
2007            u16::shl(a[1usize], shift as u16),
2008            u16::shl(a[2usize], shift as u16),
2009            u16::shl(a[3usize], shift as u16),
2010            u16::shl(a[4usize], shift as u16),
2011            u16::shl(a[5usize], shift as u16),
2012            u16::shl(a[6usize], shift as u16),
2013            u16::shl(a[7usize], shift as u16),
2014        ]
2015        .simd_into(self)
2016    }
2017    #[inline(always)]
2018    fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2019        [
2020            -(u16::eq(&a[0usize], &b[0usize]) as i16),
2021            -(u16::eq(&a[1usize], &b[1usize]) as i16),
2022            -(u16::eq(&a[2usize], &b[2usize]) as i16),
2023            -(u16::eq(&a[3usize], &b[3usize]) as i16),
2024            -(u16::eq(&a[4usize], &b[4usize]) as i16),
2025            -(u16::eq(&a[5usize], &b[5usize]) as i16),
2026            -(u16::eq(&a[6usize], &b[6usize]) as i16),
2027            -(u16::eq(&a[7usize], &b[7usize]) as i16),
2028        ]
2029        .simd_into(self)
2030    }
2031    #[inline(always)]
2032    fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2033        [
2034            -(u16::lt(&a[0usize], &b[0usize]) as i16),
2035            -(u16::lt(&a[1usize], &b[1usize]) as i16),
2036            -(u16::lt(&a[2usize], &b[2usize]) as i16),
2037            -(u16::lt(&a[3usize], &b[3usize]) as i16),
2038            -(u16::lt(&a[4usize], &b[4usize]) as i16),
2039            -(u16::lt(&a[5usize], &b[5usize]) as i16),
2040            -(u16::lt(&a[6usize], &b[6usize]) as i16),
2041            -(u16::lt(&a[7usize], &b[7usize]) as i16),
2042        ]
2043        .simd_into(self)
2044    }
2045    #[inline(always)]
2046    fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2047        [
2048            -(u16::le(&a[0usize], &b[0usize]) as i16),
2049            -(u16::le(&a[1usize], &b[1usize]) as i16),
2050            -(u16::le(&a[2usize], &b[2usize]) as i16),
2051            -(u16::le(&a[3usize], &b[3usize]) as i16),
2052            -(u16::le(&a[4usize], &b[4usize]) as i16),
2053            -(u16::le(&a[5usize], &b[5usize]) as i16),
2054            -(u16::le(&a[6usize], &b[6usize]) as i16),
2055            -(u16::le(&a[7usize], &b[7usize]) as i16),
2056        ]
2057        .simd_into(self)
2058    }
2059    #[inline(always)]
2060    fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2061        [
2062            -(u16::ge(&a[0usize], &b[0usize]) as i16),
2063            -(u16::ge(&a[1usize], &b[1usize]) as i16),
2064            -(u16::ge(&a[2usize], &b[2usize]) as i16),
2065            -(u16::ge(&a[3usize], &b[3usize]) as i16),
2066            -(u16::ge(&a[4usize], &b[4usize]) as i16),
2067            -(u16::ge(&a[5usize], &b[5usize]) as i16),
2068            -(u16::ge(&a[6usize], &b[6usize]) as i16),
2069            -(u16::ge(&a[7usize], &b[7usize]) as i16),
2070        ]
2071        .simd_into(self)
2072    }
2073    #[inline(always)]
2074    fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2075        [
2076            -(u16::gt(&a[0usize], &b[0usize]) as i16),
2077            -(u16::gt(&a[1usize], &b[1usize]) as i16),
2078            -(u16::gt(&a[2usize], &b[2usize]) as i16),
2079            -(u16::gt(&a[3usize], &b[3usize]) as i16),
2080            -(u16::gt(&a[4usize], &b[4usize]) as i16),
2081            -(u16::gt(&a[5usize], &b[5usize]) as i16),
2082            -(u16::gt(&a[6usize], &b[6usize]) as i16),
2083            -(u16::gt(&a[7usize], &b[7usize]) as i16),
2084        ]
2085        .simd_into(self)
2086    }
2087    #[inline(always)]
2088    fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2089        [
2090            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2091        ]
2092        .simd_into(self)
2093    }
2094    #[inline(always)]
2095    fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2096        [
2097            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2098        ]
2099        .simd_into(self)
2100    }
2101    #[inline(always)]
2102    fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2103        [
2104            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2105        ]
2106        .simd_into(self)
2107    }
2108    #[inline(always)]
2109    fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2110        [
2111            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2112        ]
2113        .simd_into(self)
2114    }
2115    #[inline(always)]
2116    fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
2117        [
2118            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2119            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2120            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2121            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2122            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2123            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2124            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2125            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2126        ]
2127        .simd_into(self)
2128    }
2129    #[inline(always)]
2130    fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2131        [
2132            u16::min(a[0usize], b[0usize]),
2133            u16::min(a[1usize], b[1usize]),
2134            u16::min(a[2usize], b[2usize]),
2135            u16::min(a[3usize], b[3usize]),
2136            u16::min(a[4usize], b[4usize]),
2137            u16::min(a[5usize], b[5usize]),
2138            u16::min(a[6usize], b[6usize]),
2139            u16::min(a[7usize], b[7usize]),
2140        ]
2141        .simd_into(self)
2142    }
2143    #[inline(always)]
2144    fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2145        [
2146            u16::max(a[0usize], b[0usize]),
2147            u16::max(a[1usize], b[1usize]),
2148            u16::max(a[2usize], b[2usize]),
2149            u16::max(a[3usize], b[3usize]),
2150            u16::max(a[4usize], b[4usize]),
2151            u16::max(a[5usize], b[5usize]),
2152            u16::max(a[6usize], b[6usize]),
2153            u16::max(a[7usize], b[7usize]),
2154        ]
2155        .simd_into(self)
2156    }
2157    #[inline(always)]
2158    fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
2159        let mut result = [0; 16usize];
2160        result[0..8usize].copy_from_slice(&a.val);
2161        result[8usize..16usize].copy_from_slice(&b.val);
2162        result.simd_into(self)
2163    }
2164    #[inline(always)]
2165    fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2166        u8x16 {
2167            val: bytemuck::cast(a.val),
2168            simd: a.simd,
2169        }
2170    }
2171    #[inline(always)]
2172    fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
2173        u32x4 {
2174            val: bytemuck::cast(a.val),
2175            simd: a.simd,
2176        }
2177    }
2178    #[inline(always)]
2179    fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2180        [val; 8usize].simd_into(self)
2181    }
2182    #[inline(always)]
2183    fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2184        [
2185            i16::not(a[0usize]),
2186            i16::not(a[1usize]),
2187            i16::not(a[2usize]),
2188            i16::not(a[3usize]),
2189            i16::not(a[4usize]),
2190            i16::not(a[5usize]),
2191            i16::not(a[6usize]),
2192            i16::not(a[7usize]),
2193        ]
2194        .simd_into(self)
2195    }
2196    #[inline(always)]
2197    fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2198        [
2199            i16::bitand(a[0usize], &b[0usize]),
2200            i16::bitand(a[1usize], &b[1usize]),
2201            i16::bitand(a[2usize], &b[2usize]),
2202            i16::bitand(a[3usize], &b[3usize]),
2203            i16::bitand(a[4usize], &b[4usize]),
2204            i16::bitand(a[5usize], &b[5usize]),
2205            i16::bitand(a[6usize], &b[6usize]),
2206            i16::bitand(a[7usize], &b[7usize]),
2207        ]
2208        .simd_into(self)
2209    }
2210    #[inline(always)]
2211    fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2212        [
2213            i16::bitor(a[0usize], &b[0usize]),
2214            i16::bitor(a[1usize], &b[1usize]),
2215            i16::bitor(a[2usize], &b[2usize]),
2216            i16::bitor(a[3usize], &b[3usize]),
2217            i16::bitor(a[4usize], &b[4usize]),
2218            i16::bitor(a[5usize], &b[5usize]),
2219            i16::bitor(a[6usize], &b[6usize]),
2220            i16::bitor(a[7usize], &b[7usize]),
2221        ]
2222        .simd_into(self)
2223    }
2224    #[inline(always)]
2225    fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2226        [
2227            i16::bitxor(a[0usize], &b[0usize]),
2228            i16::bitxor(a[1usize], &b[1usize]),
2229            i16::bitxor(a[2usize], &b[2usize]),
2230            i16::bitxor(a[3usize], &b[3usize]),
2231            i16::bitxor(a[4usize], &b[4usize]),
2232            i16::bitxor(a[5usize], &b[5usize]),
2233            i16::bitxor(a[6usize], &b[6usize]),
2234            i16::bitxor(a[7usize], &b[7usize]),
2235        ]
2236        .simd_into(self)
2237    }
2238    #[inline(always)]
2239    fn select_mask16x8(
2240        self,
2241        a: mask16x8<Self>,
2242        b: mask16x8<Self>,
2243        c: mask16x8<Self>,
2244    ) -> mask16x8<Self> {
2245        [
2246            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2247            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2248            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2249            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2250            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2251            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2252            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2253            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2254        ]
2255        .simd_into(self)
2256    }
2257    #[inline(always)]
2258    fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2259        [
2260            -(i16::eq(&a[0usize], &b[0usize]) as i16),
2261            -(i16::eq(&a[1usize], &b[1usize]) as i16),
2262            -(i16::eq(&a[2usize], &b[2usize]) as i16),
2263            -(i16::eq(&a[3usize], &b[3usize]) as i16),
2264            -(i16::eq(&a[4usize], &b[4usize]) as i16),
2265            -(i16::eq(&a[5usize], &b[5usize]) as i16),
2266            -(i16::eq(&a[6usize], &b[6usize]) as i16),
2267            -(i16::eq(&a[7usize], &b[7usize]) as i16),
2268        ]
2269        .simd_into(self)
2270    }
2271    #[inline(always)]
2272    fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
2273        let mut result = [0; 16usize];
2274        result[0..8usize].copy_from_slice(&a.val);
2275        result[8usize..16usize].copy_from_slice(&b.val);
2276        result.simd_into(self)
2277    }
2278    #[inline(always)]
2279    fn splat_i32x4(self, val: i32) -> i32x4<Self> {
2280        [val; 4usize].simd_into(self)
2281    }
2282    #[inline(always)]
2283    fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2284        [
2285            i32::not(a[0usize]),
2286            i32::not(a[1usize]),
2287            i32::not(a[2usize]),
2288            i32::not(a[3usize]),
2289        ]
2290        .simd_into(self)
2291    }
2292    #[inline(always)]
2293    fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2294        [
2295            i32::wrapping_add(a[0usize], b[0usize]),
2296            i32::wrapping_add(a[1usize], b[1usize]),
2297            i32::wrapping_add(a[2usize], b[2usize]),
2298            i32::wrapping_add(a[3usize], b[3usize]),
2299        ]
2300        .simd_into(self)
2301    }
2302    #[inline(always)]
2303    fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2304        [
2305            i32::wrapping_sub(a[0usize], b[0usize]),
2306            i32::wrapping_sub(a[1usize], b[1usize]),
2307            i32::wrapping_sub(a[2usize], b[2usize]),
2308            i32::wrapping_sub(a[3usize], b[3usize]),
2309        ]
2310        .simd_into(self)
2311    }
2312    #[inline(always)]
2313    fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2314        [
2315            i32::wrapping_mul(a[0usize], b[0usize]),
2316            i32::wrapping_mul(a[1usize], b[1usize]),
2317            i32::wrapping_mul(a[2usize], b[2usize]),
2318            i32::wrapping_mul(a[3usize], b[3usize]),
2319        ]
2320        .simd_into(self)
2321    }
2322    #[inline(always)]
2323    fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2324        [
2325            i32::bitand(a[0usize], &b[0usize]),
2326            i32::bitand(a[1usize], &b[1usize]),
2327            i32::bitand(a[2usize], &b[2usize]),
2328            i32::bitand(a[3usize], &b[3usize]),
2329        ]
2330        .simd_into(self)
2331    }
2332    #[inline(always)]
2333    fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2334        [
2335            i32::bitor(a[0usize], &b[0usize]),
2336            i32::bitor(a[1usize], &b[1usize]),
2337            i32::bitor(a[2usize], &b[2usize]),
2338            i32::bitor(a[3usize], &b[3usize]),
2339        ]
2340        .simd_into(self)
2341    }
2342    #[inline(always)]
2343    fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2344        [
2345            i32::bitxor(a[0usize], &b[0usize]),
2346            i32::bitxor(a[1usize], &b[1usize]),
2347            i32::bitxor(a[2usize], &b[2usize]),
2348            i32::bitxor(a[3usize], &b[3usize]),
2349        ]
2350        .simd_into(self)
2351    }
2352    #[inline(always)]
2353    fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2354        [
2355            i32::shr(a[0usize], shift as i32),
2356            i32::shr(a[1usize], shift as i32),
2357            i32::shr(a[2usize], shift as i32),
2358            i32::shr(a[3usize], shift as i32),
2359        ]
2360        .simd_into(self)
2361    }
2362    #[inline(always)]
2363    fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2364        [
2365            i32::shr(a[0usize], &b[0usize]),
2366            i32::shr(a[1usize], &b[1usize]),
2367            i32::shr(a[2usize], &b[2usize]),
2368            i32::shr(a[3usize], &b[3usize]),
2369        ]
2370        .simd_into(self)
2371    }
2372    #[inline(always)]
2373    fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2374        [
2375            i32::shl(a[0usize], shift as i32),
2376            i32::shl(a[1usize], shift as i32),
2377            i32::shl(a[2usize], shift as i32),
2378            i32::shl(a[3usize], shift as i32),
2379        ]
2380        .simd_into(self)
2381    }
2382    #[inline(always)]
2383    fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2384        [
2385            -(i32::eq(&a[0usize], &b[0usize]) as i32),
2386            -(i32::eq(&a[1usize], &b[1usize]) as i32),
2387            -(i32::eq(&a[2usize], &b[2usize]) as i32),
2388            -(i32::eq(&a[3usize], &b[3usize]) as i32),
2389        ]
2390        .simd_into(self)
2391    }
2392    #[inline(always)]
2393    fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2394        [
2395            -(i32::lt(&a[0usize], &b[0usize]) as i32),
2396            -(i32::lt(&a[1usize], &b[1usize]) as i32),
2397            -(i32::lt(&a[2usize], &b[2usize]) as i32),
2398            -(i32::lt(&a[3usize], &b[3usize]) as i32),
2399        ]
2400        .simd_into(self)
2401    }
2402    #[inline(always)]
2403    fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2404        [
2405            -(i32::le(&a[0usize], &b[0usize]) as i32),
2406            -(i32::le(&a[1usize], &b[1usize]) as i32),
2407            -(i32::le(&a[2usize], &b[2usize]) as i32),
2408            -(i32::le(&a[3usize], &b[3usize]) as i32),
2409        ]
2410        .simd_into(self)
2411    }
2412    #[inline(always)]
2413    fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2414        [
2415            -(i32::ge(&a[0usize], &b[0usize]) as i32),
2416            -(i32::ge(&a[1usize], &b[1usize]) as i32),
2417            -(i32::ge(&a[2usize], &b[2usize]) as i32),
2418            -(i32::ge(&a[3usize], &b[3usize]) as i32),
2419        ]
2420        .simd_into(self)
2421    }
2422    #[inline(always)]
2423    fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2424        [
2425            -(i32::gt(&a[0usize], &b[0usize]) as i32),
2426            -(i32::gt(&a[1usize], &b[1usize]) as i32),
2427            -(i32::gt(&a[2usize], &b[2usize]) as i32),
2428            -(i32::gt(&a[3usize], &b[3usize]) as i32),
2429        ]
2430        .simd_into(self)
2431    }
2432    #[inline(always)]
2433    fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2434        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2435    }
2436    #[inline(always)]
2437    fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2438        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2439    }
2440    #[inline(always)]
2441    fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2442        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2443    }
2444    #[inline(always)]
2445    fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2446        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2447    }
2448    #[inline(always)]
2449    fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
2450        [
2451            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2452            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2453            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2454            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2455        ]
2456        .simd_into(self)
2457    }
2458    #[inline(always)]
2459    fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2460        [
2461            i32::min(a[0usize], b[0usize]),
2462            i32::min(a[1usize], b[1usize]),
2463            i32::min(a[2usize], b[2usize]),
2464            i32::min(a[3usize], b[3usize]),
2465        ]
2466        .simd_into(self)
2467    }
2468    #[inline(always)]
2469    fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2470        [
2471            i32::max(a[0usize], b[0usize]),
2472            i32::max(a[1usize], b[1usize]),
2473            i32::max(a[2usize], b[2usize]),
2474            i32::max(a[3usize], b[3usize]),
2475        ]
2476        .simd_into(self)
2477    }
2478    #[inline(always)]
2479    fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
2480        let mut result = [0; 8usize];
2481        result[0..4usize].copy_from_slice(&a.val);
2482        result[4usize..8usize].copy_from_slice(&b.val);
2483        result.simd_into(self)
2484    }
2485    #[inline(always)]
2486    fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2487        [
2488            i32::neg(a[0usize]),
2489            i32::neg(a[1usize]),
2490            i32::neg(a[2usize]),
2491            i32::neg(a[3usize]),
2492        ]
2493        .simd_into(self)
2494    }
2495    #[inline(always)]
2496    fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
2497        u8x16 {
2498            val: bytemuck::cast(a.val),
2499            simd: a.simd,
2500        }
2501    }
2502    #[inline(always)]
2503    fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
2504        u32x4 {
2505            val: bytemuck::cast(a.val),
2506            simd: a.simd,
2507        }
2508    }
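    // Unlike `reinterpret_*`, `cvt_f32_*` is a numeric conversion: each i32
    // lane is converted value-wise to f32 with an `as` cast (rounding when
    // the value is not exactly representable).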
2509    #[inline(always)]
2510    fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
2511        [
2512            a[0usize] as f32,
2513            a[1usize] as f32,
2514            a[2usize] as f32,
2515            a[3usize] as f32,
2516        ]
2517        .simd_into(self)
2518    }
2519    #[inline(always)]
2520    fn splat_u32x4(self, val: u32) -> u32x4<Self> {
2521        [val; 4usize].simd_into(self)
2522    }
2523    #[inline(always)]
2524    fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
2525        [
2526            u32::not(a[0usize]),
2527            u32::not(a[1usize]),
2528            u32::not(a[2usize]),
2529            u32::not(a[3usize]),
2530        ]
2531        .simd_into(self)
2532    }
2533    #[inline(always)]
2534    fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2535        [
2536            u32::wrapping_add(a[0usize], b[0usize]),
2537            u32::wrapping_add(a[1usize], b[1usize]),
2538            u32::wrapping_add(a[2usize], b[2usize]),
2539            u32::wrapping_add(a[3usize], b[3usize]),
2540        ]
2541        .simd_into(self)
2542    }
2543    #[inline(always)]
2544    fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2545        [
2546            u32::wrapping_sub(a[0usize], b[0usize]),
2547            u32::wrapping_sub(a[1usize], b[1usize]),
2548            u32::wrapping_sub(a[2usize], b[2usize]),
2549            u32::wrapping_sub(a[3usize], b[3usize]),
2550        ]
2551        .simd_into(self)
2552    }
2553    #[inline(always)]
2554    fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2555        [
2556            u32::wrapping_mul(a[0usize], b[0usize]),
2557            u32::wrapping_mul(a[1usize], b[1usize]),
2558            u32::wrapping_mul(a[2usize], b[2usize]),
2559            u32::wrapping_mul(a[3usize], b[3usize]),
2560        ]
2561        .simd_into(self)
2562    }
2563    #[inline(always)]
2564    fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2565        [
2566            u32::bitand(a[0usize], &b[0usize]),
2567            u32::bitand(a[1usize], &b[1usize]),
2568            u32::bitand(a[2usize], &b[2usize]),
2569            u32::bitand(a[3usize], &b[3usize]),
2570        ]
2571        .simd_into(self)
2572    }
2573    #[inline(always)]
2574    fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2575        [
2576            u32::bitor(a[0usize], &b[0usize]),
2577            u32::bitor(a[1usize], &b[1usize]),
2578            u32::bitor(a[2usize], &b[2usize]),
2579            u32::bitor(a[3usize], &b[3usize]),
2580        ]
2581        .simd_into(self)
2582    }
2583    #[inline(always)]
2584    fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2585        [
2586            u32::bitxor(a[0usize], &b[0usize]),
2587            u32::bitxor(a[1usize], &b[1usize]),
2588            u32::bitxor(a[2usize], &b[2usize]),
2589            u32::bitxor(a[3usize], &b[3usize]),
2590        ]
2591        .simd_into(self)
2592    }
2593    #[inline(always)]
2594    fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2595        [
2596            u32::shr(a[0usize], shift as u32),
2597            u32::shr(a[1usize], shift as u32),
2598            u32::shr(a[2usize], shift as u32),
2599            u32::shr(a[3usize], shift as u32),
2600        ]
2601        .simd_into(self)
2602    }
2603    #[inline(always)]
2604    fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2605        [
2606            u32::shr(a[0usize], &b[0usize]),
2607            u32::shr(a[1usize], &b[1usize]),
2608            u32::shr(a[2usize], &b[2usize]),
2609            u32::shr(a[3usize], &b[3usize]),
2610        ]
2611        .simd_into(self)
2612    }
2613    #[inline(always)]
2614    fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2615        [
2616            u32::shl(a[0usize], shift as u32),
2617            u32::shl(a[1usize], shift as u32),
2618            u32::shl(a[2usize], shift as u32),
2619            u32::shl(a[3usize], shift as u32),
2620        ]
2621        .simd_into(self)
2622    }
2623    #[inline(always)]
2624    fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2625        [
2626            -(u32::eq(&a[0usize], &b[0usize]) as i32),
2627            -(u32::eq(&a[1usize], &b[1usize]) as i32),
2628            -(u32::eq(&a[2usize], &b[2usize]) as i32),
2629            -(u32::eq(&a[3usize], &b[3usize]) as i32),
2630        ]
2631        .simd_into(self)
2632    }
2633    #[inline(always)]
2634    fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2635        [
2636            -(u32::lt(&a[0usize], &b[0usize]) as i32),
2637            -(u32::lt(&a[1usize], &b[1usize]) as i32),
2638            -(u32::lt(&a[2usize], &b[2usize]) as i32),
2639            -(u32::lt(&a[3usize], &b[3usize]) as i32),
2640        ]
2641        .simd_into(self)
2642    }
2643    #[inline(always)]
2644    fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2645        [
2646            -(u32::le(&a[0usize], &b[0usize]) as i32),
2647            -(u32::le(&a[1usize], &b[1usize]) as i32),
2648            -(u32::le(&a[2usize], &b[2usize]) as i32),
2649            -(u32::le(&a[3usize], &b[3usize]) as i32),
2650        ]
2651        .simd_into(self)
2652    }
2653    #[inline(always)]
2654    fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2655        [
2656            -(u32::ge(&a[0usize], &b[0usize]) as i32),
2657            -(u32::ge(&a[1usize], &b[1usize]) as i32),
2658            -(u32::ge(&a[2usize], &b[2usize]) as i32),
2659            -(u32::ge(&a[3usize], &b[3usize]) as i32),
2660        ]
2661        .simd_into(self)
2662    }
2663    #[inline(always)]
2664    fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2665        [
2666            -(u32::gt(&a[0usize], &b[0usize]) as i32),
2667            -(u32::gt(&a[1usize], &b[1usize]) as i32),
2668            -(u32::gt(&a[2usize], &b[2usize]) as i32),
2669            -(u32::gt(&a[3usize], &b[3usize]) as i32),
2670        ]
2671        .simd_into(self)
2672    }
2673    #[inline(always)]
2674    fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2675        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2676    }
2677    #[inline(always)]
2678    fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2679        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2680    }
2681    #[inline(always)]
2682    fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2683        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2684    }
2685    #[inline(always)]
2686    fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2687        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2688    }
2689    #[inline(always)]
2690    fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
2691        [
2692            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2693            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2694            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2695            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2696        ]
2697        .simd_into(self)
2698    }
2699    #[inline(always)]
2700    fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2701        [
2702            u32::min(a[0usize], b[0usize]),
2703            u32::min(a[1usize], b[1usize]),
2704            u32::min(a[2usize], b[2usize]),
2705            u32::min(a[3usize], b[3usize]),
2706        ]
2707        .simd_into(self)
2708    }
2709    #[inline(always)]
2710    fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2711        [
2712            u32::max(a[0usize], b[0usize]),
2713            u32::max(a[1usize], b[1usize]),
2714            u32::max(a[2usize], b[2usize]),
2715            u32::max(a[3usize], b[3usize]),
2716        ]
2717        .simd_into(self)
2718    }
2719    #[inline(always)]
2720    fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
2721        let mut result = [0; 8usize];
2722        result[0..4usize].copy_from_slice(&a.val);
2723        result[4usize..8usize].copy_from_slice(&b.val);
2724        result.simd_into(self)
2725    }
2726    #[inline(always)]
2727    fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
2728        u8x16 {
2729            val: bytemuck::cast(a.val),
2730            simd: a.simd,
2731        }
2732    }
2733    #[inline(always)]
2734    fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
2735        [
2736            a[0usize] as f32,
2737            a[1usize] as f32,
2738            a[2usize] as f32,
2739            a[3usize] as f32,
2740        ]
2741        .simd_into(self)
2742    }
2743    #[inline(always)]
2744    fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
2745        [val; 4usize].simd_into(self)
2746    }
2747    #[inline(always)]
2748    fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
2749        [
2750            i32::not(a[0usize]),
2751            i32::not(a[1usize]),
2752            i32::not(a[2usize]),
2753            i32::not(a[3usize]),
2754        ]
2755        .simd_into(self)
2756    }
2757    #[inline(always)]
2758    fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2759        [
2760            i32::bitand(a[0usize], &b[0usize]),
2761            i32::bitand(a[1usize], &b[1usize]),
2762            i32::bitand(a[2usize], &b[2usize]),
2763            i32::bitand(a[3usize], &b[3usize]),
2764        ]
2765        .simd_into(self)
2766    }
2767    #[inline(always)]
2768    fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2769        [
2770            i32::bitor(a[0usize], &b[0usize]),
2771            i32::bitor(a[1usize], &b[1usize]),
2772            i32::bitor(a[2usize], &b[2usize]),
2773            i32::bitor(a[3usize], &b[3usize]),
2774        ]
2775        .simd_into(self)
2776    }
2777    #[inline(always)]
2778    fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2779        [
2780            i32::bitxor(a[0usize], &b[0usize]),
2781            i32::bitxor(a[1usize], &b[1usize]),
2782            i32::bitxor(a[2usize], &b[2usize]),
2783            i32::bitxor(a[3usize], &b[3usize]),
2784        ]
2785        .simd_into(self)
2786    }
2787    #[inline(always)]
2788    fn select_mask32x4(
2789        self,
2790        a: mask32x4<Self>,
2791        b: mask32x4<Self>,
2792        c: mask32x4<Self>,
2793    ) -> mask32x4<Self> {
2794        [
2795            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2796            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2797            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2798            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2799        ]
2800        .simd_into(self)
2801    }
2802    #[inline(always)]
2803    fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2804        [
2805            -(i32::eq(&a[0usize], &b[0usize]) as i32),
2806            -(i32::eq(&a[1usize], &b[1usize]) as i32),
2807            -(i32::eq(&a[2usize], &b[2usize]) as i32),
2808            -(i32::eq(&a[3usize], &b[3usize]) as i32),
2809        ]
2810        .simd_into(self)
2811    }
2812    #[inline(always)]
2813    fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
2814        let mut result = [0; 8usize];
2815        result[0..4usize].copy_from_slice(&a.val);
2816        result[4usize..8usize].copy_from_slice(&b.val);
2817        result.simd_into(self)
2818    }
2819    #[inline(always)]
2820    fn splat_f64x2(self, val: f64) -> f64x2<Self> {
2821        [val; 2usize].simd_into(self)
2822    }
2823    #[inline(always)]
2824    fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2825        [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
2826    }
2827    #[inline(always)]
2828    fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2829        [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
2830    }
2831    #[inline(always)]
2832    fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2833        [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
2834    }
2835    #[inline(always)]
2836    fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2837        [
2838            f64::add(a[0usize], &b[0usize]),
2839            f64::add(a[1usize], &b[1usize]),
2840        ]
2841        .simd_into(self)
2842    }
2843    #[inline(always)]
2844    fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2845        [
2846            f64::sub(a[0usize], &b[0usize]),
2847            f64::sub(a[1usize], &b[1usize]),
2848        ]
2849        .simd_into(self)
2850    }
2851    #[inline(always)]
2852    fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2853        [
2854            f64::mul(a[0usize], &b[0usize]),
2855            f64::mul(a[1usize], &b[1usize]),
2856        ]
2857        .simd_into(self)
2858    }
2859    #[inline(always)]
2860    fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2861        [
2862            f64::div(a[0usize], &b[0usize]),
2863            f64::div(a[1usize], &b[1usize]),
2864        ]
2865        .simd_into(self)
2866    }
2867    #[inline(always)]
2868    fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2869        [
2870            f64::copysign(a[0usize], b[0usize]),
2871            f64::copysign(a[1usize], b[1usize]),
2872        ]
2873        .simd_into(self)
2874    }
2875    #[inline(always)]
2876    fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2877        [
2878            -(f64::eq(&a[0usize], &b[0usize]) as i64),
2879            -(f64::eq(&a[1usize], &b[1usize]) as i64),
2880        ]
2881        .simd_into(self)
2882    }
2883    #[inline(always)]
2884    fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2885        [
2886            -(f64::lt(&a[0usize], &b[0usize]) as i64),
2887            -(f64::lt(&a[1usize], &b[1usize]) as i64),
2888        ]
2889        .simd_into(self)
2890    }
2891    #[inline(always)]
2892    fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2893        [
2894            -(f64::le(&a[0usize], &b[0usize]) as i64),
2895            -(f64::le(&a[1usize], &b[1usize]) as i64),
2896        ]
2897        .simd_into(self)
2898    }
2899    #[inline(always)]
2900    fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2901        [
2902            -(f64::ge(&a[0usize], &b[0usize]) as i64),
2903            -(f64::ge(&a[1usize], &b[1usize]) as i64),
2904        ]
2905        .simd_into(self)
2906    }
2907    #[inline(always)]
2908    fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2909        [
2910            -(f64::gt(&a[0usize], &b[0usize]) as i64),
2911            -(f64::gt(&a[1usize], &b[1usize]) as i64),
2912        ]
2913        .simd_into(self)
2914    }
2915    #[inline(always)]
2916    fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2917        [a[0usize], b[0usize]].simd_into(self)
2918    }
2919    #[inline(always)]
2920    fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2921        [a[1usize], b[1usize]].simd_into(self)
2922    }
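    // For 2-lane vectors, unzip_low/unzip_high coincide with zip_low/zip_high,
    // so the bodies below intentionally mirror the zip implementations above.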
2923    #[inline(always)]
2924    fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2925        [a[0usize], b[0usize]].simd_into(self)
2926    }
2927    #[inline(always)]
2928    fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2929        [a[1usize], b[1usize]].simd_into(self)
2930    }
2931    #[inline(always)]
2932    fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2933        [
2934            f64::max(a[0usize], b[0usize]),
2935            f64::max(a[1usize], b[1usize]),
2936        ]
2937        .simd_into(self)
2938    }
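    // In the fallback, the "precise" min/max variants match the plain ones:
    // f64::max / f64::min already return the non-NaN operand when exactly one
    // input is NaN.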
2939    #[inline(always)]
2940    fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2941        [
2942            f64::max(a[0usize], b[0usize]),
2943            f64::max(a[1usize], b[1usize]),
2944        ]
2945        .simd_into(self)
2946    }
2947    #[inline(always)]
2948    fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2949        [
2950            f64::min(a[0usize], b[0usize]),
2951            f64::min(a[1usize], b[1usize]),
2952        ]
2953        .simd_into(self)
2954    }
2955    #[inline(always)]
2956    fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2957        [
2958            f64::min(a[0usize], b[0usize]),
2959            f64::min(a[1usize], b[1usize]),
2960        ]
2961        .simd_into(self)
2962    }
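    // madd/msub here are an unfused multiply followed by add/sub (two roundings),
    // so results may differ in the last bit from a fused multiply-add.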
2963    #[inline(always)]
2964    fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2965        a.mul(b).add(c)
2966    }
2967    #[inline(always)]
2968    fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2969        a.mul(b).sub(c)
2970    }
2971    #[inline(always)]
2972    fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2973        [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
2974    }
2975    #[inline(always)]
2976    fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2977        [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
2978    }
2979    #[inline(always)]
2980    fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2981        [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
2982    }
2983    #[inline(always)]
2984    fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2985        [
2986            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2987            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2988        ]
2989        .simd_into(self)
2990    }
2991    #[inline(always)]
2992    fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
2993        let mut result = [0.0; 4usize];
2994        result[0..2usize].copy_from_slice(&a.val);
2995        result[2usize..4usize].copy_from_slice(&b.val);
2996        result.simd_into(self)
2997    }
2998    #[inline(always)]
2999    fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
3000        f32x4 {
3001            val: bytemuck::cast(a.val),
3002            simd: a.simd,
3003        }
3004    }
3005    #[inline(always)]
3006    fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
3007        [val; 2usize].simd_into(self)
3008    }
3009    #[inline(always)]
3010    fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
3011        [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
3012    }
3013    #[inline(always)]
3014    fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3015        [
3016            i64::bitand(a[0usize], &b[0usize]),
3017            i64::bitand(a[1usize], &b[1usize]),
3018        ]
3019        .simd_into(self)
3020    }
3021    #[inline(always)]
3022    fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3023        [
3024            i64::bitor(a[0usize], &b[0usize]),
3025            i64::bitor(a[1usize], &b[1usize]),
3026        ]
3027        .simd_into(self)
3028    }
3029    #[inline(always)]
3030    fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3031        [
3032            i64::bitxor(a[0usize], &b[0usize]),
3033            i64::bitxor(a[1usize], &b[1usize]),
3034        ]
3035        .simd_into(self)
3036    }
3037    #[inline(always)]
3038    fn select_mask64x2(
3039        self,
3040        a: mask64x2<Self>,
3041        b: mask64x2<Self>,
3042        c: mask64x2<Self>,
3043    ) -> mask64x2<Self> {
3044        [
3045            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3046            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3047        ]
3048        .simd_into(self)
3049    }
3050    #[inline(always)]
3051    fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3052        [
3053            -(i64::eq(&a[0usize], &b[0usize]) as i64),
3054            -(i64::eq(&a[1usize], &b[1usize]) as i64),
3055        ]
3056        .simd_into(self)
3057    }
3058    #[inline(always)]
3059    fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
3060        let mut result = [0; 4usize];
3061        result[0..2usize].copy_from_slice(&a.val);
3062        result[2usize..4usize].copy_from_slice(&b.val);
3063        result.simd_into(self)
3064    }
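    // From here on, wider-than-native vectors are emulated by splitting into two
    // half-width vectors, applying the half-width operation to each part, and
    // recombining the results.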
3065    #[inline(always)]
3066    fn splat_f32x8(self, a: f32) -> f32x8<Self> {
3067        let half = self.splat_f32x4(a);
3068        self.combine_f32x4(half, half)
3069    }
3070    #[inline(always)]
3071    fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3072        let (a0, a1) = self.split_f32x8(a);
3073        self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
3074    }
3075    #[inline(always)]
3076    fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3077        let (a0, a1) = self.split_f32x8(a);
3078        self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
3079    }
3080    #[inline(always)]
3081    fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3082        let (a0, a1) = self.split_f32x8(a);
3083        self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
3084    }
3085    #[inline(always)]
3086    fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3087        let (a0, a1) = self.split_f32x8(a);
3088        let (b0, b1) = self.split_f32x8(b);
3089        self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
3090    }
3091    #[inline(always)]
3092    fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3093        let (a0, a1) = self.split_f32x8(a);
3094        let (b0, b1) = self.split_f32x8(b);
3095        self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
3096    }
3097    #[inline(always)]
3098    fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3099        let (a0, a1) = self.split_f32x8(a);
3100        let (b0, b1) = self.split_f32x8(b);
3101        self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
3102    }
3103    #[inline(always)]
3104    fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3105        let (a0, a1) = self.split_f32x8(a);
3106        let (b0, b1) = self.split_f32x8(b);
3107        self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
3108    }
3109    #[inline(always)]
3110    fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3111        let (a0, a1) = self.split_f32x8(a);
3112        let (b0, b1) = self.split_f32x8(b);
3113        self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
3114    }
3115    #[inline(always)]
3116    fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3117        let (a0, a1) = self.split_f32x8(a);
3118        let (b0, b1) = self.split_f32x8(b);
3119        self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
3120    }
3121    #[inline(always)]
3122    fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3123        let (a0, a1) = self.split_f32x8(a);
3124        let (b0, b1) = self.split_f32x8(b);
3125        self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
3126    }
3127    #[inline(always)]
3128    fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3129        let (a0, a1) = self.split_f32x8(a);
3130        let (b0, b1) = self.split_f32x8(b);
3131        self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
3132    }
3133    #[inline(always)]
3134    fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3135        let (a0, a1) = self.split_f32x8(a);
3136        let (b0, b1) = self.split_f32x8(b);
3137        self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
3138    }
3139    #[inline(always)]
3140    fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3141        let (a0, a1) = self.split_f32x8(a);
3142        let (b0, b1) = self.split_f32x8(b);
3143        self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
3144    }
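    // zip_low interleaves the low halves of a and b, so for 8 lanes it is built
    // from zip_low/zip_high of (a0, b0); unzip_low/unzip_high gather the even/odd
    // lanes of the concatenation, hence the unzip of (a0, a1) and (b0, b1) below.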
3145    #[inline(always)]
3146    fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3147        let (a0, _) = self.split_f32x8(a);
3148        let (b0, _) = self.split_f32x8(b);
3149        self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
3150    }
3151    #[inline(always)]
3152    fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3153        let (_, a1) = self.split_f32x8(a);
3154        let (_, b1) = self.split_f32x8(b);
3155        self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
3156    }
3157    #[inline(always)]
3158    fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3159        let (a0, a1) = self.split_f32x8(a);
3160        let (b0, b1) = self.split_f32x8(b);
3161        self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
3162    }
3163    #[inline(always)]
3164    fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3165        let (a0, a1) = self.split_f32x8(a);
3166        let (b0, b1) = self.split_f32x8(b);
3167        self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
3168    }
3169    #[inline(always)]
3170    fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3171        let (a0, a1) = self.split_f32x8(a);
3172        let (b0, b1) = self.split_f32x8(b);
3173        self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
3174    }
3175    #[inline(always)]
3176    fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3177        let (a0, a1) = self.split_f32x8(a);
3178        let (b0, b1) = self.split_f32x8(b);
3179        self.combine_f32x4(
3180            self.max_precise_f32x4(a0, b0),
3181            self.max_precise_f32x4(a1, b1),
3182        )
3183    }
3184    #[inline(always)]
3185    fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3186        let (a0, a1) = self.split_f32x8(a);
3187        let (b0, b1) = self.split_f32x8(b);
3188        self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
3189    }
3190    #[inline(always)]
3191    fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3192        let (a0, a1) = self.split_f32x8(a);
3193        let (b0, b1) = self.split_f32x8(b);
3194        self.combine_f32x4(
3195            self.min_precise_f32x4(a0, b0),
3196            self.min_precise_f32x4(a1, b1),
3197        )
3198    }
3199    #[inline(always)]
3200    fn madd_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3201        let (a0, a1) = self.split_f32x8(a);
3202        let (b0, b1) = self.split_f32x8(b);
3203        let (c0, c1) = self.split_f32x8(c);
3204        self.combine_f32x4(self.madd_f32x4(a0, b0, c0), self.madd_f32x4(a1, b1, c1))
3205    }
3206    #[inline(always)]
3207    fn msub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3208        let (a0, a1) = self.split_f32x8(a);
3209        let (b0, b1) = self.split_f32x8(b);
3210        let (c0, c1) = self.split_f32x8(c);
3211        self.combine_f32x4(self.msub_f32x4(a0, b0, c0), self.msub_f32x4(a1, b1, c1))
3212    }
3213    #[inline(always)]
3214    fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3215        let (a0, a1) = self.split_f32x8(a);
3216        self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
3217    }
3218    #[inline(always)]
3219    fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3220        let (a0, a1) = self.split_f32x8(a);
3221        self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
3222    }
3223    #[inline(always)]
3224    fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3225        let (a0, a1) = self.split_f32x8(a);
3226        self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
3227    }
3228    #[inline(always)]
3229    fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3230        let (a0, a1) = self.split_mask32x8(a);
3231        let (b0, b1) = self.split_f32x8(b);
3232        let (c0, c1) = self.split_f32x8(c);
3233        self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
3234    }
3235    #[inline(always)]
3236    fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
3237        let mut result = [0.0; 16usize];
3238        result[0..8usize].copy_from_slice(&a.val);
3239        result[8usize..16usize].copy_from_slice(&b.val);
3240        result.simd_into(self)
3241    }
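    // split_* is the inverse of combine_*: the low and high halves are copied out
    // into two half-width vectors.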
3242    #[inline(always)]
3243    fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
3244        let mut b0 = [0.0; 4usize];
3245        let mut b1 = [0.0; 4usize];
3246        b0.copy_from_slice(&a.val[0..4usize]);
3247        b1.copy_from_slice(&a.val[4usize..8usize]);
3248        (b0.simd_into(self), b1.simd_into(self))
3249    }
3250    #[inline(always)]
3251    fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
3252        let (a0, a1) = self.split_f32x8(a);
3253        self.combine_f64x2(
3254            self.reinterpret_f64_f32x4(a0),
3255            self.reinterpret_f64_f32x4(a1),
3256        )
3257    }
3258    #[inline(always)]
3259    fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3260        let (a0, a1) = self.split_f32x8(a);
3261        self.combine_i32x4(
3262            self.reinterpret_i32_f32x4(a0),
3263            self.reinterpret_i32_f32x4(a1),
3264        )
3265    }
3266    #[inline(always)]
3267    fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
3268        let (a0, a1) = self.split_f32x8(a);
3269        self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
3270    }
3271    #[inline(always)]
3272    fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3273        let (a0, a1) = self.split_f32x8(a);
3274        self.combine_u32x4(
3275            self.reinterpret_u32_f32x4(a0),
3276            self.reinterpret_u32_f32x4(a1),
3277        )
3278    }
3279    #[inline(always)]
3280    fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3281        let (a0, a1) = self.split_f32x8(a);
3282        self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
3283    }
3284    #[inline(always)]
3285    fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3286        let (a0, a1) = self.split_f32x8(a);
3287        self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
3288    }
3289    #[inline(always)]
3290    fn splat_i8x32(self, a: i8) -> i8x32<Self> {
3291        let half = self.splat_i8x16(a);
3292        self.combine_i8x16(half, half)
3293    }
3294    #[inline(always)]
3295    fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3296        let (a0, a1) = self.split_i8x32(a);
3297        self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
3298    }
3299    #[inline(always)]
3300    fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3301        let (a0, a1) = self.split_i8x32(a);
3302        let (b0, b1) = self.split_i8x32(b);
3303        self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
3304    }
3305    #[inline(always)]
3306    fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3307        let (a0, a1) = self.split_i8x32(a);
3308        let (b0, b1) = self.split_i8x32(b);
3309        self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
3310    }
3311    #[inline(always)]
3312    fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3313        let (a0, a1) = self.split_i8x32(a);
3314        let (b0, b1) = self.split_i8x32(b);
3315        self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
3316    }
3317    #[inline(always)]
3318    fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3319        let (a0, a1) = self.split_i8x32(a);
3320        let (b0, b1) = self.split_i8x32(b);
3321        self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
3322    }
3323    #[inline(always)]
3324    fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3325        let (a0, a1) = self.split_i8x32(a);
3326        let (b0, b1) = self.split_i8x32(b);
3327        self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
3328    }
3329    #[inline(always)]
3330    fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3331        let (a0, a1) = self.split_i8x32(a);
3332        let (b0, b1) = self.split_i8x32(b);
3333        self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
3334    }
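    // shr/shl apply a single u32 shift amount to every lane; shrv shifts each lane
    // by the corresponding lane of the second operand.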
3335    #[inline(always)]
3336    fn shr_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3337        let (a0, a1) = self.split_i8x32(a);
3338        self.combine_i8x16(self.shr_i8x16(a0, b), self.shr_i8x16(a1, b))
3339    }
3340    #[inline(always)]
3341    fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3342        let (a0, a1) = self.split_i8x32(a);
3343        let (b0, b1) = self.split_i8x32(b);
3344        self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
3345    }
3346    #[inline(always)]
3347    fn shl_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3348        let (a0, a1) = self.split_i8x32(a);
3349        self.combine_i8x16(self.shl_i8x16(a0, b), self.shl_i8x16(a1, b))
3350    }
3351    #[inline(always)]
3352    fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3353        let (a0, a1) = self.split_i8x32(a);
3354        let (b0, b1) = self.split_i8x32(b);
3355        self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
3356    }
3357    #[inline(always)]
3358    fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3359        let (a0, a1) = self.split_i8x32(a);
3360        let (b0, b1) = self.split_i8x32(b);
3361        self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
3362    }
3363    #[inline(always)]
3364    fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3365        let (a0, a1) = self.split_i8x32(a);
3366        let (b0, b1) = self.split_i8x32(b);
3367        self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
3368    }
3369    #[inline(always)]
3370    fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3371        let (a0, a1) = self.split_i8x32(a);
3372        let (b0, b1) = self.split_i8x32(b);
3373        self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
3374    }
3375    #[inline(always)]
3376    fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3377        let (a0, a1) = self.split_i8x32(a);
3378        let (b0, b1) = self.split_i8x32(b);
3379        self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
3380    }
3381    #[inline(always)]
3382    fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3383        let (a0, _) = self.split_i8x32(a);
3384        let (b0, _) = self.split_i8x32(b);
3385        self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
3386    }
3387    #[inline(always)]
3388    fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3389        let (_, a1) = self.split_i8x32(a);
3390        let (_, b1) = self.split_i8x32(b);
3391        self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
3392    }
3393    #[inline(always)]
3394    fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3395        let (a0, a1) = self.split_i8x32(a);
3396        let (b0, b1) = self.split_i8x32(b);
3397        self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
3398    }
3399    #[inline(always)]
3400    fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3401        let (a0, a1) = self.split_i8x32(a);
3402        let (b0, b1) = self.split_i8x32(b);
3403        self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
3404    }
3405    #[inline(always)]
3406    fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
3407        let (a0, a1) = self.split_mask8x32(a);
3408        let (b0, b1) = self.split_i8x32(b);
3409        let (c0, c1) = self.split_i8x32(c);
3410        self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
3411    }
3412    #[inline(always)]
3413    fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3414        let (a0, a1) = self.split_i8x32(a);
3415        let (b0, b1) = self.split_i8x32(b);
3416        self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
3417    }
3418    #[inline(always)]
3419    fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3420        let (a0, a1) = self.split_i8x32(a);
3421        let (b0, b1) = self.split_i8x32(b);
3422        self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
3423    }
3424    #[inline(always)]
3425    fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
3426        let mut result = [0; 64usize];
3427        result[0..32usize].copy_from_slice(&a.val);
3428        result[32usize..64usize].copy_from_slice(&b.val);
3429        result.simd_into(self)
3430    }
3431    #[inline(always)]
3432    fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
3433        let mut b0 = [0; 16usize];
3434        let mut b1 = [0; 16usize];
3435        b0.copy_from_slice(&a.val[0..16usize]);
3436        b1.copy_from_slice(&a.val[16usize..32usize]);
3437        (b0.simd_into(self), b1.simd_into(self))
3438    }
3439    #[inline(always)]
3440    fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3441        let (a0, a1) = self.split_i8x32(a);
3442        self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
3443    }
3444    #[inline(always)]
3445    fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
3446        let (a0, a1) = self.split_i8x32(a);
3447        self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
3448    }
3449    #[inline(always)]
3450    fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
3451        let (a0, a1) = self.split_i8x32(a);
3452        self.combine_u32x4(
3453            self.reinterpret_u32_i8x16(a0),
3454            self.reinterpret_u32_i8x16(a1),
3455        )
3456    }
3457    #[inline(always)]
3458    fn splat_u8x32(self, a: u8) -> u8x32<Self> {
3459        let half = self.splat_u8x16(a);
3460        self.combine_u8x16(half, half)
3461    }
3462    #[inline(always)]
3463    fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
3464        let (a0, a1) = self.split_u8x32(a);
3465        self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
3466    }
3467    #[inline(always)]
3468    fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3469        let (a0, a1) = self.split_u8x32(a);
3470        let (b0, b1) = self.split_u8x32(b);
3471        self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
3472    }
3473    #[inline(always)]
3474    fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3475        let (a0, a1) = self.split_u8x32(a);
3476        let (b0, b1) = self.split_u8x32(b);
3477        self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
3478    }
3479    #[inline(always)]
3480    fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3481        let (a0, a1) = self.split_u8x32(a);
3482        let (b0, b1) = self.split_u8x32(b);
3483        self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
3484    }
3485    #[inline(always)]
3486    fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3487        let (a0, a1) = self.split_u8x32(a);
3488        let (b0, b1) = self.split_u8x32(b);
3489        self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
3490    }
3491    #[inline(always)]
3492    fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3493        let (a0, a1) = self.split_u8x32(a);
3494        let (b0, b1) = self.split_u8x32(b);
3495        self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
3496    }
3497    #[inline(always)]
3498    fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3499        let (a0, a1) = self.split_u8x32(a);
3500        let (b0, b1) = self.split_u8x32(b);
3501        self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
3502    }
3503    #[inline(always)]
3504    fn shr_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3505        let (a0, a1) = self.split_u8x32(a);
3506        self.combine_u8x16(self.shr_u8x16(a0, b), self.shr_u8x16(a1, b))
3507    }
3508    #[inline(always)]
3509    fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3510        let (a0, a1) = self.split_u8x32(a);
3511        let (b0, b1) = self.split_u8x32(b);
3512        self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
3513    }
3514    #[inline(always)]
3515    fn shl_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3516        let (a0, a1) = self.split_u8x32(a);
3517        self.combine_u8x16(self.shl_u8x16(a0, b), self.shl_u8x16(a1, b))
3518    }
3519    #[inline(always)]
3520    fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3521        let (a0, a1) = self.split_u8x32(a);
3522        let (b0, b1) = self.split_u8x32(b);
3523        self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
3524    }
3525    #[inline(always)]
3526    fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3527        let (a0, a1) = self.split_u8x32(a);
3528        let (b0, b1) = self.split_u8x32(b);
3529        self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
3530    }
3531    #[inline(always)]
3532    fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3533        let (a0, a1) = self.split_u8x32(a);
3534        let (b0, b1) = self.split_u8x32(b);
3535        self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
3536    }
3537    #[inline(always)]
3538    fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3539        let (a0, a1) = self.split_u8x32(a);
3540        let (b0, b1) = self.split_u8x32(b);
3541        self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
3542    }
3543    #[inline(always)]
3544    fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3545        let (a0, a1) = self.split_u8x32(a);
3546        let (b0, b1) = self.split_u8x32(b);
3547        self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
3548    }
3549    #[inline(always)]
3550    fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3551        let (a0, _) = self.split_u8x32(a);
3552        let (b0, _) = self.split_u8x32(b);
3553        self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
3554    }
3555    #[inline(always)]
3556    fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3557        let (_, a1) = self.split_u8x32(a);
3558        let (_, b1) = self.split_u8x32(b);
3559        self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
3560    }
3561    #[inline(always)]
3562    fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3563        let (a0, a1) = self.split_u8x32(a);
3564        let (b0, b1) = self.split_u8x32(b);
3565        self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
3566    }
3567    #[inline(always)]
3568    fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3569        let (a0, a1) = self.split_u8x32(a);
3570        let (b0, b1) = self.split_u8x32(b);
3571        self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
3572    }
3573    #[inline(always)]
3574    fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
3575        let (a0, a1) = self.split_mask8x32(a);
3576        let (b0, b1) = self.split_u8x32(b);
3577        let (c0, c1) = self.split_u8x32(c);
3578        self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
3579    }
3580    #[inline(always)]
3581    fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3582        let (a0, a1) = self.split_u8x32(a);
3583        let (b0, b1) = self.split_u8x32(b);
3584        self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
3585    }
3586    #[inline(always)]
3587    fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3588        let (a0, a1) = self.split_u8x32(a);
3589        let (b0, b1) = self.split_u8x32(b);
3590        self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
3591    }
3592    #[inline(always)]
3593    fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
3594        let mut result = [0; 64usize];
3595        result[0..32usize].copy_from_slice(&a.val);
3596        result[32usize..64usize].copy_from_slice(&b.val);
3597        result.simd_into(self)
3598    }
3599    #[inline(always)]
3600    fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
3601        let mut b0 = [0; 16usize];
3602        let mut b1 = [0; 16usize];
3603        b0.copy_from_slice(&a.val[0..16usize]);
3604        b1.copy_from_slice(&a.val[16usize..32usize]);
3605        (b0.simd_into(self), b1.simd_into(self))
3606    }
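    // widen promotes each u8 lane to u16 (a zero-extension for unsigned lanes),
    // so a 32-lane u8 vector becomes a 32-lane u16 vector.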
3607    #[inline(always)]
3608    fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
3609        let (a0, a1) = self.split_u8x32(a);
3610        self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
3611    }
3612    #[inline(always)]
3613    fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
3614        let (a0, a1) = self.split_u8x32(a);
3615        self.combine_u32x4(
3616            self.reinterpret_u32_u8x16(a0),
3617            self.reinterpret_u32_u8x16(a1),
3618        )
3619    }
3620    #[inline(always)]
3621    fn splat_mask8x32(self, a: i8) -> mask8x32<Self> {
3622        let half = self.splat_mask8x16(a);
3623        self.combine_mask8x16(half, half)
3624    }
3625    #[inline(always)]
3626    fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
3627        let (a0, a1) = self.split_mask8x32(a);
3628        self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
3629    }
3630    #[inline(always)]
3631    fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3632        let (a0, a1) = self.split_mask8x32(a);
3633        let (b0, b1) = self.split_mask8x32(b);
3634        self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
3635    }
3636    #[inline(always)]
3637    fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3638        let (a0, a1) = self.split_mask8x32(a);
3639        let (b0, b1) = self.split_mask8x32(b);
3640        self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
3641    }
3642    #[inline(always)]
3643    fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3644        let (a0, a1) = self.split_mask8x32(a);
3645        let (b0, b1) = self.split_mask8x32(b);
3646        self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
3647    }
3648    #[inline(always)]
3649    fn select_mask8x32(
3650        self,
3651        a: mask8x32<Self>,
3652        b: mask8x32<Self>,
3653        c: mask8x32<Self>,
3654    ) -> mask8x32<Self> {
3655        let (a0, a1) = self.split_mask8x32(a);
3656        let (b0, b1) = self.split_mask8x32(b);
3657        let (c0, c1) = self.split_mask8x32(c);
3658        self.combine_mask8x16(
3659            self.select_mask8x16(a0, b0, c0),
3660            self.select_mask8x16(a1, b1, c1),
3661        )
3662    }
3663    #[inline(always)]
3664    fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3665        let (a0, a1) = self.split_mask8x32(a);
3666        let (b0, b1) = self.split_mask8x32(b);
3667        self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
3668    }
3669    #[inline(always)]
3670    fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
3671        let mut result = [0; 64usize];
3672        result[0..32usize].copy_from_slice(&a.val);
3673        result[32usize..64usize].copy_from_slice(&b.val);
3674        result.simd_into(self)
3675    }
3676    #[inline(always)]
3677    fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
3678        let mut b0 = [0; 16usize];
3679        let mut b1 = [0; 16usize];
3680        b0.copy_from_slice(&a.val[0..16usize]);
3681        b1.copy_from_slice(&a.val[16usize..32usize]);
3682        (b0.simd_into(self), b1.simd_into(self))
3683    }
3684    #[inline(always)]
3685    fn splat_i16x16(self, a: i16) -> i16x16<Self> {
3686        let half = self.splat_i16x8(a);
3687        self.combine_i16x8(half, half)
3688    }
3689    #[inline(always)]
3690    fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3691        let (a0, a1) = self.split_i16x16(a);
3692        self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
3693    }
3694    #[inline(always)]
3695    fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3696        let (a0, a1) = self.split_i16x16(a);
3697        let (b0, b1) = self.split_i16x16(b);
3698        self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
3699    }
3700    #[inline(always)]
3701    fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3702        let (a0, a1) = self.split_i16x16(a);
3703        let (b0, b1) = self.split_i16x16(b);
3704        self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
3705    }
3706    #[inline(always)]
3707    fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3708        let (a0, a1) = self.split_i16x16(a);
3709        let (b0, b1) = self.split_i16x16(b);
3710        self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
3711    }
3712    #[inline(always)]
3713    fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3714        let (a0, a1) = self.split_i16x16(a);
3715        let (b0, b1) = self.split_i16x16(b);
3716        self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
3717    }
3718    #[inline(always)]
3719    fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3720        let (a0, a1) = self.split_i16x16(a);
3721        let (b0, b1) = self.split_i16x16(b);
3722        self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
3723    }
3724    #[inline(always)]
3725    fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3726        let (a0, a1) = self.split_i16x16(a);
3727        let (b0, b1) = self.split_i16x16(b);
3728        self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
3729    }
3730    #[inline(always)]
3731    fn shr_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3732        let (a0, a1) = self.split_i16x16(a);
3733        self.combine_i16x8(self.shr_i16x8(a0, b), self.shr_i16x8(a1, b))
3734    }
3735    #[inline(always)]
3736    fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3737        let (a0, a1) = self.split_i16x16(a);
3738        let (b0, b1) = self.split_i16x16(b);
3739        self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
3740    }
3741    #[inline(always)]
3742    fn shl_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3743        let (a0, a1) = self.split_i16x16(a);
3744        self.combine_i16x8(self.shl_i16x8(a0, b), self.shl_i16x8(a1, b))
3745    }
3746    #[inline(always)]
3747    fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3748        let (a0, a1) = self.split_i16x16(a);
3749        let (b0, b1) = self.split_i16x16(b);
3750        self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
3751    }
3752    #[inline(always)]
3753    fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3754        let (a0, a1) = self.split_i16x16(a);
3755        let (b0, b1) = self.split_i16x16(b);
3756        self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
3757    }
3758    #[inline(always)]
3759    fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3760        let (a0, a1) = self.split_i16x16(a);
3761        let (b0, b1) = self.split_i16x16(b);
3762        self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
3763    }
3764    #[inline(always)]
3765    fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3766        let (a0, a1) = self.split_i16x16(a);
3767        let (b0, b1) = self.split_i16x16(b);
3768        self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
3769    }
3770    #[inline(always)]
3771    fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3772        let (a0, a1) = self.split_i16x16(a);
3773        let (b0, b1) = self.split_i16x16(b);
3774        self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
3775    }
3776    #[inline(always)]
3777    fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3778        let (a0, _) = self.split_i16x16(a);
3779        let (b0, _) = self.split_i16x16(b);
3780        self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
3781    }
3782    #[inline(always)]
3783    fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3784        let (_, a1) = self.split_i16x16(a);
3785        let (_, b1) = self.split_i16x16(b);
3786        self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
3787    }
3788    #[inline(always)]
3789    fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3790        let (a0, a1) = self.split_i16x16(a);
3791        let (b0, b1) = self.split_i16x16(b);
3792        self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
3793    }
3794    #[inline(always)]
3795    fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3796        let (a0, a1) = self.split_i16x16(a);
3797        let (b0, b1) = self.split_i16x16(b);
3798        self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
3799    }
3800    #[inline(always)]
3801    fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
3802        let (a0, a1) = self.split_mask16x16(a);
3803        let (b0, b1) = self.split_i16x16(b);
3804        let (c0, c1) = self.split_i16x16(c);
3805        self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
3806    }
3807    #[inline(always)]
3808    fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3809        let (a0, a1) = self.split_i16x16(a);
3810        let (b0, b1) = self.split_i16x16(b);
3811        self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
3812    }
3813    #[inline(always)]
3814    fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3815        let (a0, a1) = self.split_i16x16(a);
3816        let (b0, b1) = self.split_i16x16(b);
3817        self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
3818    }
3819    #[inline(always)]
3820    fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
3821        let mut result = [0; 32usize];
3822        result[0..16usize].copy_from_slice(&a.val);
3823        result[16usize..32usize].copy_from_slice(&b.val);
3824        result.simd_into(self)
3825    }
3826    #[inline(always)]
3827    fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
3828        let mut b0 = [0; 8usize];
3829        let mut b1 = [0; 8usize];
3830        b0.copy_from_slice(&a.val[0..8usize]);
3831        b1.copy_from_slice(&a.val[8usize..16usize]);
3832        (b0.simd_into(self), b1.simd_into(self))
3833    }
3834    #[inline(always)]
3835    fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3836        let (a0, a1) = self.split_i16x16(a);
3837        self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
3838    }
3839    #[inline(always)]
3840    fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
3841        let (a0, a1) = self.split_i16x16(a);
3842        self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
3843    }
3844    #[inline(always)]
3845    fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
3846        let (a0, a1) = self.split_i16x16(a);
3847        self.combine_u32x4(
3848            self.reinterpret_u32_i16x8(a0),
3849            self.reinterpret_u32_i16x8(a1),
3850        )
3851    }
3852    #[inline(always)]
3853    fn splat_u16x16(self, a: u16) -> u16x16<Self> {
3854        let half = self.splat_u16x8(a);
3855        self.combine_u16x8(half, half)
3856    }
3857    #[inline(always)]
3858    fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
3859        let (a0, a1) = self.split_u16x16(a);
3860        self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
3861    }
3862    #[inline(always)]
3863    fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3864        let (a0, a1) = self.split_u16x16(a);
3865        let (b0, b1) = self.split_u16x16(b);
3866        self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
3867    }
3868    #[inline(always)]
3869    fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3870        let (a0, a1) = self.split_u16x16(a);
3871        let (b0, b1) = self.split_u16x16(b);
3872        self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
3873    }
3874    #[inline(always)]
3875    fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3876        let (a0, a1) = self.split_u16x16(a);
3877        let (b0, b1) = self.split_u16x16(b);
3878        self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
3879    }
3880    #[inline(always)]
3881    fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3882        let (a0, a1) = self.split_u16x16(a);
3883        let (b0, b1) = self.split_u16x16(b);
3884        self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
3885    }
3886    #[inline(always)]
3887    fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3888        let (a0, a1) = self.split_u16x16(a);
3889        let (b0, b1) = self.split_u16x16(b);
3890        self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
3891    }
3892    #[inline(always)]
3893    fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3894        let (a0, a1) = self.split_u16x16(a);
3895        let (b0, b1) = self.split_u16x16(b);
3896        self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
3897    }
3898    #[inline(always)]
3899    fn shr_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3900        let (a0, a1) = self.split_u16x16(a);
3901        self.combine_u16x8(self.shr_u16x8(a0, b), self.shr_u16x8(a1, b))
3902    }
3903    #[inline(always)]
3904    fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3905        let (a0, a1) = self.split_u16x16(a);
3906        let (b0, b1) = self.split_u16x16(b);
3907        self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
3908    }
3909    #[inline(always)]
3910    fn shl_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3911        let (a0, a1) = self.split_u16x16(a);
3912        self.combine_u16x8(self.shl_u16x8(a0, b), self.shl_u16x8(a1, b))
3913    }
3914    #[inline(always)]
3915    fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3916        let (a0, a1) = self.split_u16x16(a);
3917        let (b0, b1) = self.split_u16x16(b);
3918        self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
3919    }
3920    #[inline(always)]
3921    fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3922        let (a0, a1) = self.split_u16x16(a);
3923        let (b0, b1) = self.split_u16x16(b);
3924        self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
3925    }
3926    #[inline(always)]
3927    fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3928        let (a0, a1) = self.split_u16x16(a);
3929        let (b0, b1) = self.split_u16x16(b);
3930        self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
3931    }
3932    #[inline(always)]
3933    fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3934        let (a0, a1) = self.split_u16x16(a);
3935        let (b0, b1) = self.split_u16x16(b);
3936        self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
3937    }
3938    #[inline(always)]
3939    fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3940        let (a0, a1) = self.split_u16x16(a);
3941        let (b0, b1) = self.split_u16x16(b);
3942        self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
3943    }
3944    #[inline(always)]
3945    fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3946        let (a0, _) = self.split_u16x16(a);
3947        let (b0, _) = self.split_u16x16(b);
3948        self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
3949    }
3950    #[inline(always)]
3951    fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3952        let (_, a1) = self.split_u16x16(a);
3953        let (_, b1) = self.split_u16x16(b);
3954        self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
3955    }
3956    #[inline(always)]
3957    fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3958        let (a0, a1) = self.split_u16x16(a);
3959        let (b0, b1) = self.split_u16x16(b);
3960        self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
3961    }
3962    #[inline(always)]
3963    fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3964        let (a0, a1) = self.split_u16x16(a);
3965        let (b0, b1) = self.split_u16x16(b);
3966        self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
3967    }
3968    #[inline(always)]
3969    fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
3970        let (a0, a1) = self.split_mask16x16(a);
3971        let (b0, b1) = self.split_u16x16(b);
3972        let (c0, c1) = self.split_u16x16(c);
3973        self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
3974    }
3975    #[inline(always)]
3976    fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3977        let (a0, a1) = self.split_u16x16(a);
3978        let (b0, b1) = self.split_u16x16(b);
3979        self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
3980    }
3981    #[inline(always)]
3982    fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3983        let (a0, a1) = self.split_u16x16(a);
3984        let (b0, b1) = self.split_u16x16(b);
3985        self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
3986    }
3987    #[inline(always)]
3988    fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
3989        let mut result = [0; 32usize];
3990        result[0..16usize].copy_from_slice(&a.val);
3991        result[16usize..32usize].copy_from_slice(&b.val);
3992        result.simd_into(self)
3993    }
3994    #[inline(always)]
3995    fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
3996        let mut b0 = [0; 8usize];
3997        let mut b1 = [0; 8usize];
3998        b0.copy_from_slice(&a.val[0..8usize]);
3999        b1.copy_from_slice(&a.val[8usize..16usize]);
4000        (b0.simd_into(self), b1.simd_into(self))
4001    }
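    // narrow truncates each u16 lane to u8 with `as`, keeping only the low 8 bits
    // (no saturation).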
4002    #[inline(always)]
4003    fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
4004        [
4005            a[0usize] as u8,
4006            a[1usize] as u8,
4007            a[2usize] as u8,
4008            a[3usize] as u8,
4009            a[4usize] as u8,
4010            a[5usize] as u8,
4011            a[6usize] as u8,
4012            a[7usize] as u8,
4013            a[8usize] as u8,
4014            a[9usize] as u8,
4015            a[10usize] as u8,
4016            a[11usize] as u8,
4017            a[12usize] as u8,
4018            a[13usize] as u8,
4019            a[14usize] as u8,
4020            a[15usize] as u8,
4021        ]
4022        .simd_into(self)
4023    }
4024    #[inline(always)]
4025    fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
4026        let (a0, a1) = self.split_u16x16(a);
4027        self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
4028    }
4029    #[inline(always)]
4030    fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
4031        let (a0, a1) = self.split_u16x16(a);
4032        self.combine_u32x4(
4033            self.reinterpret_u32_u16x8(a0),
4034            self.reinterpret_u32_u16x8(a1),
4035        )
4036    }
4037    #[inline(always)]
4038    fn splat_mask16x16(self, a: i16) -> mask16x16<Self> {
4039        let half = self.splat_mask16x8(a);
4040        self.combine_mask16x8(half, half)
4041    }
4042    #[inline(always)]
4043    fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
4044        let (a0, a1) = self.split_mask16x16(a);
4045        self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
4046    }
4047    #[inline(always)]
4048    fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4049        let (a0, a1) = self.split_mask16x16(a);
4050        let (b0, b1) = self.split_mask16x16(b);
4051        self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
4052    }
4053    #[inline(always)]
4054    fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4055        let (a0, a1) = self.split_mask16x16(a);
4056        let (b0, b1) = self.split_mask16x16(b);
4057        self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
4058    }
4059    #[inline(always)]
4060    fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4061        let (a0, a1) = self.split_mask16x16(a);
4062        let (b0, b1) = self.split_mask16x16(b);
4063        self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
4064    }
4065    #[inline(always)]
4066    fn select_mask16x16(
4067        self,
4068        a: mask16x16<Self>,
4069        b: mask16x16<Self>,
4070        c: mask16x16<Self>,
4071    ) -> mask16x16<Self> {
4072        let (a0, a1) = self.split_mask16x16(a);
4073        let (b0, b1) = self.split_mask16x16(b);
4074        let (c0, c1) = self.split_mask16x16(c);
4075        self.combine_mask16x8(
4076            self.select_mask16x8(a0, b0, c0),
4077            self.select_mask16x8(a1, b1, c1),
4078        )
4079    }
4080    #[inline(always)]
4081    fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4082        let (a0, a1) = self.split_mask16x16(a);
4083        let (b0, b1) = self.split_mask16x16(b);
4084        self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
4085    }
4086    #[inline(always)]
4087    fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
4088        let mut result = [0; 32usize];
4089        result[0..16usize].copy_from_slice(&a.val);
4090        result[16usize..32usize].copy_from_slice(&b.val);
4091        result.simd_into(self)
4092    }
4093    #[inline(always)]
4094    fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
4095        let mut b0 = [0; 8usize];
4096        let mut b1 = [0; 8usize];
4097        b0.copy_from_slice(&a.val[0..8usize]);
4098        b1.copy_from_slice(&a.val[8usize..16usize]);
4099        (b0.simd_into(self), b1.simd_into(self))
4100    }
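    // The 8-lane (and wider) ops below follow one doubling scheme: split each
    // operand into two half-width vectors, apply the half-width fallback op to
    // each half, and recombine the results.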
4101    #[inline(always)]
4102    fn splat_i32x8(self, a: i32) -> i32x8<Self> {
4103        let half = self.splat_i32x4(a);
4104        self.combine_i32x4(half, half)
4105    }
4106    #[inline(always)]
4107    fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
4108        let (a0, a1) = self.split_i32x8(a);
4109        self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
4110    }
4111    #[inline(always)]
4112    fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4113        let (a0, a1) = self.split_i32x8(a);
4114        let (b0, b1) = self.split_i32x8(b);
4115        self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
4116    }
4117    #[inline(always)]
4118    fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4119        let (a0, a1) = self.split_i32x8(a);
4120        let (b0, b1) = self.split_i32x8(b);
4121        self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
4122    }
4123    #[inline(always)]
4124    fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4125        let (a0, a1) = self.split_i32x8(a);
4126        let (b0, b1) = self.split_i32x8(b);
4127        self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
4128    }
4129    #[inline(always)]
4130    fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4131        let (a0, a1) = self.split_i32x8(a);
4132        let (b0, b1) = self.split_i32x8(b);
4133        self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
4134    }
4135    #[inline(always)]
4136    fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4137        let (a0, a1) = self.split_i32x8(a);
4138        let (b0, b1) = self.split_i32x8(b);
4139        self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
4140    }
4141    #[inline(always)]
4142    fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4143        let (a0, a1) = self.split_i32x8(a);
4144        let (b0, b1) = self.split_i32x8(b);
4145        self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
4146    }
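    // `shr_*`/`shl_*` apply a single shift count to every lane; `shrv_*` takes
    // its shift counts from a second vector (presumably one count per lane).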
4147    #[inline(always)]
4148    fn shr_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
4149        let (a0, a1) = self.split_i32x8(a);
4150        self.combine_i32x4(self.shr_i32x4(a0, b), self.shr_i32x4(a1, b))
4151    }
4152    #[inline(always)]
4153    fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4154        let (a0, a1) = self.split_i32x8(a);
4155        let (b0, b1) = self.split_i32x8(b);
4156        self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
4157    }
4158    #[inline(always)]
4159    fn shl_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
4160        let (a0, a1) = self.split_i32x8(a);
4161        self.combine_i32x4(self.shl_i32x4(a0, b), self.shl_i32x4(a1, b))
4162    }
4163    #[inline(always)]
4164    fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4165        let (a0, a1) = self.split_i32x8(a);
4166        let (b0, b1) = self.split_i32x8(b);
4167        self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
4168    }
4169    #[inline(always)]
4170    fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4171        let (a0, a1) = self.split_i32x8(a);
4172        let (b0, b1) = self.split_i32x8(b);
4173        self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
4174    }
4175    #[inline(always)]
4176    fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4177        let (a0, a1) = self.split_i32x8(a);
4178        let (b0, b1) = self.split_i32x8(b);
4179        self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
4180    }
4181    #[inline(always)]
4182    fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4183        let (a0, a1) = self.split_i32x8(a);
4184        let (b0, b1) = self.split_i32x8(b);
4185        self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
4186    }
4187    #[inline(always)]
4188    fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4189        let (a0, a1) = self.split_i32x8(a);
4190        let (b0, b1) = self.split_i32x8(b);
4191        self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
4192    }
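    // Zipping the low halves of `a` and `b` already yields eight lanes, so the
    // 8-lane zip_low combines the 4-lane zip_low and zip_high of those halves;
    // zip_high does the same with the upper halves.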
4193    #[inline(always)]
4194    fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4195        let (a0, _) = self.split_i32x8(a);
4196        let (b0, _) = self.split_i32x8(b);
4197        self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
4198    }
4199    #[inline(always)]
4200    fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4201        let (_, a1) = self.split_i32x8(a);
4202        let (_, b1) = self.split_i32x8(b);
4203        self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
4204    }
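    // Unzip works on each operand's own halves and concatenates the results
    // (presumably the even-indexed lanes of `a` followed by those of `b`, and
    // likewise the odd lanes for unzip_high).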
4205    #[inline(always)]
4206    fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4207        let (a0, a1) = self.split_i32x8(a);
4208        let (b0, b1) = self.split_i32x8(b);
4209        self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
4210    }
4211    #[inline(always)]
4212    fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4213        let (a0, a1) = self.split_i32x8(a);
4214        let (b0, b1) = self.split_i32x8(b);
4215        self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
4216    }
4217    #[inline(always)]
4218    fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
4219        let (a0, a1) = self.split_mask32x8(a);
4220        let (b0, b1) = self.split_i32x8(b);
4221        let (c0, c1) = self.split_i32x8(c);
4222        self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
4223    }
4224    #[inline(always)]
4225    fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4226        let (a0, a1) = self.split_i32x8(a);
4227        let (b0, b1) = self.split_i32x8(b);
4228        self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
4229    }
4230    #[inline(always)]
4231    fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4232        let (a0, a1) = self.split_i32x8(a);
4233        let (b0, b1) = self.split_i32x8(b);
4234        self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
4235    }
4236    #[inline(always)]
4237    fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
4238        let mut result = [0; 16usize];
4239        result[0..8usize].copy_from_slice(&a.val);
4240        result[8usize..16usize].copy_from_slice(&b.val);
4241        result.simd_into(self)
4242    }
4243    #[inline(always)]
4244    fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
4245        let mut b0 = [0; 4usize];
4246        let mut b1 = [0; 4usize];
4247        b0.copy_from_slice(&a.val[0..4usize]);
4248        b1.copy_from_slice(&a.val[4usize..8usize]);
4249        (b0.simd_into(self), b1.simd_into(self))
4250    }
4251    #[inline(always)]
4252    fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
4253        let (a0, a1) = self.split_i32x8(a);
4254        self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
4255    }
4256    #[inline(always)]
4257    fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
4258        let (a0, a1) = self.split_i32x8(a);
4259        self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
4260    }
4261    #[inline(always)]
4262    fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
4263        let (a0, a1) = self.split_i32x8(a);
4264        self.combine_u32x4(
4265            self.reinterpret_u32_i32x4(a0),
4266            self.reinterpret_u32_i32x4(a1),
4267        )
4268    }
4269    #[inline(always)]
4270    fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
4271        let (a0, a1) = self.split_i32x8(a);
4272        self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
4273    }
4274    #[inline(always)]
4275    fn splat_u32x8(self, a: u32) -> u32x8<Self> {
4276        let half = self.splat_u32x4(a);
4277        self.combine_u32x4(half, half)
4278    }
4279    #[inline(always)]
4280    fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
4281        let (a0, a1) = self.split_u32x8(a);
4282        self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
4283    }
4284    #[inline(always)]
4285    fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4286        let (a0, a1) = self.split_u32x8(a);
4287        let (b0, b1) = self.split_u32x8(b);
4288        self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
4289    }
4290    #[inline(always)]
4291    fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4292        let (a0, a1) = self.split_u32x8(a);
4293        let (b0, b1) = self.split_u32x8(b);
4294        self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
4295    }
4296    #[inline(always)]
4297    fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4298        let (a0, a1) = self.split_u32x8(a);
4299        let (b0, b1) = self.split_u32x8(b);
4300        self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
4301    }
4302    #[inline(always)]
4303    fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4304        let (a0, a1) = self.split_u32x8(a);
4305        let (b0, b1) = self.split_u32x8(b);
4306        self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
4307    }
4308    #[inline(always)]
4309    fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4310        let (a0, a1) = self.split_u32x8(a);
4311        let (b0, b1) = self.split_u32x8(b);
4312        self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
4313    }
4314    #[inline(always)]
4315    fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4316        let (a0, a1) = self.split_u32x8(a);
4317        let (b0, b1) = self.split_u32x8(b);
4318        self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
4319    }
4320    #[inline(always)]
4321    fn shr_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4322        let (a0, a1) = self.split_u32x8(a);
4323        self.combine_u32x4(self.shr_u32x4(a0, b), self.shr_u32x4(a1, b))
4324    }
4325    #[inline(always)]
4326    fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4327        let (a0, a1) = self.split_u32x8(a);
4328        let (b0, b1) = self.split_u32x8(b);
4329        self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
4330    }
4331    #[inline(always)]
4332    fn shl_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4333        let (a0, a1) = self.split_u32x8(a);
4334        self.combine_u32x4(self.shl_u32x4(a0, b), self.shl_u32x4(a1, b))
4335    }
4336    #[inline(always)]
4337    fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4338        let (a0, a1) = self.split_u32x8(a);
4339        let (b0, b1) = self.split_u32x8(b);
4340        self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
4341    }
4342    #[inline(always)]
4343    fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4344        let (a0, a1) = self.split_u32x8(a);
4345        let (b0, b1) = self.split_u32x8(b);
4346        self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
4347    }
4348    #[inline(always)]
4349    fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4350        let (a0, a1) = self.split_u32x8(a);
4351        let (b0, b1) = self.split_u32x8(b);
4352        self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
4353    }
4354    #[inline(always)]
4355    fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4356        let (a0, a1) = self.split_u32x8(a);
4357        let (b0, b1) = self.split_u32x8(b);
4358        self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
4359    }
4360    #[inline(always)]
4361    fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4362        let (a0, a1) = self.split_u32x8(a);
4363        let (b0, b1) = self.split_u32x8(b);
4364        self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
4365    }
4366    #[inline(always)]
4367    fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4368        let (a0, _) = self.split_u32x8(a);
4369        let (b0, _) = self.split_u32x8(b);
4370        self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
4371    }
4372    #[inline(always)]
4373    fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4374        let (_, a1) = self.split_u32x8(a);
4375        let (_, b1) = self.split_u32x8(b);
4376        self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
4377    }
4378    #[inline(always)]
4379    fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4380        let (a0, a1) = self.split_u32x8(a);
4381        let (b0, b1) = self.split_u32x8(b);
4382        self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
4383    }
4384    #[inline(always)]
4385    fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4386        let (a0, a1) = self.split_u32x8(a);
4387        let (b0, b1) = self.split_u32x8(b);
4388        self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
4389    }
4390    #[inline(always)]
4391    fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
4392        let (a0, a1) = self.split_mask32x8(a);
4393        let (b0, b1) = self.split_u32x8(b);
4394        let (c0, c1) = self.split_u32x8(c);
4395        self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
4396    }
4397    #[inline(always)]
4398    fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4399        let (a0, a1) = self.split_u32x8(a);
4400        let (b0, b1) = self.split_u32x8(b);
4401        self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
4402    }
4403    #[inline(always)]
4404    fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4405        let (a0, a1) = self.split_u32x8(a);
4406        let (b0, b1) = self.split_u32x8(b);
4407        self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
4408    }
4409    #[inline(always)]
4410    fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
4411        let mut result = [0; 16usize];
4412        result[0..8usize].copy_from_slice(&a.val);
4413        result[8usize..16usize].copy_from_slice(&b.val);
4414        result.simd_into(self)
4415    }
4416    #[inline(always)]
4417    fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
4418        let mut b0 = [0; 4usize];
4419        let mut b1 = [0; 4usize];
4420        b0.copy_from_slice(&a.val[0..4usize]);
4421        b1.copy_from_slice(&a.val[4usize..8usize]);
4422        (b0.simd_into(self), b1.simd_into(self))
4423    }
4424    #[inline(always)]
4425    fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
4426        let (a0, a1) = self.split_u32x8(a);
4427        self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
4428    }
4429    #[inline(always)]
4430    fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
4431        let (a0, a1) = self.split_u32x8(a);
4432        self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
4433    }
4434    #[inline(always)]
4435    fn splat_mask32x8(self, a: i32) -> mask32x8<Self> {
4436        let half = self.splat_mask32x4(a);
4437        self.combine_mask32x4(half, half)
4438    }
4439    #[inline(always)]
4440    fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
4441        let (a0, a1) = self.split_mask32x8(a);
4442        self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
4443    }
4444    #[inline(always)]
4445    fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4446        let (a0, a1) = self.split_mask32x8(a);
4447        let (b0, b1) = self.split_mask32x8(b);
4448        self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
4449    }
4450    #[inline(always)]
4451    fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4452        let (a0, a1) = self.split_mask32x8(a);
4453        let (b0, b1) = self.split_mask32x8(b);
4454        self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
4455    }
4456    #[inline(always)]
4457    fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4458        let (a0, a1) = self.split_mask32x8(a);
4459        let (b0, b1) = self.split_mask32x8(b);
4460        self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
4461    }
4462    #[inline(always)]
4463    fn select_mask32x8(
4464        self,
4465        a: mask32x8<Self>,
4466        b: mask32x8<Self>,
4467        c: mask32x8<Self>,
4468    ) -> mask32x8<Self> {
4469        let (a0, a1) = self.split_mask32x8(a);
4470        let (b0, b1) = self.split_mask32x8(b);
4471        let (c0, c1) = self.split_mask32x8(c);
4472        self.combine_mask32x4(
4473            self.select_mask32x4(a0, b0, c0),
4474            self.select_mask32x4(a1, b1, c1),
4475        )
4476    }
4477    #[inline(always)]
4478    fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4479        let (a0, a1) = self.split_mask32x8(a);
4480        let (b0, b1) = self.split_mask32x8(b);
4481        self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
4482    }
4483    #[inline(always)]
4484    fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
4485        let mut result = [0; 16usize];
4486        result[0..8usize].copy_from_slice(&a.val);
4487        result[8usize..16usize].copy_from_slice(&b.val);
4488        result.simd_into(self)
4489    }
4490    #[inline(always)]
4491    fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
4492        let mut b0 = [0; 4usize];
4493        let mut b1 = [0; 4usize];
4494        b0.copy_from_slice(&a.val[0..4usize]);
4495        b1.copy_from_slice(&a.val[4usize..8usize]);
4496        (b0.simd_into(self), b1.simd_into(self))
4497    }
4498    #[inline(always)]
4499    fn splat_f64x4(self, a: f64) -> f64x4<Self> {
4500        let half = self.splat_f64x2(a);
4501        self.combine_f64x2(half, half)
4502    }
4503    #[inline(always)]
4504    fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4505        let (a0, a1) = self.split_f64x4(a);
4506        self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
4507    }
4508    #[inline(always)]
4509    fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4510        let (a0, a1) = self.split_f64x4(a);
4511        self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
4512    }
4513    #[inline(always)]
4514    fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4515        let (a0, a1) = self.split_f64x4(a);
4516        self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
4517    }
4518    #[inline(always)]
4519    fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4520        let (a0, a1) = self.split_f64x4(a);
4521        let (b0, b1) = self.split_f64x4(b);
4522        self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
4523    }
4524    #[inline(always)]
4525    fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4526        let (a0, a1) = self.split_f64x4(a);
4527        let (b0, b1) = self.split_f64x4(b);
4528        self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
4529    }
4530    #[inline(always)]
4531    fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4532        let (a0, a1) = self.split_f64x4(a);
4533        let (b0, b1) = self.split_f64x4(b);
4534        self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
4535    }
4536    #[inline(always)]
4537    fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4538        let (a0, a1) = self.split_f64x4(a);
4539        let (b0, b1) = self.split_f64x4(b);
4540        self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
4541    }
4542    #[inline(always)]
4543    fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4544        let (a0, a1) = self.split_f64x4(a);
4545        let (b0, b1) = self.split_f64x4(b);
4546        self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
4547    }
4548    #[inline(always)]
4549    fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4550        let (a0, a1) = self.split_f64x4(a);
4551        let (b0, b1) = self.split_f64x4(b);
4552        self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
4553    }
4554    #[inline(always)]
4555    fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4556        let (a0, a1) = self.split_f64x4(a);
4557        let (b0, b1) = self.split_f64x4(b);
4558        self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
4559    }
4560    #[inline(always)]
4561    fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4562        let (a0, a1) = self.split_f64x4(a);
4563        let (b0, b1) = self.split_f64x4(b);
4564        self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
4565    }
4566    #[inline(always)]
4567    fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4568        let (a0, a1) = self.split_f64x4(a);
4569        let (b0, b1) = self.split_f64x4(b);
4570        self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
4571    }
4572    #[inline(always)]
4573    fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4574        let (a0, a1) = self.split_f64x4(a);
4575        let (b0, b1) = self.split_f64x4(b);
4576        self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
4577    }
4578    #[inline(always)]
4579    fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4580        let (a0, _) = self.split_f64x4(a);
4581        let (b0, _) = self.split_f64x4(b);
4582        self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
4583    }
4584    #[inline(always)]
4585    fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4586        let (_, a1) = self.split_f64x4(a);
4587        let (_, b1) = self.split_f64x4(b);
4588        self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
4589    }
4590    #[inline(always)]
4591    fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4592        let (a0, a1) = self.split_f64x4(a);
4593        let (b0, b1) = self.split_f64x4(b);
4594        self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
4595    }
4596    #[inline(always)]
4597    fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4598        let (a0, a1) = self.split_f64x4(a);
4599        let (b0, b1) = self.split_f64x4(b);
4600        self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
4601    }
4602    #[inline(always)]
4603    fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4604        let (a0, a1) = self.split_f64x4(a);
4605        let (b0, b1) = self.split_f64x4(b);
4606        self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
4607    }
4608    #[inline(always)]
4609    fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4610        let (a0, a1) = self.split_f64x4(a);
4611        let (b0, b1) = self.split_f64x4(b);
4612        self.combine_f64x2(
4613            self.max_precise_f64x2(a0, b0),
4614            self.max_precise_f64x2(a1, b1),
4615        )
4616    }
4617    #[inline(always)]
4618    fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4619        let (a0, a1) = self.split_f64x4(a);
4620        let (b0, b1) = self.split_f64x4(b);
4621        self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
4622    }
4623    #[inline(always)]
4624    fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4625        let (a0, a1) = self.split_f64x4(a);
4626        let (b0, b1) = self.split_f64x4(b);
4627        self.combine_f64x2(
4628            self.min_precise_f64x2(a0, b0),
4629            self.min_precise_f64x2(a1, b1),
4630        )
4631    }
4632    #[inline(always)]
4633    fn madd_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4634        let (a0, a1) = self.split_f64x4(a);
4635        let (b0, b1) = self.split_f64x4(b);
4636        let (c0, c1) = self.split_f64x4(c);
4637        self.combine_f64x2(self.madd_f64x2(a0, b0, c0), self.madd_f64x2(a1, b1, c1))
4638    }
4639    #[inline(always)]
4640    fn msub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4641        let (a0, a1) = self.split_f64x4(a);
4642        let (b0, b1) = self.split_f64x4(b);
4643        let (c0, c1) = self.split_f64x4(c);
4644        self.combine_f64x2(self.msub_f64x2(a0, b0, c0), self.msub_f64x2(a1, b1, c1))
4645    }
4646    #[inline(always)]
4647    fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4648        let (a0, a1) = self.split_f64x4(a);
4649        self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
4650    }
4651    #[inline(always)]
4652    fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4653        let (a0, a1) = self.split_f64x4(a);
4654        self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
4655    }
4656    #[inline(always)]
4657    fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4658        let (a0, a1) = self.split_f64x4(a);
4659        self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
4660    }
4661    #[inline(always)]
4662    fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4663        let (a0, a1) = self.split_mask64x4(a);
4664        let (b0, b1) = self.split_f64x4(b);
4665        let (c0, c1) = self.split_f64x4(c);
4666        self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
4667    }
4668    #[inline(always)]
4669    fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
4670        let mut result = [0.0; 8usize];
4671        result[0..4usize].copy_from_slice(&a.val);
4672        result[4usize..8usize].copy_from_slice(&b.val);
4673        result.simd_into(self)
4674    }
4675    #[inline(always)]
4676    fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
4677        let mut b0 = [0.0; 2usize];
4678        let mut b1 = [0.0; 2usize];
4679        b0.copy_from_slice(&a.val[0..2usize]);
4680        b1.copy_from_slice(&a.val[2usize..4usize]);
4681        (b0.simd_into(self), b1.simd_into(self))
4682    }
4683    #[inline(always)]
4684    fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
4685        let (a0, a1) = self.split_f64x4(a);
4686        self.combine_f32x4(
4687            self.reinterpret_f32_f64x2(a0),
4688            self.reinterpret_f32_f64x2(a1),
4689        )
4690    }
4691    #[inline(always)]
4692    fn splat_mask64x4(self, a: i64) -> mask64x4<Self> {
4693        let half = self.splat_mask64x2(a);
4694        self.combine_mask64x2(half, half)
4695    }
4696    #[inline(always)]
4697    fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
4698        let (a0, a1) = self.split_mask64x4(a);
4699        self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
4700    }
4701    #[inline(always)]
4702    fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4703        let (a0, a1) = self.split_mask64x4(a);
4704        let (b0, b1) = self.split_mask64x4(b);
4705        self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
4706    }
4707    #[inline(always)]
4708    fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4709        let (a0, a1) = self.split_mask64x4(a);
4710        let (b0, b1) = self.split_mask64x4(b);
4711        self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
4712    }
4713    #[inline(always)]
4714    fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4715        let (a0, a1) = self.split_mask64x4(a);
4716        let (b0, b1) = self.split_mask64x4(b);
4717        self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
4718    }
4719    #[inline(always)]
4720    fn select_mask64x4(
4721        self,
4722        a: mask64x4<Self>,
4723        b: mask64x4<Self>,
4724        c: mask64x4<Self>,
4725    ) -> mask64x4<Self> {
4726        let (a0, a1) = self.split_mask64x4(a);
4727        let (b0, b1) = self.split_mask64x4(b);
4728        let (c0, c1) = self.split_mask64x4(c);
4729        self.combine_mask64x2(
4730            self.select_mask64x2(a0, b0, c0),
4731            self.select_mask64x2(a1, b1, c1),
4732        )
4733    }
4734    #[inline(always)]
4735    fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4736        let (a0, a1) = self.split_mask64x4(a);
4737        let (b0, b1) = self.split_mask64x4(b);
4738        self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
4739    }
4740    #[inline(always)]
4741    fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
4742        let mut result = [0; 8usize];
4743        result[0..4usize].copy_from_slice(&a.val);
4744        result[4usize..8usize].copy_from_slice(&b.val);
4745        result.simd_into(self)
4746    }
4747    #[inline(always)]
4748    fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
4749        let mut b0 = [0; 2usize];
4750        let mut b1 = [0; 2usize];
4751        b0.copy_from_slice(&a.val[0..2usize]);
4752        b1.copy_from_slice(&a.val[2usize..4usize]);
4753        (b0.simd_into(self), b1.simd_into(self))
4754    }
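    // The f32x16 ops delegate to the f32x8 versions, continuing the same
    // halving scheme down to the scalar 4-lane fallback.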
4755    #[inline(always)]
4756    fn splat_f32x16(self, a: f32) -> f32x16<Self> {
4757        let half = self.splat_f32x8(a);
4758        self.combine_f32x8(half, half)
4759    }
4760    #[inline(always)]
4761    fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4762        let (a0, a1) = self.split_f32x16(a);
4763        self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
4764    }
4765    #[inline(always)]
4766    fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4767        let (a0, a1) = self.split_f32x16(a);
4768        self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
4769    }
4770    #[inline(always)]
4771    fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4772        let (a0, a1) = self.split_f32x16(a);
4773        self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
4774    }
4775    #[inline(always)]
4776    fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4777        let (a0, a1) = self.split_f32x16(a);
4778        let (b0, b1) = self.split_f32x16(b);
4779        self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
4780    }
4781    #[inline(always)]
4782    fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4783        let (a0, a1) = self.split_f32x16(a);
4784        let (b0, b1) = self.split_f32x16(b);
4785        self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
4786    }
4787    #[inline(always)]
4788    fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4789        let (a0, a1) = self.split_f32x16(a);
4790        let (b0, b1) = self.split_f32x16(b);
4791        self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
4792    }
4793    #[inline(always)]
4794    fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4795        let (a0, a1) = self.split_f32x16(a);
4796        let (b0, b1) = self.split_f32x16(b);
4797        self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
4798    }
4799    #[inline(always)]
4800    fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4801        let (a0, a1) = self.split_f32x16(a);
4802        let (b0, b1) = self.split_f32x16(b);
4803        self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
4804    }
4805    #[inline(always)]
4806    fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4807        let (a0, a1) = self.split_f32x16(a);
4808        let (b0, b1) = self.split_f32x16(b);
4809        self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
4810    }
4811    #[inline(always)]
4812    fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4813        let (a0, a1) = self.split_f32x16(a);
4814        let (b0, b1) = self.split_f32x16(b);
4815        self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
4816    }
4817    #[inline(always)]
4818    fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4819        let (a0, a1) = self.split_f32x16(a);
4820        let (b0, b1) = self.split_f32x16(b);
4821        self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
4822    }
4823    #[inline(always)]
4824    fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4825        let (a0, a1) = self.split_f32x16(a);
4826        let (b0, b1) = self.split_f32x16(b);
4827        self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
4828    }
4829    #[inline(always)]
4830    fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4831        let (a0, a1) = self.split_f32x16(a);
4832        let (b0, b1) = self.split_f32x16(b);
4833        self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
4834    }
4835    #[inline(always)]
4836    fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4837        let (a0, _) = self.split_f32x16(a);
4838        let (b0, _) = self.split_f32x16(b);
4839        self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
4840    }
4841    #[inline(always)]
4842    fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4843        let (_, a1) = self.split_f32x16(a);
4844        let (_, b1) = self.split_f32x16(b);
4845        self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
4846    }
4847    #[inline(always)]
4848    fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4849        let (a0, a1) = self.split_f32x16(a);
4850        let (b0, b1) = self.split_f32x16(b);
4851        self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
4852    }
4853    #[inline(always)]
4854    fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4855        let (a0, a1) = self.split_f32x16(a);
4856        let (b0, b1) = self.split_f32x16(b);
4857        self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
4858    }
4859    #[inline(always)]
4860    fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4861        let (a0, a1) = self.split_f32x16(a);
4862        let (b0, b1) = self.split_f32x16(b);
4863        self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
4864    }
4865    #[inline(always)]
4866    fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4867        let (a0, a1) = self.split_f32x16(a);
4868        let (b0, b1) = self.split_f32x16(b);
4869        self.combine_f32x8(
4870            self.max_precise_f32x8(a0, b0),
4871            self.max_precise_f32x8(a1, b1),
4872        )
4873    }
4874    #[inline(always)]
4875    fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4876        let (a0, a1) = self.split_f32x16(a);
4877        let (b0, b1) = self.split_f32x16(b);
4878        self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
4879    }
4880    #[inline(always)]
4881    fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4882        let (a0, a1) = self.split_f32x16(a);
4883        let (b0, b1) = self.split_f32x16(b);
4884        self.combine_f32x8(
4885            self.min_precise_f32x8(a0, b0),
4886            self.min_precise_f32x8(a1, b1),
4887        )
4888    }
4889    #[inline(always)]
4890    fn madd_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4891        let (a0, a1) = self.split_f32x16(a);
4892        let (b0, b1) = self.split_f32x16(b);
4893        let (c0, c1) = self.split_f32x16(c);
4894        self.combine_f32x8(self.madd_f32x8(a0, b0, c0), self.madd_f32x8(a1, b1, c1))
4895    }
4896    #[inline(always)]
4897    fn msub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4898        let (a0, a1) = self.split_f32x16(a);
4899        let (b0, b1) = self.split_f32x16(b);
4900        let (c0, c1) = self.split_f32x16(c);
4901        self.combine_f32x8(self.msub_f32x8(a0, b0, c0), self.msub_f32x8(a1, b1, c1))
4902    }
4903    #[inline(always)]
4904    fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4905        let (a0, a1) = self.split_f32x16(a);
4906        self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
4907    }
4908    #[inline(always)]
4909    fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4910        let (a0, a1) = self.split_f32x16(a);
4911        self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
4912    }
4913    #[inline(always)]
4914    fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4915        let (a0, a1) = self.split_f32x16(a);
4916        self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
4917    }
4918    #[inline(always)]
4919    fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4920        let (a0, a1) = self.split_mask32x16(a);
4921        let (b0, b1) = self.split_f32x16(b);
4922        let (c0, c1) = self.split_f32x16(c);
4923        self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
4924    }
4925    #[inline(always)]
4926    fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
4927        let mut b0 = [0.0; 8usize];
4928        let mut b1 = [0.0; 8usize];
4929        b0.copy_from_slice(&a.val[0..8usize]);
4930        b1.copy_from_slice(&a.val[8usize..16usize]);
4931        (b0.simd_into(self), b1.simd_into(self))
4932    }
4933    #[inline(always)]
4934    fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
4935        let (a0, a1) = self.split_f32x16(a);
4936        self.combine_f64x4(
4937            self.reinterpret_f64_f32x8(a0),
4938            self.reinterpret_f64_f32x8(a1),
4939        )
4940    }
4941    #[inline(always)]
4942    fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4943        let (a0, a1) = self.split_f32x16(a);
4944        self.combine_i32x8(
4945            self.reinterpret_i32_f32x8(a0),
4946            self.reinterpret_i32_f32x8(a1),
4947        )
4948    }
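    // Gathers every fourth f32 (a 4x4 transpose of the 16 elements), turning
    // four interleaved 128-bit groups into contiguous lanes.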
4949    #[inline(always)]
4950    fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
4951        [
4952            src[0usize],
4953            src[4usize],
4954            src[8usize],
4955            src[12usize],
4956            src[1usize],
4957            src[5usize],
4958            src[9usize],
4959            src[13usize],
4960            src[2usize],
4961            src[6usize],
4962            src[10usize],
4963            src[14usize],
4964            src[3usize],
4965            src[7usize],
4966            src[11usize],
4967            src[15usize],
4968        ]
4969        .simd_into(self)
4970    }
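    // The store uses the same 4x4-transpose index pattern; that permutation is
    // its own inverse, so it undoes the reordering performed by the load above.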
4971    #[inline(always)]
4972    fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) {
4973        *dest = [
4974            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
4975            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
4976            a[11usize], a[15usize],
4977        ];
4978    }
4979    #[inline(always)]
4980    fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
4981        let (a0, a1) = self.split_f32x16(a);
4982        self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
4983    }
4984    #[inline(always)]
4985    fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4986        let (a0, a1) = self.split_f32x16(a);
4987        self.combine_u32x8(
4988            self.reinterpret_u32_f32x8(a0),
4989            self.reinterpret_u32_f32x8(a1),
4990        )
4991    }
4992    #[inline(always)]
4993    fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4994        let (a0, a1) = self.split_f32x16(a);
4995        self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
4996    }
4997    #[inline(always)]
4998    fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4999        let (a0, a1) = self.split_f32x16(a);
5000        self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
5001    }
5002    #[inline(always)]
5003    fn splat_i8x64(self, a: i8) -> i8x64<Self> {
5004        let half = self.splat_i8x32(a);
5005        self.combine_i8x32(half, half)
5006    }
5007    #[inline(always)]
5008    fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
5009        let (a0, a1) = self.split_i8x64(a);
5010        self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
5011    }
5012    #[inline(always)]
5013    fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5014        let (a0, a1) = self.split_i8x64(a);
5015        let (b0, b1) = self.split_i8x64(b);
5016        self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
5017    }
5018    #[inline(always)]
5019    fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5020        let (a0, a1) = self.split_i8x64(a);
5021        let (b0, b1) = self.split_i8x64(b);
5022        self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
5023    }
5024    #[inline(always)]
5025    fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5026        let (a0, a1) = self.split_i8x64(a);
5027        let (b0, b1) = self.split_i8x64(b);
5028        self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
5029    }
5030    #[inline(always)]
5031    fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5032        let (a0, a1) = self.split_i8x64(a);
5033        let (b0, b1) = self.split_i8x64(b);
5034        self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
5035    }
5036    #[inline(always)]
5037    fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5038        let (a0, a1) = self.split_i8x64(a);
5039        let (b0, b1) = self.split_i8x64(b);
5040        self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
5041    }
5042    #[inline(always)]
5043    fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5044        let (a0, a1) = self.split_i8x64(a);
5045        let (b0, b1) = self.split_i8x64(b);
5046        self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
5047    }
5048    #[inline(always)]
5049    fn shr_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
5050        let (a0, a1) = self.split_i8x64(a);
5051        self.combine_i8x32(self.shr_i8x32(a0, b), self.shr_i8x32(a1, b))
5052    }
5053    #[inline(always)]
5054    fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5055        let (a0, a1) = self.split_i8x64(a);
5056        let (b0, b1) = self.split_i8x64(b);
5057        self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
5058    }
5059    #[inline(always)]
5060    fn shl_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
5061        let (a0, a1) = self.split_i8x64(a);
5062        self.combine_i8x32(self.shl_i8x32(a0, b), self.shl_i8x32(a1, b))
5063    }
5064    #[inline(always)]
5065    fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
5066        let (a0, a1) = self.split_i8x64(a);
5067        let (b0, b1) = self.split_i8x64(b);
5068        self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
5069    }
5070    #[inline(always)]
5071    fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
5072        let (a0, a1) = self.split_i8x64(a);
5073        let (b0, b1) = self.split_i8x64(b);
5074        self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
5075    }
5076    #[inline(always)]
5077    fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
5078        let (a0, a1) = self.split_i8x64(a);
5079        let (b0, b1) = self.split_i8x64(b);
5080        self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
5081    }
5082    #[inline(always)]
5083    fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
5084        let (a0, a1) = self.split_i8x64(a);
5085        let (b0, b1) = self.split_i8x64(b);
5086        self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
5087    }
5088    #[inline(always)]
5089    fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
5090        let (a0, a1) = self.split_i8x64(a);
5091        let (b0, b1) = self.split_i8x64(b);
5092        self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
5093    }
5094    #[inline(always)]
5095    fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5096        let (a0, _) = self.split_i8x64(a);
5097        let (b0, _) = self.split_i8x64(b);
5098        self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
5099    }
5100    #[inline(always)]
5101    fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5102        let (_, a1) = self.split_i8x64(a);
5103        let (_, b1) = self.split_i8x64(b);
5104        self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
5105    }
5106    #[inline(always)]
5107    fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5108        let (a0, a1) = self.split_i8x64(a);
5109        let (b0, b1) = self.split_i8x64(b);
5110        self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
5111    }
5112    #[inline(always)]
5113    fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5114        let (a0, a1) = self.split_i8x64(a);
5115        let (b0, b1) = self.split_i8x64(b);
5116        self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
5117    }
5118    #[inline(always)]
5119    fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
5120        let (a0, a1) = self.split_mask8x64(a);
5121        let (b0, b1) = self.split_i8x64(b);
5122        let (c0, c1) = self.split_i8x64(c);
5123        self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
5124    }
5125    #[inline(always)]
5126    fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5127        let (a0, a1) = self.split_i8x64(a);
5128        let (b0, b1) = self.split_i8x64(b);
5129        self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
5130    }
5131    #[inline(always)]
5132    fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
5133        let (a0, a1) = self.split_i8x64(a);
5134        let (b0, b1) = self.split_i8x64(b);
5135        self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
5136    }
5137    #[inline(always)]
5138    fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
5139        let mut b0 = [0; 32usize];
5140        let mut b1 = [0; 32usize];
5141        b0.copy_from_slice(&a.val[0..32usize]);
5142        b1.copy_from_slice(&a.val[32usize..64usize]);
5143        (b0.simd_into(self), b1.simd_into(self))
5144    }
5145    #[inline(always)]
5146    fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
5147        let (a0, a1) = self.split_i8x64(a);
5148        self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
5149    }
5150    #[inline(always)]
5151    fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
5152        let (a0, a1) = self.split_i8x64(a);
5153        self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
5154    }
5155    #[inline(always)]
5156    fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
5157        let (a0, a1) = self.split_i8x64(a);
5158        self.combine_u32x8(
5159            self.reinterpret_u32_i8x32(a0),
5160            self.reinterpret_u32_i8x32(a1),
5161        )
5162    }
5163    #[inline(always)]
5164    fn splat_u8x64(self, a: u8) -> u8x64<Self> {
5165        let half = self.splat_u8x32(a);
5166        self.combine_u8x32(half, half)
5167    }
5168    #[inline(always)]
5169    fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
5170        let (a0, a1) = self.split_u8x64(a);
5171        self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
5172    }
5173    #[inline(always)]
5174    fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5175        let (a0, a1) = self.split_u8x64(a);
5176        let (b0, b1) = self.split_u8x64(b);
5177        self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
5178    }
5179    #[inline(always)]
5180    fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5181        let (a0, a1) = self.split_u8x64(a);
5182        let (b0, b1) = self.split_u8x64(b);
5183        self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
5184    }
5185    #[inline(always)]
5186    fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5187        let (a0, a1) = self.split_u8x64(a);
5188        let (b0, b1) = self.split_u8x64(b);
5189        self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
5190    }
5191    #[inline(always)]
5192    fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5193        let (a0, a1) = self.split_u8x64(a);
5194        let (b0, b1) = self.split_u8x64(b);
5195        self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
5196    }
5197    #[inline(always)]
5198    fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5199        let (a0, a1) = self.split_u8x64(a);
5200        let (b0, b1) = self.split_u8x64(b);
5201        self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
5202    }
5203    #[inline(always)]
5204    fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5205        let (a0, a1) = self.split_u8x64(a);
5206        let (b0, b1) = self.split_u8x64(b);
5207        self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
5208    }
5209    #[inline(always)]
5210    fn shr_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
5211        let (a0, a1) = self.split_u8x64(a);
5212        self.combine_u8x32(self.shr_u8x32(a0, b), self.shr_u8x32(a1, b))
5213    }
5214    #[inline(always)]
5215    fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5216        let (a0, a1) = self.split_u8x64(a);
5217        let (b0, b1) = self.split_u8x64(b);
5218        self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
5219    }
5220    #[inline(always)]
5221    fn shl_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
5222        let (a0, a1) = self.split_u8x64(a);
5223        self.combine_u8x32(self.shl_u8x32(a0, b), self.shl_u8x32(a1, b))
5224    }
5225    #[inline(always)]
5226    fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
5227        let (a0, a1) = self.split_u8x64(a);
5228        let (b0, b1) = self.split_u8x64(b);
5229        self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
5230    }
5231    #[inline(always)]
5232    fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
5233        let (a0, a1) = self.split_u8x64(a);
5234        let (b0, b1) = self.split_u8x64(b);
5235        self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
5236    }
5237    #[inline(always)]
5238    fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
5239        let (a0, a1) = self.split_u8x64(a);
5240        let (b0, b1) = self.split_u8x64(b);
5241        self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
5242    }
5243    #[inline(always)]
5244    fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
5245        let (a0, a1) = self.split_u8x64(a);
5246        let (b0, b1) = self.split_u8x64(b);
5247        self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
5248    }
5249    #[inline(always)]
5250    fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
5251        let (a0, a1) = self.split_u8x64(a);
5252        let (b0, b1) = self.split_u8x64(b);
5253        self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
5254    }
5255    #[inline(always)]
5256    fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5257        let (a0, _) = self.split_u8x64(a);
5258        let (b0, _) = self.split_u8x64(b);
5259        self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
5260    }
5261    #[inline(always)]
5262    fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5263        let (_, a1) = self.split_u8x64(a);
5264        let (_, b1) = self.split_u8x64(b);
5265        self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
5266    }
5267    #[inline(always)]
5268    fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5269        let (a0, a1) = self.split_u8x64(a);
5270        let (b0, b1) = self.split_u8x64(b);
5271        self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
5272    }
5273    #[inline(always)]
5274    fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5275        let (a0, a1) = self.split_u8x64(a);
5276        let (b0, b1) = self.split_u8x64(b);
5277        self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
5278    }
5279    #[inline(always)]
5280    fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
5281        let (a0, a1) = self.split_mask8x64(a);
5282        let (b0, b1) = self.split_u8x64(b);
5283        let (c0, c1) = self.split_u8x64(c);
5284        self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
5285    }
5286    #[inline(always)]
5287    fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5288        let (a0, a1) = self.split_u8x64(a);
5289        let (b0, b1) = self.split_u8x64(b);
5290        self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
5291    }
5292    #[inline(always)]
5293    fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
5294        let (a0, a1) = self.split_u8x64(a);
5295        let (b0, b1) = self.split_u8x64(b);
5296        self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
5297    }
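    // All 512-bit (64-lane) operations above follow the same fallback
    // strategy: split the operands into two 256-bit halves, apply the
    // 32-lane version of the operation to each half, and stitch the results
    // back together with the matching combine_* helper defined elsewhere in
    // this file. split_u8x64 below is the splitting half of that scheme.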
5298    #[inline(always)]
5299    fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
5300        let mut b0 = [0; 32usize];
5301        let mut b1 = [0; 32usize];
5302        b0.copy_from_slice(&a.val[0..32usize]);
5303        b1.copy_from_slice(&a.val[32usize..64usize]);
5304        (b0.simd_into(self), b1.simd_into(self))
5305    }
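    // load_interleaved_128_u8x64 gathers with a stride of four: byte
    // src[4 * i + j] ends up in lane 16 * j + i (j in 0..4, i in 0..16), so
    // four interleaved byte streams become four contiguous 16-lane groups.
    // store_interleaved_128_u8x64 below applies the inverse permutation.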
5306    #[inline(always)]
5307    fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
5308        [
5309            src[0usize],
5310            src[4usize],
5311            src[8usize],
5312            src[12usize],
5313            src[16usize],
5314            src[20usize],
5315            src[24usize],
5316            src[28usize],
5317            src[32usize],
5318            src[36usize],
5319            src[40usize],
5320            src[44usize],
5321            src[48usize],
5322            src[52usize],
5323            src[56usize],
5324            src[60usize],
5325            src[1usize],
5326            src[5usize],
5327            src[9usize],
5328            src[13usize],
5329            src[17usize],
5330            src[21usize],
5331            src[25usize],
5332            src[29usize],
5333            src[33usize],
5334            src[37usize],
5335            src[41usize],
5336            src[45usize],
5337            src[49usize],
5338            src[53usize],
5339            src[57usize],
5340            src[61usize],
5341            src[2usize],
5342            src[6usize],
5343            src[10usize],
5344            src[14usize],
5345            src[18usize],
5346            src[22usize],
5347            src[26usize],
5348            src[30usize],
5349            src[34usize],
5350            src[38usize],
5351            src[42usize],
5352            src[46usize],
5353            src[50usize],
5354            src[54usize],
5355            src[58usize],
5356            src[62usize],
5357            src[3usize],
5358            src[7usize],
5359            src[11usize],
5360            src[15usize],
5361            src[19usize],
5362            src[23usize],
5363            src[27usize],
5364            src[31usize],
5365            src[35usize],
5366            src[39usize],
5367            src[43usize],
5368            src[47usize],
5369            src[51usize],
5370            src[55usize],
5371            src[59usize],
5372            src[63usize],
5373        ]
5374        .simd_into(self)
5375    }
5376    #[inline(always)]
5377    fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) {
5378        *dest = [
5379            a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
5380            a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
5381            a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
5382            a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
5383            a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
5384            a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
5385            a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
5386            a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
5387            a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
5388            a[63usize],
5389        ];
5390    }
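    // The reinterpret_* methods are bitwise casts: the underlying bytes are
    // left unchanged and only the lane type/width of the vector changes,
    // again by reinterpreting each 256-bit half separately.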
5391    #[inline(always)]
5392    fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
5393        let (a0, a1) = self.split_u8x64(a);
5394        self.combine_u32x8(
5395            self.reinterpret_u32_u8x32(a0),
5396            self.reinterpret_u32_u8x32(a1),
5397        )
5398    }
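    // Mask vectors are stored as integer lanes in this fallback level;
    // splat_mask8x64 broadcasts the given i8 (conventionally 0 for false and
    // -1, all bits set, for true) by duplicating one 32-lane half.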
5399    #[inline(always)]
5400    fn splat_mask8x64(self, a: i8) -> mask8x64<Self> {
5401        let half = self.splat_mask8x32(a);
5402        self.combine_mask8x32(half, half)
5403    }
5404    #[inline(always)]
5405    fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
5406        let (a0, a1) = self.split_mask8x64(a);
5407        self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
5408    }
5409    #[inline(always)]
5410    fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5411        let (a0, a1) = self.split_mask8x64(a);
5412        let (b0, b1) = self.split_mask8x64(b);
5413        self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
5414    }
5415    #[inline(always)]
5416    fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5417        let (a0, a1) = self.split_mask8x64(a);
5418        let (b0, b1) = self.split_mask8x64(b);
5419        self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
5420    }
5421    #[inline(always)]
5422    fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5423        let (a0, a1) = self.split_mask8x64(a);
5424        let (b0, b1) = self.split_mask8x64(b);
5425        self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
5426    }
5427    #[inline(always)]
5428    fn select_mask8x64(
5429        self,
5430        a: mask8x64<Self>,
5431        b: mask8x64<Self>,
5432        c: mask8x64<Self>,
5433    ) -> mask8x64<Self> {
5434        let (a0, a1) = self.split_mask8x64(a);
5435        let (b0, b1) = self.split_mask8x64(b);
5436        let (c0, c1) = self.split_mask8x64(c);
5437        self.combine_mask8x32(
5438            self.select_mask8x32(a0, b0, c0),
5439            self.select_mask8x32(a1, b1, c1),
5440        )
5441    }
5442    #[inline(always)]
5443    fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5444        let (a0, a1) = self.split_mask8x64(a);
5445        let (b0, b1) = self.split_mask8x64(b);
5446        self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
5447    }
5448    #[inline(always)]
5449    fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
5450        let mut b0 = [0; 32usize];
5451        let mut b1 = [0; 32usize];
5452        b0.copy_from_slice(&a.val[0..32usize]);
5453        b1.copy_from_slice(&a.val[32usize..64usize]);
5454        (b0.simd_into(self), b1.simd_into(self))
5455    }
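    // The remainder of this impl repeats the same split/recombine pattern
    // for the wider element types (i16x32, u16x32, i32x16, u32x16, f64x8 and
    // the corresponding mask types); only the half-width primitives being
    // delegated to differ.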
5456    #[inline(always)]
5457    fn splat_i16x32(self, a: i16) -> i16x32<Self> {
5458        let half = self.splat_i16x16(a);
5459        self.combine_i16x16(half, half)
5460    }
5461    #[inline(always)]
5462    fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
5463        let (a0, a1) = self.split_i16x32(a);
5464        self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
5465    }
5466    #[inline(always)]
5467    fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5468        let (a0, a1) = self.split_i16x32(a);
5469        let (b0, b1) = self.split_i16x32(b);
5470        self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
5471    }
5472    #[inline(always)]
5473    fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5474        let (a0, a1) = self.split_i16x32(a);
5475        let (b0, b1) = self.split_i16x32(b);
5476        self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
5477    }
5478    #[inline(always)]
5479    fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5480        let (a0, a1) = self.split_i16x32(a);
5481        let (b0, b1) = self.split_i16x32(b);
5482        self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
5483    }
5484    #[inline(always)]
5485    fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5486        let (a0, a1) = self.split_i16x32(a);
5487        let (b0, b1) = self.split_i16x32(b);
5488        self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
5489    }
5490    #[inline(always)]
5491    fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5492        let (a0, a1) = self.split_i16x32(a);
5493        let (b0, b1) = self.split_i16x32(b);
5494        self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
5495    }
5496    #[inline(always)]
5497    fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5498        let (a0, a1) = self.split_i16x32(a);
5499        let (b0, b1) = self.split_i16x32(b);
5500        self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
5501    }
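    // shr_*/shl_* shift every lane by the same scalar amount `b: u32`, while
    // shrv_* shifts each lane by the amount held in the corresponding lane
    // of the second vector operand.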
5502    #[inline(always)]
5503    fn shr_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
5504        let (a0, a1) = self.split_i16x32(a);
5505        self.combine_i16x16(self.shr_i16x16(a0, b), self.shr_i16x16(a1, b))
5506    }
5507    #[inline(always)]
5508    fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5509        let (a0, a1) = self.split_i16x32(a);
5510        let (b0, b1) = self.split_i16x32(b);
5511        self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
5512    }
5513    #[inline(always)]
5514    fn shl_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
5515        let (a0, a1) = self.split_i16x32(a);
5516        self.combine_i16x16(self.shl_i16x16(a0, b), self.shl_i16x16(a1, b))
5517    }
5518    #[inline(always)]
5519    fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5520        let (a0, a1) = self.split_i16x32(a);
5521        let (b0, b1) = self.split_i16x32(b);
5522        self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
5523    }
5524    #[inline(always)]
5525    fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5526        let (a0, a1) = self.split_i16x32(a);
5527        let (b0, b1) = self.split_i16x32(b);
5528        self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
5529    }
5530    #[inline(always)]
5531    fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5532        let (a0, a1) = self.split_i16x32(a);
5533        let (b0, b1) = self.split_i16x32(b);
5534        self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
5535    }
5536    #[inline(always)]
5537    fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5538        let (a0, a1) = self.split_i16x32(a);
5539        let (b0, b1) = self.split_i16x32(b);
5540        self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
5541    }
5542    #[inline(always)]
5543    fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5544        let (a0, a1) = self.split_i16x32(a);
5545        let (b0, b1) = self.split_i16x32(b);
5546        self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
5547    }
5548    #[inline(always)]
5549    fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5550        let (a0, _) = self.split_i16x32(a);
5551        let (b0, _) = self.split_i16x32(b);
5552        self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
5553    }
5554    #[inline(always)]
5555    fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5556        let (_, a1) = self.split_i16x32(a);
5557        let (_, b1) = self.split_i16x32(b);
5558        self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
5559    }
5560    #[inline(always)]
5561    fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5562        let (a0, a1) = self.split_i16x32(a);
5563        let (b0, b1) = self.split_i16x32(b);
5564        self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
5565    }
5566    #[inline(always)]
5567    fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5568        let (a0, a1) = self.split_i16x32(a);
5569        let (b0, b1) = self.split_i16x32(b);
5570        self.combine_i16x16(
5571            self.unzip_high_i16x16(a0, a1),
5572            self.unzip_high_i16x16(b0, b1),
5573        )
5574    }
5575    #[inline(always)]
5576    fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
5577        let (a0, a1) = self.split_mask16x32(a);
5578        let (b0, b1) = self.split_i16x32(b);
5579        let (c0, c1) = self.split_i16x32(c);
5580        self.combine_i16x16(
5581            self.select_i16x16(a0, b0, c0),
5582            self.select_i16x16(a1, b1, c1),
5583        )
5584    }
5585    #[inline(always)]
5586    fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5587        let (a0, a1) = self.split_i16x32(a);
5588        let (b0, b1) = self.split_i16x32(b);
5589        self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
5590    }
5591    #[inline(always)]
5592    fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5593        let (a0, a1) = self.split_i16x32(a);
5594        let (b0, b1) = self.split_i16x32(b);
5595        self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
5596    }
5597    #[inline(always)]
5598    fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
5599        let mut b0 = [0; 16usize];
5600        let mut b1 = [0; 16usize];
5601        b0.copy_from_slice(&a.val[0..16usize]);
5602        b1.copy_from_slice(&a.val[16usize..32usize]);
5603        (b0.simd_into(self), b1.simd_into(self))
5604    }
5605    #[inline(always)]
5606    fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
5607        let (a0, a1) = self.split_i16x32(a);
5608        self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
5609    }
5610    #[inline(always)]
5611    fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
5612        let (a0, a1) = self.split_i16x32(a);
5613        self.combine_u8x32(
5614            self.reinterpret_u8_i16x16(a0),
5615            self.reinterpret_u8_i16x16(a1),
5616        )
5617    }
5618    #[inline(always)]
5619    fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
5620        let (a0, a1) = self.split_i16x32(a);
5621        self.combine_u32x8(
5622            self.reinterpret_u32_i16x16(a0),
5623            self.reinterpret_u32_i16x16(a1),
5624        )
5625    }
5626    #[inline(always)]
5627    fn splat_u16x32(self, a: u16) -> u16x32<Self> {
5628        let half = self.splat_u16x16(a);
5629        self.combine_u16x16(half, half)
5630    }
5631    #[inline(always)]
5632    fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
5633        let (a0, a1) = self.split_u16x32(a);
5634        self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
5635    }
5636    #[inline(always)]
5637    fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5638        let (a0, a1) = self.split_u16x32(a);
5639        let (b0, b1) = self.split_u16x32(b);
5640        self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
5641    }
5642    #[inline(always)]
5643    fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5644        let (a0, a1) = self.split_u16x32(a);
5645        let (b0, b1) = self.split_u16x32(b);
5646        self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
5647    }
5648    #[inline(always)]
5649    fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5650        let (a0, a1) = self.split_u16x32(a);
5651        let (b0, b1) = self.split_u16x32(b);
5652        self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
5653    }
5654    #[inline(always)]
5655    fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5656        let (a0, a1) = self.split_u16x32(a);
5657        let (b0, b1) = self.split_u16x32(b);
5658        self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
5659    }
5660    #[inline(always)]
5661    fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5662        let (a0, a1) = self.split_u16x32(a);
5663        let (b0, b1) = self.split_u16x32(b);
5664        self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
5665    }
5666    #[inline(always)]
5667    fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5668        let (a0, a1) = self.split_u16x32(a);
5669        let (b0, b1) = self.split_u16x32(b);
5670        self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
5671    }
5672    #[inline(always)]
5673    fn shr_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
5674        let (a0, a1) = self.split_u16x32(a);
5675        self.combine_u16x16(self.shr_u16x16(a0, b), self.shr_u16x16(a1, b))
5676    }
5677    #[inline(always)]
5678    fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5679        let (a0, a1) = self.split_u16x32(a);
5680        let (b0, b1) = self.split_u16x32(b);
5681        self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
5682    }
5683    #[inline(always)]
5684    fn shl_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
5685        let (a0, a1) = self.split_u16x32(a);
5686        self.combine_u16x16(self.shl_u16x16(a0, b), self.shl_u16x16(a1, b))
5687    }
5688    #[inline(always)]
5689    fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5690        let (a0, a1) = self.split_u16x32(a);
5691        let (b0, b1) = self.split_u16x32(b);
5692        self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
5693    }
5694    #[inline(always)]
5695    fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5696        let (a0, a1) = self.split_u16x32(a);
5697        let (b0, b1) = self.split_u16x32(b);
5698        self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
5699    }
5700    #[inline(always)]
5701    fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5702        let (a0, a1) = self.split_u16x32(a);
5703        let (b0, b1) = self.split_u16x32(b);
5704        self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
5705    }
5706    #[inline(always)]
5707    fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5708        let (a0, a1) = self.split_u16x32(a);
5709        let (b0, b1) = self.split_u16x32(b);
5710        self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
5711    }
5712    #[inline(always)]
5713    fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5714        let (a0, a1) = self.split_u16x32(a);
5715        let (b0, b1) = self.split_u16x32(b);
5716        self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
5717    }
5718    #[inline(always)]
5719    fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5720        let (a0, _) = self.split_u16x32(a);
5721        let (b0, _) = self.split_u16x32(b);
5722        self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
5723    }
5724    #[inline(always)]
5725    fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5726        let (_, a1) = self.split_u16x32(a);
5727        let (_, b1) = self.split_u16x32(b);
5728        self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
5729    }
5730    #[inline(always)]
5731    fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5732        let (a0, a1) = self.split_u16x32(a);
5733        let (b0, b1) = self.split_u16x32(b);
5734        self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
5735    }
5736    #[inline(always)]
5737    fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5738        let (a0, a1) = self.split_u16x32(a);
5739        let (b0, b1) = self.split_u16x32(b);
5740        self.combine_u16x16(
5741            self.unzip_high_u16x16(a0, a1),
5742            self.unzip_high_u16x16(b0, b1),
5743        )
5744    }
5745    #[inline(always)]
5746    fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
5747        let (a0, a1) = self.split_mask16x32(a);
5748        let (b0, b1) = self.split_u16x32(b);
5749        let (c0, c1) = self.split_u16x32(c);
5750        self.combine_u16x16(
5751            self.select_u16x16(a0, b0, c0),
5752            self.select_u16x16(a1, b1, c1),
5753        )
5754    }
5755    #[inline(always)]
5756    fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5757        let (a0, a1) = self.split_u16x32(a);
5758        let (b0, b1) = self.split_u16x32(b);
5759        self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
5760    }
5761    #[inline(always)]
5762    fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5763        let (a0, a1) = self.split_u16x32(a);
5764        let (b0, b1) = self.split_u16x32(b);
5765        self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
5766    }
5767    #[inline(always)]
5768    fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
5769        let mut b0 = [0; 16usize];
5770        let mut b1 = [0; 16usize];
5771        b0.copy_from_slice(&a.val[0..16usize]);
5772        b1.copy_from_slice(&a.val[16usize..32usize]);
5773        (b0.simd_into(self), b1.simd_into(self))
5774    }
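    // Same stride-four (de)interleave as the u8 version, at 16-bit
    // granularity: src[4 * i + j] lands in lane 8 * j + i (j in 0..4,
    // i in 0..8).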
5775    #[inline(always)]
5776    fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
5777        [
5778            src[0usize],
5779            src[4usize],
5780            src[8usize],
5781            src[12usize],
5782            src[16usize],
5783            src[20usize],
5784            src[24usize],
5785            src[28usize],
5786            src[1usize],
5787            src[5usize],
5788            src[9usize],
5789            src[13usize],
5790            src[17usize],
5791            src[21usize],
5792            src[25usize],
5793            src[29usize],
5794            src[2usize],
5795            src[6usize],
5796            src[10usize],
5797            src[14usize],
5798            src[18usize],
5799            src[22usize],
5800            src[26usize],
5801            src[30usize],
5802            src[3usize],
5803            src[7usize],
5804            src[11usize],
5805            src[15usize],
5806            src[19usize],
5807            src[23usize],
5808            src[27usize],
5809            src[31usize],
5810        ]
5811        .simd_into(self)
5812    }
5813    #[inline(always)]
5814    fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) {
5815        *dest = [
5816            a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
5817            a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
5818            a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
5819            a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
5820            a[7usize], a[15usize], a[23usize], a[31usize],
5821        ];
5822    }
5823    #[inline(always)]
5824    fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
5825        let (a0, a1) = self.split_u16x32(a);
5826        self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
5827    }
5828    #[inline(always)]
5829    fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
5830        let (a0, a1) = self.split_u16x32(a);
5831        self.combine_u8x32(
5832            self.reinterpret_u8_u16x16(a0),
5833            self.reinterpret_u8_u16x16(a1),
5834        )
5835    }
5836    #[inline(always)]
5837    fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
5838        let (a0, a1) = self.split_u16x32(a);
5839        self.combine_u32x8(
5840            self.reinterpret_u32_u16x16(a0),
5841            self.reinterpret_u32_u16x16(a1),
5842        )
5843    }
5844    #[inline(always)]
5845    fn splat_mask16x32(self, a: i16) -> mask16x32<Self> {
5846        let half = self.splat_mask16x16(a);
5847        self.combine_mask16x16(half, half)
5848    }
5849    #[inline(always)]
5850    fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
5851        let (a0, a1) = self.split_mask16x32(a);
5852        self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
5853    }
5854    #[inline(always)]
5855    fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5856        let (a0, a1) = self.split_mask16x32(a);
5857        let (b0, b1) = self.split_mask16x32(b);
5858        self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
5859    }
5860    #[inline(always)]
5861    fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5862        let (a0, a1) = self.split_mask16x32(a);
5863        let (b0, b1) = self.split_mask16x32(b);
5864        self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
5865    }
5866    #[inline(always)]
5867    fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5868        let (a0, a1) = self.split_mask16x32(a);
5869        let (b0, b1) = self.split_mask16x32(b);
5870        self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
5871    }
5872    #[inline(always)]
5873    fn select_mask16x32(
5874        self,
5875        a: mask16x32<Self>,
5876        b: mask16x32<Self>,
5877        c: mask16x32<Self>,
5878    ) -> mask16x32<Self> {
5879        let (a0, a1) = self.split_mask16x32(a);
5880        let (b0, b1) = self.split_mask16x32(b);
5881        let (c0, c1) = self.split_mask16x32(c);
5882        self.combine_mask16x16(
5883            self.select_mask16x16(a0, b0, c0),
5884            self.select_mask16x16(a1, b1, c1),
5885        )
5886    }
5887    #[inline(always)]
5888    fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5889        let (a0, a1) = self.split_mask16x32(a);
5890        let (b0, b1) = self.split_mask16x32(b);
5891        self.combine_mask16x16(
5892            self.simd_eq_mask16x16(a0, b0),
5893            self.simd_eq_mask16x16(a1, b1),
5894        )
5895    }
5896    #[inline(always)]
5897    fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
5898        let mut b0 = [0; 16usize];
5899        let mut b1 = [0; 16usize];
5900        b0.copy_from_slice(&a.val[0..16usize]);
5901        b1.copy_from_slice(&a.val[16usize..32usize]);
5902        (b0.simd_into(self), b1.simd_into(self))
5903    }
5904    #[inline(always)]
5905    fn splat_i32x16(self, a: i32) -> i32x16<Self> {
5906        let half = self.splat_i32x8(a);
5907        self.combine_i32x8(half, half)
5908    }
5909    #[inline(always)]
5910    fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
5911        let (a0, a1) = self.split_i32x16(a);
5912        self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
5913    }
5914    #[inline(always)]
5915    fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5916        let (a0, a1) = self.split_i32x16(a);
5917        let (b0, b1) = self.split_i32x16(b);
5918        self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
5919    }
5920    #[inline(always)]
5921    fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5922        let (a0, a1) = self.split_i32x16(a);
5923        let (b0, b1) = self.split_i32x16(b);
5924        self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
5925    }
5926    #[inline(always)]
5927    fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5928        let (a0, a1) = self.split_i32x16(a);
5929        let (b0, b1) = self.split_i32x16(b);
5930        self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
5931    }
5932    #[inline(always)]
5933    fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5934        let (a0, a1) = self.split_i32x16(a);
5935        let (b0, b1) = self.split_i32x16(b);
5936        self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
5937    }
5938    #[inline(always)]
5939    fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5940        let (a0, a1) = self.split_i32x16(a);
5941        let (b0, b1) = self.split_i32x16(b);
5942        self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
5943    }
5944    #[inline(always)]
5945    fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5946        let (a0, a1) = self.split_i32x16(a);
5947        let (b0, b1) = self.split_i32x16(b);
5948        self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
5949    }
5950    #[inline(always)]
5951    fn shr_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
5952        let (a0, a1) = self.split_i32x16(a);
5953        self.combine_i32x8(self.shr_i32x8(a0, b), self.shr_i32x8(a1, b))
5954    }
5955    #[inline(always)]
5956    fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5957        let (a0, a1) = self.split_i32x16(a);
5958        let (b0, b1) = self.split_i32x16(b);
5959        self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
5960    }
5961    #[inline(always)]
5962    fn shl_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
5963        let (a0, a1) = self.split_i32x16(a);
5964        self.combine_i32x8(self.shl_i32x8(a0, b), self.shl_i32x8(a1, b))
5965    }
5966    #[inline(always)]
5967    fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5968        let (a0, a1) = self.split_i32x16(a);
5969        let (b0, b1) = self.split_i32x16(b);
5970        self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
5971    }
5972    #[inline(always)]
5973    fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5974        let (a0, a1) = self.split_i32x16(a);
5975        let (b0, b1) = self.split_i32x16(b);
5976        self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
5977    }
5978    #[inline(always)]
5979    fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5980        let (a0, a1) = self.split_i32x16(a);
5981        let (b0, b1) = self.split_i32x16(b);
5982        self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
5983    }
5984    #[inline(always)]
5985    fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5986        let (a0, a1) = self.split_i32x16(a);
5987        let (b0, b1) = self.split_i32x16(b);
5988        self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
5989    }
5990    #[inline(always)]
5991    fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5992        let (a0, a1) = self.split_i32x16(a);
5993        let (b0, b1) = self.split_i32x16(b);
5994        self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
5995    }
5996    #[inline(always)]
5997    fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5998        let (a0, _) = self.split_i32x16(a);
5999        let (b0, _) = self.split_i32x16(b);
6000        self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
6001    }
6002    #[inline(always)]
6003    fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
6004        let (_, a1) = self.split_i32x16(a);
6005        let (_, b1) = self.split_i32x16(b);
6006        self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
6007    }
6008    #[inline(always)]
6009    fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
6010        let (a0, a1) = self.split_i32x16(a);
6011        let (b0, b1) = self.split_i32x16(b);
6012        self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
6013    }
6014    #[inline(always)]
6015    fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
6016        let (a0, a1) = self.split_i32x16(a);
6017        let (b0, b1) = self.split_i32x16(b);
6018        self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
6019    }
6020    #[inline(always)]
6021    fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
6022        let (a0, a1) = self.split_mask32x16(a);
6023        let (b0, b1) = self.split_i32x16(b);
6024        let (c0, c1) = self.split_i32x16(c);
6025        self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
6026    }
6027    #[inline(always)]
6028    fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
6029        let (a0, a1) = self.split_i32x16(a);
6030        let (b0, b1) = self.split_i32x16(b);
6031        self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
6032    }
6033    #[inline(always)]
6034    fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
6035        let (a0, a1) = self.split_i32x16(a);
6036        let (b0, b1) = self.split_i32x16(b);
6037        self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
6038    }
6039    #[inline(always)]
6040    fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
6041        let mut b0 = [0; 8usize];
6042        let mut b1 = [0; 8usize];
6043        b0.copy_from_slice(&a.val[0..8usize]);
6044        b1.copy_from_slice(&a.val[8usize..16usize]);
6045        (b0.simd_into(self), b1.simd_into(self))
6046    }
6047    #[inline(always)]
6048    fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
6049        let (a0, a1) = self.split_i32x16(a);
6050        self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
6051    }
6052    #[inline(always)]
6053    fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
6054        let (a0, a1) = self.split_i32x16(a);
6055        self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
6056    }
6057    #[inline(always)]
6058    fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
6059        let (a0, a1) = self.split_i32x16(a);
6060        self.combine_u32x8(
6061            self.reinterpret_u32_i32x8(a0),
6062            self.reinterpret_u32_i32x8(a1),
6063        )
6064    }
6065    #[inline(always)]
6066    fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
6067        let (a0, a1) = self.split_i32x16(a);
6068        self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
6069    }
6070    #[inline(always)]
6071    fn splat_u32x16(self, a: u32) -> u32x16<Self> {
6072        let half = self.splat_u32x8(a);
6073        self.combine_u32x8(half, half)
6074    }
6075    #[inline(always)]
6076    fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
6077        let (a0, a1) = self.split_u32x16(a);
6078        self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
6079    }
6080    #[inline(always)]
6081    fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6082        let (a0, a1) = self.split_u32x16(a);
6083        let (b0, b1) = self.split_u32x16(b);
6084        self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
6085    }
6086    #[inline(always)]
6087    fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6088        let (a0, a1) = self.split_u32x16(a);
6089        let (b0, b1) = self.split_u32x16(b);
6090        self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
6091    }
6092    #[inline(always)]
6093    fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6094        let (a0, a1) = self.split_u32x16(a);
6095        let (b0, b1) = self.split_u32x16(b);
6096        self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
6097    }
6098    #[inline(always)]
6099    fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6100        let (a0, a1) = self.split_u32x16(a);
6101        let (b0, b1) = self.split_u32x16(b);
6102        self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
6103    }
6104    #[inline(always)]
6105    fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6106        let (a0, a1) = self.split_u32x16(a);
6107        let (b0, b1) = self.split_u32x16(b);
6108        self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
6109    }
6110    #[inline(always)]
6111    fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6112        let (a0, a1) = self.split_u32x16(a);
6113        let (b0, b1) = self.split_u32x16(b);
6114        self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
6115    }
6116    #[inline(always)]
6117    fn shr_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
6118        let (a0, a1) = self.split_u32x16(a);
6119        self.combine_u32x8(self.shr_u32x8(a0, b), self.shr_u32x8(a1, b))
6120    }
6121    #[inline(always)]
6122    fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6123        let (a0, a1) = self.split_u32x16(a);
6124        let (b0, b1) = self.split_u32x16(b);
6125        self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
6126    }
6127    #[inline(always)]
6128    fn shl_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
6129        let (a0, a1) = self.split_u32x16(a);
6130        self.combine_u32x8(self.shl_u32x8(a0, b), self.shl_u32x8(a1, b))
6131    }
6132    #[inline(always)]
6133    fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6134        let (a0, a1) = self.split_u32x16(a);
6135        let (b0, b1) = self.split_u32x16(b);
6136        self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
6137    }
6138    #[inline(always)]
6139    fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6140        let (a0, a1) = self.split_u32x16(a);
6141        let (b0, b1) = self.split_u32x16(b);
6142        self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
6143    }
6144    #[inline(always)]
6145    fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6146        let (a0, a1) = self.split_u32x16(a);
6147        let (b0, b1) = self.split_u32x16(b);
6148        self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
6149    }
6150    #[inline(always)]
6151    fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6152        let (a0, a1) = self.split_u32x16(a);
6153        let (b0, b1) = self.split_u32x16(b);
6154        self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
6155    }
6156    #[inline(always)]
6157    fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6158        let (a0, a1) = self.split_u32x16(a);
6159        let (b0, b1) = self.split_u32x16(b);
6160        self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
6161    }
6162    #[inline(always)]
6163    fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6164        let (a0, _) = self.split_u32x16(a);
6165        let (b0, _) = self.split_u32x16(b);
6166        self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
6167    }
6168    #[inline(always)]
6169    fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6170        let (_, a1) = self.split_u32x16(a);
6171        let (_, b1) = self.split_u32x16(b);
6172        self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
6173    }
6174    #[inline(always)]
6175    fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6176        let (a0, a1) = self.split_u32x16(a);
6177        let (b0, b1) = self.split_u32x16(b);
6178        self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
6179    }
6180    #[inline(always)]
6181    fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6182        let (a0, a1) = self.split_u32x16(a);
6183        let (b0, b1) = self.split_u32x16(b);
6184        self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
6185    }
6186    #[inline(always)]
6187    fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
6188        let (a0, a1) = self.split_mask32x16(a);
6189        let (b0, b1) = self.split_u32x16(b);
6190        let (c0, c1) = self.split_u32x16(c);
6191        self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
6192    }
6193    #[inline(always)]
6194    fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6195        let (a0, a1) = self.split_u32x16(a);
6196        let (b0, b1) = self.split_u32x16(b);
6197        self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
6198    }
6199    #[inline(always)]
6200    fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6201        let (a0, a1) = self.split_u32x16(a);
6202        let (b0, b1) = self.split_u32x16(b);
6203        self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
6204    }
6205    #[inline(always)]
6206    fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
6207        let mut b0 = [0; 8usize];
6208        let mut b1 = [0; 8usize];
6209        b0.copy_from_slice(&a.val[0..8usize]);
6210        b1.copy_from_slice(&a.val[8usize..16usize]);
6211        (b0.simd_into(self), b1.simd_into(self))
6212    }
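    // And again at 32-bit granularity: src[4 * i + j] lands in lane
    // 4 * j + i (j in 0..4, i in 0..4).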
6213    #[inline(always)]
6214    fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
6215        [
6216            src[0usize],
6217            src[4usize],
6218            src[8usize],
6219            src[12usize],
6220            src[1usize],
6221            src[5usize],
6222            src[9usize],
6223            src[13usize],
6224            src[2usize],
6225            src[6usize],
6226            src[10usize],
6227            src[14usize],
6228            src[3usize],
6229            src[7usize],
6230            src[11usize],
6231            src[15usize],
6232        ]
6233        .simd_into(self)
6234    }
6235    #[inline(always)]
6236    fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) {
6237        *dest = [
6238            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
6239            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
6240            a[11usize], a[15usize],
6241        ];
6242    }
6243    #[inline(always)]
6244    fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
6245        let (a0, a1) = self.split_u32x16(a);
6246        self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
6247    }
6248    #[inline(always)]
6249    fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
6250        let (a0, a1) = self.split_u32x16(a);
6251        self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
6252    }
6253    #[inline(always)]
6254    fn splat_mask32x16(self, a: i32) -> mask32x16<Self> {
6255        let half = self.splat_mask32x8(a);
6256        self.combine_mask32x8(half, half)
6257    }
6258    #[inline(always)]
6259    fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
6260        let (a0, a1) = self.split_mask32x16(a);
6261        self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
6262    }
6263    #[inline(always)]
6264    fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
6265        let (a0, a1) = self.split_mask32x16(a);
6266        let (b0, b1) = self.split_mask32x16(b);
6267        self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
6268    }
6269    #[inline(always)]
6270    fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
6271        let (a0, a1) = self.split_mask32x16(a);
6272        let (b0, b1) = self.split_mask32x16(b);
6273        self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
6274    }
6275    #[inline(always)]
6276    fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
6277        let (a0, a1) = self.split_mask32x16(a);
6278        let (b0, b1) = self.split_mask32x16(b);
6279        self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
6280    }
6281    #[inline(always)]
6282    fn select_mask32x16(
6283        self,
6284        a: mask32x16<Self>,
6285        b: mask32x16<Self>,
6286        c: mask32x16<Self>,
6287    ) -> mask32x16<Self> {
6288        let (a0, a1) = self.split_mask32x16(a);
6289        let (b0, b1) = self.split_mask32x16(b);
6290        let (c0, c1) = self.split_mask32x16(c);
6291        self.combine_mask32x8(
6292            self.select_mask32x8(a0, b0, c0),
6293            self.select_mask32x8(a1, b1, c1),
6294        )
6295    }
6296    #[inline(always)]
6297    fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
6298        let (a0, a1) = self.split_mask32x16(a);
6299        let (b0, b1) = self.split_mask32x16(b);
6300        self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
6301    }
6302    #[inline(always)]
6303    fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
6304        let mut b0 = [0; 8usize];
6305        let mut b1 = [0; 8usize];
6306        b0.copy_from_slice(&a.val[0..8usize]);
6307        b1.copy_from_slice(&a.val[8usize..16usize]);
6308        (b0.simd_into(self), b1.simd_into(self))
6309    }
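    // The f64x8 block applies the same halving strategy to double-precision
    // floats, including the precise min/max variants and the multiply-add
    // (madd) and multiply-subtract (msub) forms, each delegating to the
    // corresponding f64x4 methods.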
6310    #[inline(always)]
6311    fn splat_f64x8(self, a: f64) -> f64x8<Self> {
6312        let half = self.splat_f64x4(a);
6313        self.combine_f64x4(half, half)
6314    }
6315    #[inline(always)]
6316    fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6317        let (a0, a1) = self.split_f64x8(a);
6318        self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
6319    }
6320    #[inline(always)]
6321    fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6322        let (a0, a1) = self.split_f64x8(a);
6323        self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
6324    }
6325    #[inline(always)]
6326    fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6327        let (a0, a1) = self.split_f64x8(a);
6328        self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
6329    }
6330    #[inline(always)]
6331    fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6332        let (a0, a1) = self.split_f64x8(a);
6333        let (b0, b1) = self.split_f64x8(b);
6334        self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
6335    }
6336    #[inline(always)]
6337    fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6338        let (a0, a1) = self.split_f64x8(a);
6339        let (b0, b1) = self.split_f64x8(b);
6340        self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
6341    }
6342    #[inline(always)]
6343    fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6344        let (a0, a1) = self.split_f64x8(a);
6345        let (b0, b1) = self.split_f64x8(b);
6346        self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
6347    }
6348    #[inline(always)]
6349    fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6350        let (a0, a1) = self.split_f64x8(a);
6351        let (b0, b1) = self.split_f64x8(b);
6352        self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
6353    }
6354    #[inline(always)]
6355    fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6356        let (a0, a1) = self.split_f64x8(a);
6357        let (b0, b1) = self.split_f64x8(b);
6358        self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
6359    }
6360    #[inline(always)]
6361    fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
6362        let (a0, a1) = self.split_f64x8(a);
6363        let (b0, b1) = self.split_f64x8(b);
6364        self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
6365    }
6366    #[inline(always)]
6367    fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
6368        let (a0, a1) = self.split_f64x8(a);
6369        let (b0, b1) = self.split_f64x8(b);
6370        self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
6371    }
6372    #[inline(always)]
6373    fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
6374        let (a0, a1) = self.split_f64x8(a);
6375        let (b0, b1) = self.split_f64x8(b);
6376        self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
6377    }
6378    #[inline(always)]
6379    fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
6380        let (a0, a1) = self.split_f64x8(a);
6381        let (b0, b1) = self.split_f64x8(b);
6382        self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
6383    }
6384    #[inline(always)]
6385    fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
6386        let (a0, a1) = self.split_f64x8(a);
6387        let (b0, b1) = self.split_f64x8(b);
6388        self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
6389    }
6390    #[inline(always)]
6391    fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6392        let (a0, _) = self.split_f64x8(a);
6393        let (b0, _) = self.split_f64x8(b);
6394        self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
6395    }
6396    #[inline(always)]
6397    fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6398        let (_, a1) = self.split_f64x8(a);
6399        let (_, b1) = self.split_f64x8(b);
6400        self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
6401    }
6402    #[inline(always)]
6403    fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6404        let (a0, a1) = self.split_f64x8(a);
6405        let (b0, b1) = self.split_f64x8(b);
6406        self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
6407    }
6408    #[inline(always)]
6409    fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6410        let (a0, a1) = self.split_f64x8(a);
6411        let (b0, b1) = self.split_f64x8(b);
6412        self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
6413    }
6414    #[inline(always)]
6415    fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6416        let (a0, a1) = self.split_f64x8(a);
6417        let (b0, b1) = self.split_f64x8(b);
6418        self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
6419    }
6420    #[inline(always)]
6421    fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6422        let (a0, a1) = self.split_f64x8(a);
6423        let (b0, b1) = self.split_f64x8(b);
6424        self.combine_f64x4(
6425            self.max_precise_f64x4(a0, b0),
6426            self.max_precise_f64x4(a1, b1),
6427        )
6428    }
6429    #[inline(always)]
6430    fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6431        let (a0, a1) = self.split_f64x8(a);
6432        let (b0, b1) = self.split_f64x8(b);
6433        self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
6434    }
6435    #[inline(always)]
6436    fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6437        let (a0, a1) = self.split_f64x8(a);
6438        let (b0, b1) = self.split_f64x8(b);
6439        self.combine_f64x4(
6440            self.min_precise_f64x4(a0, b0),
6441            self.min_precise_f64x4(a1, b1),
6442        )
6443    }
6444    #[inline(always)]
6445    fn madd_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6446        let (a0, a1) = self.split_f64x8(a);
6447        let (b0, b1) = self.split_f64x8(b);
6448        let (c0, c1) = self.split_f64x8(c);
6449        self.combine_f64x4(self.madd_f64x4(a0, b0, c0), self.madd_f64x4(a1, b1, c1))
6450    }
6451    #[inline(always)]
6452    fn msub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6453        let (a0, a1) = self.split_f64x8(a);
6454        let (b0, b1) = self.split_f64x8(b);
6455        let (c0, c1) = self.split_f64x8(c);
6456        self.combine_f64x4(self.msub_f64x4(a0, b0, c0), self.msub_f64x4(a1, b1, c1))
6457    }
6458    #[inline(always)]
6459    fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6460        let (a0, a1) = self.split_f64x8(a);
6461        self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
6462    }
6463    #[inline(always)]
6464    fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6465        let (a0, a1) = self.split_f64x8(a);
6466        self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
6467    }
6468    #[inline(always)]
6469    fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6470        let (a0, a1) = self.split_f64x8(a);
6471        self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
6472    }
6473    #[inline(always)]
6474    fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6475        let (a0, a1) = self.split_mask64x8(a);
6476        let (b0, b1) = self.split_f64x8(b);
6477        let (c0, c1) = self.split_f64x8(c);
6478        self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
6479    }
6480    #[inline(always)]
6481    fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
6482        let mut b0 = [0.0; 4usize];
6483        let mut b1 = [0.0; 4usize];
6484        b0.copy_from_slice(&a.val[0..4usize]);
6485        b1.copy_from_slice(&a.val[4usize..8usize]);
6486        (b0.simd_into(self), b1.simd_into(self))
6487    }
6488    #[inline(always)]
6489    fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
6490        let (a0, a1) = self.split_f64x8(a);
6491        self.combine_f32x8(
6492            self.reinterpret_f32_f64x4(a0),
6493            self.reinterpret_f32_f64x4(a1),
6494        )
6495    }
6496    #[inline(always)]
6497    fn splat_mask64x8(self, a: i64) -> mask64x8<Self> {
6498        let half = self.splat_mask64x4(a);
6499        self.combine_mask64x4(half, half)
6500    }
6501    #[inline(always)]
6502    fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
6503        let (a0, a1) = self.split_mask64x8(a);
6504        self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
6505    }
6506    #[inline(always)]
6507    fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6508        let (a0, a1) = self.split_mask64x8(a);
6509        let (b0, b1) = self.split_mask64x8(b);
6510        self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
6511    }
6512    #[inline(always)]
6513    fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6514        let (a0, a1) = self.split_mask64x8(a);
6515        let (b0, b1) = self.split_mask64x8(b);
6516        self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
6517    }
6518    #[inline(always)]
6519    fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6520        let (a0, a1) = self.split_mask64x8(a);
6521        let (b0, b1) = self.split_mask64x8(b);
6522        self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
6523    }
6524    #[inline(always)]
6525    fn select_mask64x8(
6526        self,
6527        a: mask64x8<Self>,
6528        b: mask64x8<Self>,
6529        c: mask64x8<Self>,
6530    ) -> mask64x8<Self> {
6531        let (a0, a1) = self.split_mask64x8(a);
6532        let (b0, b1) = self.split_mask64x8(b);
6533        let (c0, c1) = self.split_mask64x8(c);
6534        self.combine_mask64x4(
6535            self.select_mask64x4(a0, b0, c0),
6536            self.select_mask64x4(a1, b1, c1),
6537        )
6538    }
6539    #[inline(always)]
6540    fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6541        let (a0, a1) = self.split_mask64x8(a);
6542        let (b0, b1) = self.split_mask64x8(b);
6543        self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
6544    }
6545    #[inline(always)]
6546    fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
6547        let mut b0 = [0; 4usize];
6548        let mut b1 = [0; 4usize];
6549        b0.copy_from_slice(&a.val[0..4usize]);
6550        b1.copy_from_slice(&a.val[4usize..8usize]);
6551        (b0.simd_into(self), b1.simd_into(self))
6552    }
6553}