fearless_simd/generated/
fallback.rs

1// Copyright 2025 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4// This file is autogenerated by fearless_simd_gen
5
6use crate::{Level, Simd, SimdInto, seal::Seal};
7use crate::{
8    f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
9    i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
10    mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
11    u32x4, u32x8, u32x16,
12};
13use core::ops::*;
14#[cfg(all(feature = "libm", not(feature = "std")))]
15trait FloatExt {
16    fn floor(self) -> Self;
17    fn fract(self) -> Self;
18    fn sqrt(self) -> Self;
19    fn trunc(self) -> Self;
20}
21#[cfg(all(feature = "libm", not(feature = "std")))]
22impl FloatExt for f32 {
23    #[inline(always)]
24    fn floor(self) -> f32 {
25        libm::floorf(self)
26    }
27    #[inline(always)]
28    fn sqrt(self) -> f32 {
29        libm::sqrtf(self)
30    }
31    #[inline(always)]
32    fn fract(self) -> f32 {
33        self - self.trunc()
34    }
35    #[inline(always)]
36    fn trunc(self) -> f32 {
37        libm::truncf(self)
38    }
39}
40#[cfg(all(feature = "libm", not(feature = "std")))]
41impl FloatExt for f64 {
42    #[inline(always)]
43    fn floor(self) -> f64 {
44        libm::floor(self)
45    }
46    #[inline(always)]
47    fn sqrt(self) -> f64 {
48        libm::sqrt(self)
49    }
50    #[inline(always)]
51    fn fract(self) -> f64 {
52        self - self.trunc()
53    }
54    #[inline(always)]
55    fn trunc(self) -> f64 {
56        libm::trunc(self)
57    }
58}
59#[doc = r#" The SIMD token for the "fallback" level."#]
60#[derive(Clone, Copy, Debug)]
61pub struct Fallback {
62    pub fallback: crate::core_arch::fallback::Fallback,
63}
64impl Fallback {
65    #[inline]
66    pub fn new() -> Self {
67        Fallback {
68            fallback: crate::core_arch::fallback::Fallback::new(),
69        }
70    }
71}
72impl Seal for Fallback {}
73impl Simd for Fallback {
74    type f32s = f32x4<Self>;
75    type u8s = u8x16<Self>;
76    type i8s = i8x16<Self>;
77    type u16s = u16x8<Self>;
78    type i16s = i16x8<Self>;
79    type u32s = u32x4<Self>;
80    type i32s = i32x4<Self>;
81    type mask8s = mask8x16<Self>;
82    type mask16s = mask16x8<Self>;
83    type mask32s = mask32x4<Self>;
84    #[inline(always)]
85    fn level(self) -> Level {
86        Level::Fallback(self)
87    }
88    #[inline]
89    fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
90        f()
91    }
92    #[inline(always)]
93    fn splat_f32x4(self, val: f32) -> f32x4<Self> {
94        [val; 4usize].simd_into(self)
95    }
96    #[inline(always)]
97    fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
98        [
99            f32::abs(a[0usize]),
100            f32::abs(a[1usize]),
101            f32::abs(a[2usize]),
102            f32::abs(a[3usize]),
103        ]
104        .simd_into(self)
105    }
106    #[inline(always)]
107    fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
108        [
109            f32::neg(a[0usize]),
110            f32::neg(a[1usize]),
111            f32::neg(a[2usize]),
112            f32::neg(a[3usize]),
113        ]
114        .simd_into(self)
115    }
116    #[inline(always)]
117    fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
118        [
119            f32::sqrt(a[0usize]),
120            f32::sqrt(a[1usize]),
121            f32::sqrt(a[2usize]),
122            f32::sqrt(a[3usize]),
123        ]
124        .simd_into(self)
125    }
126    #[inline(always)]
127    fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
128        [
129            f32::add(a[0usize], &b[0usize]),
130            f32::add(a[1usize], &b[1usize]),
131            f32::add(a[2usize], &b[2usize]),
132            f32::add(a[3usize], &b[3usize]),
133        ]
134        .simd_into(self)
135    }
136    #[inline(always)]
137    fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
138        [
139            f32::sub(a[0usize], &b[0usize]),
140            f32::sub(a[1usize], &b[1usize]),
141            f32::sub(a[2usize], &b[2usize]),
142            f32::sub(a[3usize], &b[3usize]),
143        ]
144        .simd_into(self)
145    }
146    #[inline(always)]
147    fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
148        [
149            f32::mul(a[0usize], &b[0usize]),
150            f32::mul(a[1usize], &b[1usize]),
151            f32::mul(a[2usize], &b[2usize]),
152            f32::mul(a[3usize], &b[3usize]),
153        ]
154        .simd_into(self)
155    }
156    #[inline(always)]
157    fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
158        [
159            f32::div(a[0usize], &b[0usize]),
160            f32::div(a[1usize], &b[1usize]),
161            f32::div(a[2usize], &b[2usize]),
162            f32::div(a[3usize], &b[3usize]),
163        ]
164        .simd_into(self)
165    }
166    #[inline(always)]
167    fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
168        [
169            f32::copysign(a[0usize], b[0usize]),
170            f32::copysign(a[1usize], b[1usize]),
171            f32::copysign(a[2usize], b[2usize]),
172            f32::copysign(a[3usize], b[3usize]),
173        ]
174        .simd_into(self)
175    }
176    #[inline(always)]
177    fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
178        [
179            -(f32::eq(&a[0usize], &b[0usize]) as i32),
180            -(f32::eq(&a[1usize], &b[1usize]) as i32),
181            -(f32::eq(&a[2usize], &b[2usize]) as i32),
182            -(f32::eq(&a[3usize], &b[3usize]) as i32),
183        ]
184        .simd_into(self)
185    }
186    #[inline(always)]
187    fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
188        [
189            -(f32::lt(&a[0usize], &b[0usize]) as i32),
190            -(f32::lt(&a[1usize], &b[1usize]) as i32),
191            -(f32::lt(&a[2usize], &b[2usize]) as i32),
192            -(f32::lt(&a[3usize], &b[3usize]) as i32),
193        ]
194        .simd_into(self)
195    }
196    #[inline(always)]
197    fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
198        [
199            -(f32::le(&a[0usize], &b[0usize]) as i32),
200            -(f32::le(&a[1usize], &b[1usize]) as i32),
201            -(f32::le(&a[2usize], &b[2usize]) as i32),
202            -(f32::le(&a[3usize], &b[3usize]) as i32),
203        ]
204        .simd_into(self)
205    }
206    #[inline(always)]
207    fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
208        [
209            -(f32::ge(&a[0usize], &b[0usize]) as i32),
210            -(f32::ge(&a[1usize], &b[1usize]) as i32),
211            -(f32::ge(&a[2usize], &b[2usize]) as i32),
212            -(f32::ge(&a[3usize], &b[3usize]) as i32),
213        ]
214        .simd_into(self)
215    }
216    #[inline(always)]
217    fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
218        [
219            -(f32::gt(&a[0usize], &b[0usize]) as i32),
220            -(f32::gt(&a[1usize], &b[1usize]) as i32),
221            -(f32::gt(&a[2usize], &b[2usize]) as i32),
222            -(f32::gt(&a[3usize], &b[3usize]) as i32),
223        ]
224        .simd_into(self)
225    }
226    #[inline(always)]
227    fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
228        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
229    }
230    #[inline(always)]
231    fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
232        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
233    }
234    #[inline(always)]
235    fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
236        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
237    }
238    #[inline(always)]
239    fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
240        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
241    }
242    #[inline(always)]
243    fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
244        [
245            f32::max(a[0usize], b[0usize]),
246            f32::max(a[1usize], b[1usize]),
247            f32::max(a[2usize], b[2usize]),
248            f32::max(a[3usize], b[3usize]),
249        ]
250        .simd_into(self)
251    }
252    #[inline(always)]
253    fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
254        [
255            f32::max(a[0usize], b[0usize]),
256            f32::max(a[1usize], b[1usize]),
257            f32::max(a[2usize], b[2usize]),
258            f32::max(a[3usize], b[3usize]),
259        ]
260        .simd_into(self)
261    }
262    #[inline(always)]
263    fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
264        [
265            f32::min(a[0usize], b[0usize]),
266            f32::min(a[1usize], b[1usize]),
267            f32::min(a[2usize], b[2usize]),
268            f32::min(a[3usize], b[3usize]),
269        ]
270        .simd_into(self)
271    }
272    #[inline(always)]
273    fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
274        [
275            f32::min(a[0usize], b[0usize]),
276            f32::min(a[1usize], b[1usize]),
277            f32::min(a[2usize], b[2usize]),
278            f32::min(a[3usize], b[3usize]),
279        ]
280        .simd_into(self)
281    }
282    #[inline(always)]
283    fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
284        a.add(b.mul(c))
285    }
286    #[inline(always)]
287    fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
288        a.sub(b.mul(c))
289    }
290    #[inline(always)]
291    fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
292        [
293            f32::floor(a[0usize]),
294            f32::floor(a[1usize]),
295            f32::floor(a[2usize]),
296            f32::floor(a[3usize]),
297        ]
298        .simd_into(self)
299    }
300    #[inline(always)]
301    fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
302        [
303            f32::fract(a[0usize]),
304            f32::fract(a[1usize]),
305            f32::fract(a[2usize]),
306            f32::fract(a[3usize]),
307        ]
308        .simd_into(self)
309    }
310    #[inline(always)]
311    fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
312        [
313            f32::trunc(a[0usize]),
314            f32::trunc(a[1usize]),
315            f32::trunc(a[2usize]),
316            f32::trunc(a[3usize]),
317        ]
318        .simd_into(self)
319    }
320    #[inline(always)]
321    fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
322        [
323            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
324            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
325            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
326            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
327        ]
328        .simd_into(self)
329    }
330    #[inline(always)]
331    fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
332        let mut result = [0.0; 8usize];
333        result[0..4usize].copy_from_slice(&a.val);
334        result[4usize..8usize].copy_from_slice(&b.val);
335        result.simd_into(self)
336    }
337    #[inline(always)]
338    fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
339        f64x2 {
340            val: bytemuck::cast(a.val),
341            simd: a.simd,
342        }
343    }
344    #[inline(always)]
345    fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
346        i32x4 {
347            val: bytemuck::cast(a.val),
348            simd: a.simd,
349        }
350    }
351    #[inline(always)]
352    fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
353        u8x16 {
354            val: bytemuck::cast(a.val),
355            simd: a.simd,
356        }
357    }
358    #[inline(always)]
359    fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
360        u32x4 {
361            val: bytemuck::cast(a.val),
362            simd: a.simd,
363        }
364    }
365    #[inline(always)]
366    fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
367        [
368            a[0usize] as u32,
369            a[1usize] as u32,
370            a[2usize] as u32,
371            a[3usize] as u32,
372        ]
373        .simd_into(self)
374    }
375    #[inline(always)]
376    fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
377        [
378            a[0usize] as i32,
379            a[1usize] as i32,
380            a[2usize] as i32,
381            a[3usize] as i32,
382        ]
383        .simd_into(self)
384    }
385    #[inline(always)]
386    fn splat_i8x16(self, val: i8) -> i8x16<Self> {
387        [val; 16usize].simd_into(self)
388    }
389    #[inline(always)]
390    fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
391        [
392            i8::not(a[0usize]),
393            i8::not(a[1usize]),
394            i8::not(a[2usize]),
395            i8::not(a[3usize]),
396            i8::not(a[4usize]),
397            i8::not(a[5usize]),
398            i8::not(a[6usize]),
399            i8::not(a[7usize]),
400            i8::not(a[8usize]),
401            i8::not(a[9usize]),
402            i8::not(a[10usize]),
403            i8::not(a[11usize]),
404            i8::not(a[12usize]),
405            i8::not(a[13usize]),
406            i8::not(a[14usize]),
407            i8::not(a[15usize]),
408        ]
409        .simd_into(self)
410    }
411    #[inline(always)]
412    fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
413        [
414            i8::add(a[0usize], &b[0usize]),
415            i8::add(a[1usize], &b[1usize]),
416            i8::add(a[2usize], &b[2usize]),
417            i8::add(a[3usize], &b[3usize]),
418            i8::add(a[4usize], &b[4usize]),
419            i8::add(a[5usize], &b[5usize]),
420            i8::add(a[6usize], &b[6usize]),
421            i8::add(a[7usize], &b[7usize]),
422            i8::add(a[8usize], &b[8usize]),
423            i8::add(a[9usize], &b[9usize]),
424            i8::add(a[10usize], &b[10usize]),
425            i8::add(a[11usize], &b[11usize]),
426            i8::add(a[12usize], &b[12usize]),
427            i8::add(a[13usize], &b[13usize]),
428            i8::add(a[14usize], &b[14usize]),
429            i8::add(a[15usize], &b[15usize]),
430        ]
431        .simd_into(self)
432    }
433    #[inline(always)]
434    fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
435        [
436            i8::wrapping_sub(a[0usize], b[0usize]),
437            i8::wrapping_sub(a[1usize], b[1usize]),
438            i8::wrapping_sub(a[2usize], b[2usize]),
439            i8::wrapping_sub(a[3usize], b[3usize]),
440            i8::wrapping_sub(a[4usize], b[4usize]),
441            i8::wrapping_sub(a[5usize], b[5usize]),
442            i8::wrapping_sub(a[6usize], b[6usize]),
443            i8::wrapping_sub(a[7usize], b[7usize]),
444            i8::wrapping_sub(a[8usize], b[8usize]),
445            i8::wrapping_sub(a[9usize], b[9usize]),
446            i8::wrapping_sub(a[10usize], b[10usize]),
447            i8::wrapping_sub(a[11usize], b[11usize]),
448            i8::wrapping_sub(a[12usize], b[12usize]),
449            i8::wrapping_sub(a[13usize], b[13usize]),
450            i8::wrapping_sub(a[14usize], b[14usize]),
451            i8::wrapping_sub(a[15usize], b[15usize]),
452        ]
453        .simd_into(self)
454    }
455    #[inline(always)]
456    fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
457        [
458            i8::wrapping_mul(a[0usize], b[0usize]),
459            i8::wrapping_mul(a[1usize], b[1usize]),
460            i8::wrapping_mul(a[2usize], b[2usize]),
461            i8::wrapping_mul(a[3usize], b[3usize]),
462            i8::wrapping_mul(a[4usize], b[4usize]),
463            i8::wrapping_mul(a[5usize], b[5usize]),
464            i8::wrapping_mul(a[6usize], b[6usize]),
465            i8::wrapping_mul(a[7usize], b[7usize]),
466            i8::wrapping_mul(a[8usize], b[8usize]),
467            i8::wrapping_mul(a[9usize], b[9usize]),
468            i8::wrapping_mul(a[10usize], b[10usize]),
469            i8::wrapping_mul(a[11usize], b[11usize]),
470            i8::wrapping_mul(a[12usize], b[12usize]),
471            i8::wrapping_mul(a[13usize], b[13usize]),
472            i8::wrapping_mul(a[14usize], b[14usize]),
473            i8::wrapping_mul(a[15usize], b[15usize]),
474        ]
475        .simd_into(self)
476    }
477    #[inline(always)]
478    fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
479        [
480            i8::bitand(a[0usize], &b[0usize]),
481            i8::bitand(a[1usize], &b[1usize]),
482            i8::bitand(a[2usize], &b[2usize]),
483            i8::bitand(a[3usize], &b[3usize]),
484            i8::bitand(a[4usize], &b[4usize]),
485            i8::bitand(a[5usize], &b[5usize]),
486            i8::bitand(a[6usize], &b[6usize]),
487            i8::bitand(a[7usize], &b[7usize]),
488            i8::bitand(a[8usize], &b[8usize]),
489            i8::bitand(a[9usize], &b[9usize]),
490            i8::bitand(a[10usize], &b[10usize]),
491            i8::bitand(a[11usize], &b[11usize]),
492            i8::bitand(a[12usize], &b[12usize]),
493            i8::bitand(a[13usize], &b[13usize]),
494            i8::bitand(a[14usize], &b[14usize]),
495            i8::bitand(a[15usize], &b[15usize]),
496        ]
497        .simd_into(self)
498    }
499    #[inline(always)]
500    fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
501        [
502            i8::bitor(a[0usize], &b[0usize]),
503            i8::bitor(a[1usize], &b[1usize]),
504            i8::bitor(a[2usize], &b[2usize]),
505            i8::bitor(a[3usize], &b[3usize]),
506            i8::bitor(a[4usize], &b[4usize]),
507            i8::bitor(a[5usize], &b[5usize]),
508            i8::bitor(a[6usize], &b[6usize]),
509            i8::bitor(a[7usize], &b[7usize]),
510            i8::bitor(a[8usize], &b[8usize]),
511            i8::bitor(a[9usize], &b[9usize]),
512            i8::bitor(a[10usize], &b[10usize]),
513            i8::bitor(a[11usize], &b[11usize]),
514            i8::bitor(a[12usize], &b[12usize]),
515            i8::bitor(a[13usize], &b[13usize]),
516            i8::bitor(a[14usize], &b[14usize]),
517            i8::bitor(a[15usize], &b[15usize]),
518        ]
519        .simd_into(self)
520    }
521    #[inline(always)]
522    fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
523        [
524            i8::bitxor(a[0usize], &b[0usize]),
525            i8::bitxor(a[1usize], &b[1usize]),
526            i8::bitxor(a[2usize], &b[2usize]),
527            i8::bitxor(a[3usize], &b[3usize]),
528            i8::bitxor(a[4usize], &b[4usize]),
529            i8::bitxor(a[5usize], &b[5usize]),
530            i8::bitxor(a[6usize], &b[6usize]),
531            i8::bitxor(a[7usize], &b[7usize]),
532            i8::bitxor(a[8usize], &b[8usize]),
533            i8::bitxor(a[9usize], &b[9usize]),
534            i8::bitxor(a[10usize], &b[10usize]),
535            i8::bitxor(a[11usize], &b[11usize]),
536            i8::bitxor(a[12usize], &b[12usize]),
537            i8::bitxor(a[13usize], &b[13usize]),
538            i8::bitxor(a[14usize], &b[14usize]),
539            i8::bitxor(a[15usize], &b[15usize]),
540        ]
541        .simd_into(self)
542    }
543    #[inline(always)]
544    fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
545        [
546            i8::shr(a[0usize], shift as i8),
547            i8::shr(a[1usize], shift as i8),
548            i8::shr(a[2usize], shift as i8),
549            i8::shr(a[3usize], shift as i8),
550            i8::shr(a[4usize], shift as i8),
551            i8::shr(a[5usize], shift as i8),
552            i8::shr(a[6usize], shift as i8),
553            i8::shr(a[7usize], shift as i8),
554            i8::shr(a[8usize], shift as i8),
555            i8::shr(a[9usize], shift as i8),
556            i8::shr(a[10usize], shift as i8),
557            i8::shr(a[11usize], shift as i8),
558            i8::shr(a[12usize], shift as i8),
559            i8::shr(a[13usize], shift as i8),
560            i8::shr(a[14usize], shift as i8),
561            i8::shr(a[15usize], shift as i8),
562        ]
563        .simd_into(self)
564    }
565    #[inline(always)]
566    fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
567        [
568            -(i8::eq(&a[0usize], &b[0usize]) as i8),
569            -(i8::eq(&a[1usize], &b[1usize]) as i8),
570            -(i8::eq(&a[2usize], &b[2usize]) as i8),
571            -(i8::eq(&a[3usize], &b[3usize]) as i8),
572            -(i8::eq(&a[4usize], &b[4usize]) as i8),
573            -(i8::eq(&a[5usize], &b[5usize]) as i8),
574            -(i8::eq(&a[6usize], &b[6usize]) as i8),
575            -(i8::eq(&a[7usize], &b[7usize]) as i8),
576            -(i8::eq(&a[8usize], &b[8usize]) as i8),
577            -(i8::eq(&a[9usize], &b[9usize]) as i8),
578            -(i8::eq(&a[10usize], &b[10usize]) as i8),
579            -(i8::eq(&a[11usize], &b[11usize]) as i8),
580            -(i8::eq(&a[12usize], &b[12usize]) as i8),
581            -(i8::eq(&a[13usize], &b[13usize]) as i8),
582            -(i8::eq(&a[14usize], &b[14usize]) as i8),
583            -(i8::eq(&a[15usize], &b[15usize]) as i8),
584        ]
585        .simd_into(self)
586    }
587    #[inline(always)]
588    fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
589        [
590            -(i8::lt(&a[0usize], &b[0usize]) as i8),
591            -(i8::lt(&a[1usize], &b[1usize]) as i8),
592            -(i8::lt(&a[2usize], &b[2usize]) as i8),
593            -(i8::lt(&a[3usize], &b[3usize]) as i8),
594            -(i8::lt(&a[4usize], &b[4usize]) as i8),
595            -(i8::lt(&a[5usize], &b[5usize]) as i8),
596            -(i8::lt(&a[6usize], &b[6usize]) as i8),
597            -(i8::lt(&a[7usize], &b[7usize]) as i8),
598            -(i8::lt(&a[8usize], &b[8usize]) as i8),
599            -(i8::lt(&a[9usize], &b[9usize]) as i8),
600            -(i8::lt(&a[10usize], &b[10usize]) as i8),
601            -(i8::lt(&a[11usize], &b[11usize]) as i8),
602            -(i8::lt(&a[12usize], &b[12usize]) as i8),
603            -(i8::lt(&a[13usize], &b[13usize]) as i8),
604            -(i8::lt(&a[14usize], &b[14usize]) as i8),
605            -(i8::lt(&a[15usize], &b[15usize]) as i8),
606        ]
607        .simd_into(self)
608    }
609    #[inline(always)]
610    fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
611        [
612            -(i8::le(&a[0usize], &b[0usize]) as i8),
613            -(i8::le(&a[1usize], &b[1usize]) as i8),
614            -(i8::le(&a[2usize], &b[2usize]) as i8),
615            -(i8::le(&a[3usize], &b[3usize]) as i8),
616            -(i8::le(&a[4usize], &b[4usize]) as i8),
617            -(i8::le(&a[5usize], &b[5usize]) as i8),
618            -(i8::le(&a[6usize], &b[6usize]) as i8),
619            -(i8::le(&a[7usize], &b[7usize]) as i8),
620            -(i8::le(&a[8usize], &b[8usize]) as i8),
621            -(i8::le(&a[9usize], &b[9usize]) as i8),
622            -(i8::le(&a[10usize], &b[10usize]) as i8),
623            -(i8::le(&a[11usize], &b[11usize]) as i8),
624            -(i8::le(&a[12usize], &b[12usize]) as i8),
625            -(i8::le(&a[13usize], &b[13usize]) as i8),
626            -(i8::le(&a[14usize], &b[14usize]) as i8),
627            -(i8::le(&a[15usize], &b[15usize]) as i8),
628        ]
629        .simd_into(self)
630    }
631    #[inline(always)]
632    fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
633        [
634            -(i8::ge(&a[0usize], &b[0usize]) as i8),
635            -(i8::ge(&a[1usize], &b[1usize]) as i8),
636            -(i8::ge(&a[2usize], &b[2usize]) as i8),
637            -(i8::ge(&a[3usize], &b[3usize]) as i8),
638            -(i8::ge(&a[4usize], &b[4usize]) as i8),
639            -(i8::ge(&a[5usize], &b[5usize]) as i8),
640            -(i8::ge(&a[6usize], &b[6usize]) as i8),
641            -(i8::ge(&a[7usize], &b[7usize]) as i8),
642            -(i8::ge(&a[8usize], &b[8usize]) as i8),
643            -(i8::ge(&a[9usize], &b[9usize]) as i8),
644            -(i8::ge(&a[10usize], &b[10usize]) as i8),
645            -(i8::ge(&a[11usize], &b[11usize]) as i8),
646            -(i8::ge(&a[12usize], &b[12usize]) as i8),
647            -(i8::ge(&a[13usize], &b[13usize]) as i8),
648            -(i8::ge(&a[14usize], &b[14usize]) as i8),
649            -(i8::ge(&a[15usize], &b[15usize]) as i8),
650        ]
651        .simd_into(self)
652    }
653    #[inline(always)]
654    fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
655        [
656            -(i8::gt(&a[0usize], &b[0usize]) as i8),
657            -(i8::gt(&a[1usize], &b[1usize]) as i8),
658            -(i8::gt(&a[2usize], &b[2usize]) as i8),
659            -(i8::gt(&a[3usize], &b[3usize]) as i8),
660            -(i8::gt(&a[4usize], &b[4usize]) as i8),
661            -(i8::gt(&a[5usize], &b[5usize]) as i8),
662            -(i8::gt(&a[6usize], &b[6usize]) as i8),
663            -(i8::gt(&a[7usize], &b[7usize]) as i8),
664            -(i8::gt(&a[8usize], &b[8usize]) as i8),
665            -(i8::gt(&a[9usize], &b[9usize]) as i8),
666            -(i8::gt(&a[10usize], &b[10usize]) as i8),
667            -(i8::gt(&a[11usize], &b[11usize]) as i8),
668            -(i8::gt(&a[12usize], &b[12usize]) as i8),
669            -(i8::gt(&a[13usize], &b[13usize]) as i8),
670            -(i8::gt(&a[14usize], &b[14usize]) as i8),
671            -(i8::gt(&a[15usize], &b[15usize]) as i8),
672        ]
673        .simd_into(self)
674    }
675    #[inline(always)]
676    fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
677        [
678            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
679            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
680        ]
681        .simd_into(self)
682    }
683    #[inline(always)]
684    fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
685        [
686            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
687            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
688            a[15usize], b[15usize],
689        ]
690        .simd_into(self)
691    }
692    #[inline(always)]
693    fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
694        [
695            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
696            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
697            b[12usize], b[14usize],
698        ]
699        .simd_into(self)
700    }
701    #[inline(always)]
702    fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
703        [
704            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
705            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
706            b[13usize], b[15usize],
707        ]
708        .simd_into(self)
709    }
710    #[inline(always)]
711    fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
712        [
713            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
714            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
715            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
716            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
717            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
718            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
719            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
720            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
721            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
722            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
723            if a[10usize] != 0 {
724                b[10usize]
725            } else {
726                c[10usize]
727            },
728            if a[11usize] != 0 {
729                b[11usize]
730            } else {
731                c[11usize]
732            },
733            if a[12usize] != 0 {
734                b[12usize]
735            } else {
736                c[12usize]
737            },
738            if a[13usize] != 0 {
739                b[13usize]
740            } else {
741                c[13usize]
742            },
743            if a[14usize] != 0 {
744                b[14usize]
745            } else {
746                c[14usize]
747            },
748            if a[15usize] != 0 {
749                b[15usize]
750            } else {
751                c[15usize]
752            },
753        ]
754        .simd_into(self)
755    }
756    #[inline(always)]
757    fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
758        [
759            i8::min(a[0usize], b[0usize]),
760            i8::min(a[1usize], b[1usize]),
761            i8::min(a[2usize], b[2usize]),
762            i8::min(a[3usize], b[3usize]),
763            i8::min(a[4usize], b[4usize]),
764            i8::min(a[5usize], b[5usize]),
765            i8::min(a[6usize], b[6usize]),
766            i8::min(a[7usize], b[7usize]),
767            i8::min(a[8usize], b[8usize]),
768            i8::min(a[9usize], b[9usize]),
769            i8::min(a[10usize], b[10usize]),
770            i8::min(a[11usize], b[11usize]),
771            i8::min(a[12usize], b[12usize]),
772            i8::min(a[13usize], b[13usize]),
773            i8::min(a[14usize], b[14usize]),
774            i8::min(a[15usize], b[15usize]),
775        ]
776        .simd_into(self)
777    }
778    #[inline(always)]
779    fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
780        [
781            i8::max(a[0usize], b[0usize]),
782            i8::max(a[1usize], b[1usize]),
783            i8::max(a[2usize], b[2usize]),
784            i8::max(a[3usize], b[3usize]),
785            i8::max(a[4usize], b[4usize]),
786            i8::max(a[5usize], b[5usize]),
787            i8::max(a[6usize], b[6usize]),
788            i8::max(a[7usize], b[7usize]),
789            i8::max(a[8usize], b[8usize]),
790            i8::max(a[9usize], b[9usize]),
791            i8::max(a[10usize], b[10usize]),
792            i8::max(a[11usize], b[11usize]),
793            i8::max(a[12usize], b[12usize]),
794            i8::max(a[13usize], b[13usize]),
795            i8::max(a[14usize], b[14usize]),
796            i8::max(a[15usize], b[15usize]),
797        ]
798        .simd_into(self)
799    }
800    #[inline(always)]
801    fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
802        let mut result = [0; 32usize];
803        result[0..16usize].copy_from_slice(&a.val);
804        result[16usize..32usize].copy_from_slice(&b.val);
805        result.simd_into(self)
806    }
807    #[inline(always)]
808    fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
809        u8x16 {
810            val: bytemuck::cast(a.val),
811            simd: a.simd,
812        }
813    }
814    #[inline(always)]
815    fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
816        u32x4 {
817            val: bytemuck::cast(a.val),
818            simd: a.simd,
819        }
820    }
821    #[inline(always)]
822    fn splat_u8x16(self, val: u8) -> u8x16<Self> {
823        [val; 16usize].simd_into(self)
824    }
825    #[inline(always)]
826    fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
827        [
828            u8::not(a[0usize]),
829            u8::not(a[1usize]),
830            u8::not(a[2usize]),
831            u8::not(a[3usize]),
832            u8::not(a[4usize]),
833            u8::not(a[5usize]),
834            u8::not(a[6usize]),
835            u8::not(a[7usize]),
836            u8::not(a[8usize]),
837            u8::not(a[9usize]),
838            u8::not(a[10usize]),
839            u8::not(a[11usize]),
840            u8::not(a[12usize]),
841            u8::not(a[13usize]),
842            u8::not(a[14usize]),
843            u8::not(a[15usize]),
844        ]
845        .simd_into(self)
846    }
847    #[inline(always)]
848    fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
849        [
850            u8::add(a[0usize], &b[0usize]),
851            u8::add(a[1usize], &b[1usize]),
852            u8::add(a[2usize], &b[2usize]),
853            u8::add(a[3usize], &b[3usize]),
854            u8::add(a[4usize], &b[4usize]),
855            u8::add(a[5usize], &b[5usize]),
856            u8::add(a[6usize], &b[6usize]),
857            u8::add(a[7usize], &b[7usize]),
858            u8::add(a[8usize], &b[8usize]),
859            u8::add(a[9usize], &b[9usize]),
860            u8::add(a[10usize], &b[10usize]),
861            u8::add(a[11usize], &b[11usize]),
862            u8::add(a[12usize], &b[12usize]),
863            u8::add(a[13usize], &b[13usize]),
864            u8::add(a[14usize], &b[14usize]),
865            u8::add(a[15usize], &b[15usize]),
866        ]
867        .simd_into(self)
868    }
869    #[inline(always)]
870    fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
871        [
872            u8::wrapping_sub(a[0usize], b[0usize]),
873            u8::wrapping_sub(a[1usize], b[1usize]),
874            u8::wrapping_sub(a[2usize], b[2usize]),
875            u8::wrapping_sub(a[3usize], b[3usize]),
876            u8::wrapping_sub(a[4usize], b[4usize]),
877            u8::wrapping_sub(a[5usize], b[5usize]),
878            u8::wrapping_sub(a[6usize], b[6usize]),
879            u8::wrapping_sub(a[7usize], b[7usize]),
880            u8::wrapping_sub(a[8usize], b[8usize]),
881            u8::wrapping_sub(a[9usize], b[9usize]),
882            u8::wrapping_sub(a[10usize], b[10usize]),
883            u8::wrapping_sub(a[11usize], b[11usize]),
884            u8::wrapping_sub(a[12usize], b[12usize]),
885            u8::wrapping_sub(a[13usize], b[13usize]),
886            u8::wrapping_sub(a[14usize], b[14usize]),
887            u8::wrapping_sub(a[15usize], b[15usize]),
888        ]
889        .simd_into(self)
890    }
891    #[inline(always)]
892    fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
893        [
894            u8::wrapping_mul(a[0usize], b[0usize]),
895            u8::wrapping_mul(a[1usize], b[1usize]),
896            u8::wrapping_mul(a[2usize], b[2usize]),
897            u8::wrapping_mul(a[3usize], b[3usize]),
898            u8::wrapping_mul(a[4usize], b[4usize]),
899            u8::wrapping_mul(a[5usize], b[5usize]),
900            u8::wrapping_mul(a[6usize], b[6usize]),
901            u8::wrapping_mul(a[7usize], b[7usize]),
902            u8::wrapping_mul(a[8usize], b[8usize]),
903            u8::wrapping_mul(a[9usize], b[9usize]),
904            u8::wrapping_mul(a[10usize], b[10usize]),
905            u8::wrapping_mul(a[11usize], b[11usize]),
906            u8::wrapping_mul(a[12usize], b[12usize]),
907            u8::wrapping_mul(a[13usize], b[13usize]),
908            u8::wrapping_mul(a[14usize], b[14usize]),
909            u8::wrapping_mul(a[15usize], b[15usize]),
910        ]
911        .simd_into(self)
912    }
913    #[inline(always)]
914    fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
915        [
916            u8::bitand(a[0usize], &b[0usize]),
917            u8::bitand(a[1usize], &b[1usize]),
918            u8::bitand(a[2usize], &b[2usize]),
919            u8::bitand(a[3usize], &b[3usize]),
920            u8::bitand(a[4usize], &b[4usize]),
921            u8::bitand(a[5usize], &b[5usize]),
922            u8::bitand(a[6usize], &b[6usize]),
923            u8::bitand(a[7usize], &b[7usize]),
924            u8::bitand(a[8usize], &b[8usize]),
925            u8::bitand(a[9usize], &b[9usize]),
926            u8::bitand(a[10usize], &b[10usize]),
927            u8::bitand(a[11usize], &b[11usize]),
928            u8::bitand(a[12usize], &b[12usize]),
929            u8::bitand(a[13usize], &b[13usize]),
930            u8::bitand(a[14usize], &b[14usize]),
931            u8::bitand(a[15usize], &b[15usize]),
932        ]
933        .simd_into(self)
934    }
935    #[inline(always)]
936    fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
937        [
938            u8::bitor(a[0usize], &b[0usize]),
939            u8::bitor(a[1usize], &b[1usize]),
940            u8::bitor(a[2usize], &b[2usize]),
941            u8::bitor(a[3usize], &b[3usize]),
942            u8::bitor(a[4usize], &b[4usize]),
943            u8::bitor(a[5usize], &b[5usize]),
944            u8::bitor(a[6usize], &b[6usize]),
945            u8::bitor(a[7usize], &b[7usize]),
946            u8::bitor(a[8usize], &b[8usize]),
947            u8::bitor(a[9usize], &b[9usize]),
948            u8::bitor(a[10usize], &b[10usize]),
949            u8::bitor(a[11usize], &b[11usize]),
950            u8::bitor(a[12usize], &b[12usize]),
951            u8::bitor(a[13usize], &b[13usize]),
952            u8::bitor(a[14usize], &b[14usize]),
953            u8::bitor(a[15usize], &b[15usize]),
954        ]
955        .simd_into(self)
956    }
957    #[inline(always)]
958    fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
959        [
960            u8::bitxor(a[0usize], &b[0usize]),
961            u8::bitxor(a[1usize], &b[1usize]),
962            u8::bitxor(a[2usize], &b[2usize]),
963            u8::bitxor(a[3usize], &b[3usize]),
964            u8::bitxor(a[4usize], &b[4usize]),
965            u8::bitxor(a[5usize], &b[5usize]),
966            u8::bitxor(a[6usize], &b[6usize]),
967            u8::bitxor(a[7usize], &b[7usize]),
968            u8::bitxor(a[8usize], &b[8usize]),
969            u8::bitxor(a[9usize], &b[9usize]),
970            u8::bitxor(a[10usize], &b[10usize]),
971            u8::bitxor(a[11usize], &b[11usize]),
972            u8::bitxor(a[12usize], &b[12usize]),
973            u8::bitxor(a[13usize], &b[13usize]),
974            u8::bitxor(a[14usize], &b[14usize]),
975            u8::bitxor(a[15usize], &b[15usize]),
976        ]
977        .simd_into(self)
978    }
979    #[inline(always)]
980    fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
981        [
982            u8::shr(a[0usize], shift as u8),
983            u8::shr(a[1usize], shift as u8),
984            u8::shr(a[2usize], shift as u8),
985            u8::shr(a[3usize], shift as u8),
986            u8::shr(a[4usize], shift as u8),
987            u8::shr(a[5usize], shift as u8),
988            u8::shr(a[6usize], shift as u8),
989            u8::shr(a[7usize], shift as u8),
990            u8::shr(a[8usize], shift as u8),
991            u8::shr(a[9usize], shift as u8),
992            u8::shr(a[10usize], shift as u8),
993            u8::shr(a[11usize], shift as u8),
994            u8::shr(a[12usize], shift as u8),
995            u8::shr(a[13usize], shift as u8),
996            u8::shr(a[14usize], shift as u8),
997            u8::shr(a[15usize], shift as u8),
998        ]
999        .simd_into(self)
1000    }
1001    #[inline(always)]
1002    fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1003        [
1004            -(u8::eq(&a[0usize], &b[0usize]) as i8),
1005            -(u8::eq(&a[1usize], &b[1usize]) as i8),
1006            -(u8::eq(&a[2usize], &b[2usize]) as i8),
1007            -(u8::eq(&a[3usize], &b[3usize]) as i8),
1008            -(u8::eq(&a[4usize], &b[4usize]) as i8),
1009            -(u8::eq(&a[5usize], &b[5usize]) as i8),
1010            -(u8::eq(&a[6usize], &b[6usize]) as i8),
1011            -(u8::eq(&a[7usize], &b[7usize]) as i8),
1012            -(u8::eq(&a[8usize], &b[8usize]) as i8),
1013            -(u8::eq(&a[9usize], &b[9usize]) as i8),
1014            -(u8::eq(&a[10usize], &b[10usize]) as i8),
1015            -(u8::eq(&a[11usize], &b[11usize]) as i8),
1016            -(u8::eq(&a[12usize], &b[12usize]) as i8),
1017            -(u8::eq(&a[13usize], &b[13usize]) as i8),
1018            -(u8::eq(&a[14usize], &b[14usize]) as i8),
1019            -(u8::eq(&a[15usize], &b[15usize]) as i8),
1020        ]
1021        .simd_into(self)
1022    }
1023    #[inline(always)]
1024    fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1025        [
1026            -(u8::lt(&a[0usize], &b[0usize]) as i8),
1027            -(u8::lt(&a[1usize], &b[1usize]) as i8),
1028            -(u8::lt(&a[2usize], &b[2usize]) as i8),
1029            -(u8::lt(&a[3usize], &b[3usize]) as i8),
1030            -(u8::lt(&a[4usize], &b[4usize]) as i8),
1031            -(u8::lt(&a[5usize], &b[5usize]) as i8),
1032            -(u8::lt(&a[6usize], &b[6usize]) as i8),
1033            -(u8::lt(&a[7usize], &b[7usize]) as i8),
1034            -(u8::lt(&a[8usize], &b[8usize]) as i8),
1035            -(u8::lt(&a[9usize], &b[9usize]) as i8),
1036            -(u8::lt(&a[10usize], &b[10usize]) as i8),
1037            -(u8::lt(&a[11usize], &b[11usize]) as i8),
1038            -(u8::lt(&a[12usize], &b[12usize]) as i8),
1039            -(u8::lt(&a[13usize], &b[13usize]) as i8),
1040            -(u8::lt(&a[14usize], &b[14usize]) as i8),
1041            -(u8::lt(&a[15usize], &b[15usize]) as i8),
1042        ]
1043        .simd_into(self)
1044    }
1045    #[inline(always)]
1046    fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1047        [
1048            -(u8::le(&a[0usize], &b[0usize]) as i8),
1049            -(u8::le(&a[1usize], &b[1usize]) as i8),
1050            -(u8::le(&a[2usize], &b[2usize]) as i8),
1051            -(u8::le(&a[3usize], &b[3usize]) as i8),
1052            -(u8::le(&a[4usize], &b[4usize]) as i8),
1053            -(u8::le(&a[5usize], &b[5usize]) as i8),
1054            -(u8::le(&a[6usize], &b[6usize]) as i8),
1055            -(u8::le(&a[7usize], &b[7usize]) as i8),
1056            -(u8::le(&a[8usize], &b[8usize]) as i8),
1057            -(u8::le(&a[9usize], &b[9usize]) as i8),
1058            -(u8::le(&a[10usize], &b[10usize]) as i8),
1059            -(u8::le(&a[11usize], &b[11usize]) as i8),
1060            -(u8::le(&a[12usize], &b[12usize]) as i8),
1061            -(u8::le(&a[13usize], &b[13usize]) as i8),
1062            -(u8::le(&a[14usize], &b[14usize]) as i8),
1063            -(u8::le(&a[15usize], &b[15usize]) as i8),
1064        ]
1065        .simd_into(self)
1066    }
1067    #[inline(always)]
1068    fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1069        [
1070            -(u8::ge(&a[0usize], &b[0usize]) as i8),
1071            -(u8::ge(&a[1usize], &b[1usize]) as i8),
1072            -(u8::ge(&a[2usize], &b[2usize]) as i8),
1073            -(u8::ge(&a[3usize], &b[3usize]) as i8),
1074            -(u8::ge(&a[4usize], &b[4usize]) as i8),
1075            -(u8::ge(&a[5usize], &b[5usize]) as i8),
1076            -(u8::ge(&a[6usize], &b[6usize]) as i8),
1077            -(u8::ge(&a[7usize], &b[7usize]) as i8),
1078            -(u8::ge(&a[8usize], &b[8usize]) as i8),
1079            -(u8::ge(&a[9usize], &b[9usize]) as i8),
1080            -(u8::ge(&a[10usize], &b[10usize]) as i8),
1081            -(u8::ge(&a[11usize], &b[11usize]) as i8),
1082            -(u8::ge(&a[12usize], &b[12usize]) as i8),
1083            -(u8::ge(&a[13usize], &b[13usize]) as i8),
1084            -(u8::ge(&a[14usize], &b[14usize]) as i8),
1085            -(u8::ge(&a[15usize], &b[15usize]) as i8),
1086        ]
1087        .simd_into(self)
1088    }
1089    #[inline(always)]
1090    fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1091        [
1092            -(u8::gt(&a[0usize], &b[0usize]) as i8),
1093            -(u8::gt(&a[1usize], &b[1usize]) as i8),
1094            -(u8::gt(&a[2usize], &b[2usize]) as i8),
1095            -(u8::gt(&a[3usize], &b[3usize]) as i8),
1096            -(u8::gt(&a[4usize], &b[4usize]) as i8),
1097            -(u8::gt(&a[5usize], &b[5usize]) as i8),
1098            -(u8::gt(&a[6usize], &b[6usize]) as i8),
1099            -(u8::gt(&a[7usize], &b[7usize]) as i8),
1100            -(u8::gt(&a[8usize], &b[8usize]) as i8),
1101            -(u8::gt(&a[9usize], &b[9usize]) as i8),
1102            -(u8::gt(&a[10usize], &b[10usize]) as i8),
1103            -(u8::gt(&a[11usize], &b[11usize]) as i8),
1104            -(u8::gt(&a[12usize], &b[12usize]) as i8),
1105            -(u8::gt(&a[13usize], &b[13usize]) as i8),
1106            -(u8::gt(&a[14usize], &b[14usize]) as i8),
1107            -(u8::gt(&a[15usize], &b[15usize]) as i8),
1108        ]
1109        .simd_into(self)
1110    }
1111    #[inline(always)]
1112    fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1113        [
1114            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1115            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1116        ]
1117        .simd_into(self)
1118    }
1119    #[inline(always)]
1120    fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1121        [
1122            a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
1123            b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
1124            a[15usize], b[15usize],
1125        ]
1126        .simd_into(self)
1127    }
1128    #[inline(always)]
1129    fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1130        [
1131            a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
1132            a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
1133            b[12usize], b[14usize],
1134        ]
1135        .simd_into(self)
1136    }
1137    #[inline(always)]
1138    fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1139        [
1140            a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
1141            a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
1142            b[13usize], b[15usize],
1143        ]
1144        .simd_into(self)
1145    }
1146    #[inline(always)]
1147    fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
1148        [
1149            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1150            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1151            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1152            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1153            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1154            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1155            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1156            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1157            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1158            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1159            if a[10usize] != 0 {
1160                b[10usize]
1161            } else {
1162                c[10usize]
1163            },
1164            if a[11usize] != 0 {
1165                b[11usize]
1166            } else {
1167                c[11usize]
1168            },
1169            if a[12usize] != 0 {
1170                b[12usize]
1171            } else {
1172                c[12usize]
1173            },
1174            if a[13usize] != 0 {
1175                b[13usize]
1176            } else {
1177                c[13usize]
1178            },
1179            if a[14usize] != 0 {
1180                b[14usize]
1181            } else {
1182                c[14usize]
1183            },
1184            if a[15usize] != 0 {
1185                b[15usize]
1186            } else {
1187                c[15usize]
1188            },
1189        ]
1190        .simd_into(self)
1191    }
1192    #[inline(always)]
1193    fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1194        [
1195            u8::min(a[0usize], b[0usize]),
1196            u8::min(a[1usize], b[1usize]),
1197            u8::min(a[2usize], b[2usize]),
1198            u8::min(a[3usize], b[3usize]),
1199            u8::min(a[4usize], b[4usize]),
1200            u8::min(a[5usize], b[5usize]),
1201            u8::min(a[6usize], b[6usize]),
1202            u8::min(a[7usize], b[7usize]),
1203            u8::min(a[8usize], b[8usize]),
1204            u8::min(a[9usize], b[9usize]),
1205            u8::min(a[10usize], b[10usize]),
1206            u8::min(a[11usize], b[11usize]),
1207            u8::min(a[12usize], b[12usize]),
1208            u8::min(a[13usize], b[13usize]),
1209            u8::min(a[14usize], b[14usize]),
1210            u8::min(a[15usize], b[15usize]),
1211        ]
1212        .simd_into(self)
1213    }
1214    #[inline(always)]
1215    fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1216        [
1217            u8::max(a[0usize], b[0usize]),
1218            u8::max(a[1usize], b[1usize]),
1219            u8::max(a[2usize], b[2usize]),
1220            u8::max(a[3usize], b[3usize]),
1221            u8::max(a[4usize], b[4usize]),
1222            u8::max(a[5usize], b[5usize]),
1223            u8::max(a[6usize], b[6usize]),
1224            u8::max(a[7usize], b[7usize]),
1225            u8::max(a[8usize], b[8usize]),
1226            u8::max(a[9usize], b[9usize]),
1227            u8::max(a[10usize], b[10usize]),
1228            u8::max(a[11usize], b[11usize]),
1229            u8::max(a[12usize], b[12usize]),
1230            u8::max(a[13usize], b[13usize]),
1231            u8::max(a[14usize], b[14usize]),
1232            u8::max(a[15usize], b[15usize]),
1233        ]
1234        .simd_into(self)
1235    }
1236    #[inline(always)]
1237    fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
1238        let mut result = [0; 32usize];
1239        result[0..16usize].copy_from_slice(&a.val);
1240        result[16usize..32usize].copy_from_slice(&b.val);
1241        result.simd_into(self)
1242    }
1243    #[inline(always)]
1244    fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1245        [
1246            a[0usize] as u16,
1247            a[1usize] as u16,
1248            a[2usize] as u16,
1249            a[3usize] as u16,
1250            a[4usize] as u16,
1251            a[5usize] as u16,
1252            a[6usize] as u16,
1253            a[7usize] as u16,
1254            a[8usize] as u16,
1255            a[9usize] as u16,
1256            a[10usize] as u16,
1257            a[11usize] as u16,
1258            a[12usize] as u16,
1259            a[13usize] as u16,
1260            a[14usize] as u16,
1261            a[15usize] as u16,
1262        ]
1263        .simd_into(self)
1264    }
1265    #[inline(always)]
1266    fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1267        u32x4 {
1268            val: bytemuck::cast(a.val),
1269            simd: a.simd,
1270        }
1271    }
1272    #[inline(always)]
1273    fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1274        [val; 16usize].simd_into(self)
1275    }
1276    #[inline(always)]
1277    fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1278        [
1279            i8::not(a[0usize]),
1280            i8::not(a[1usize]),
1281            i8::not(a[2usize]),
1282            i8::not(a[3usize]),
1283            i8::not(a[4usize]),
1284            i8::not(a[5usize]),
1285            i8::not(a[6usize]),
1286            i8::not(a[7usize]),
1287            i8::not(a[8usize]),
1288            i8::not(a[9usize]),
1289            i8::not(a[10usize]),
1290            i8::not(a[11usize]),
1291            i8::not(a[12usize]),
1292            i8::not(a[13usize]),
1293            i8::not(a[14usize]),
1294            i8::not(a[15usize]),
1295        ]
1296        .simd_into(self)
1297    }
1298    #[inline(always)]
1299    fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1300        [
1301            i8::bitand(a[0usize], &b[0usize]),
1302            i8::bitand(a[1usize], &b[1usize]),
1303            i8::bitand(a[2usize], &b[2usize]),
1304            i8::bitand(a[3usize], &b[3usize]),
1305            i8::bitand(a[4usize], &b[4usize]),
1306            i8::bitand(a[5usize], &b[5usize]),
1307            i8::bitand(a[6usize], &b[6usize]),
1308            i8::bitand(a[7usize], &b[7usize]),
1309            i8::bitand(a[8usize], &b[8usize]),
1310            i8::bitand(a[9usize], &b[9usize]),
1311            i8::bitand(a[10usize], &b[10usize]),
1312            i8::bitand(a[11usize], &b[11usize]),
1313            i8::bitand(a[12usize], &b[12usize]),
1314            i8::bitand(a[13usize], &b[13usize]),
1315            i8::bitand(a[14usize], &b[14usize]),
1316            i8::bitand(a[15usize], &b[15usize]),
1317        ]
1318        .simd_into(self)
1319    }
1320    #[inline(always)]
1321    fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1322        [
1323            i8::bitor(a[0usize], &b[0usize]),
1324            i8::bitor(a[1usize], &b[1usize]),
1325            i8::bitor(a[2usize], &b[2usize]),
1326            i8::bitor(a[3usize], &b[3usize]),
1327            i8::bitor(a[4usize], &b[4usize]),
1328            i8::bitor(a[5usize], &b[5usize]),
1329            i8::bitor(a[6usize], &b[6usize]),
1330            i8::bitor(a[7usize], &b[7usize]),
1331            i8::bitor(a[8usize], &b[8usize]),
1332            i8::bitor(a[9usize], &b[9usize]),
1333            i8::bitor(a[10usize], &b[10usize]),
1334            i8::bitor(a[11usize], &b[11usize]),
1335            i8::bitor(a[12usize], &b[12usize]),
1336            i8::bitor(a[13usize], &b[13usize]),
1337            i8::bitor(a[14usize], &b[14usize]),
1338            i8::bitor(a[15usize], &b[15usize]),
1339        ]
1340        .simd_into(self)
1341    }
1342    #[inline(always)]
1343    fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1344        [
1345            i8::bitxor(a[0usize], &b[0usize]),
1346            i8::bitxor(a[1usize], &b[1usize]),
1347            i8::bitxor(a[2usize], &b[2usize]),
1348            i8::bitxor(a[3usize], &b[3usize]),
1349            i8::bitxor(a[4usize], &b[4usize]),
1350            i8::bitxor(a[5usize], &b[5usize]),
1351            i8::bitxor(a[6usize], &b[6usize]),
1352            i8::bitxor(a[7usize], &b[7usize]),
1353            i8::bitxor(a[8usize], &b[8usize]),
1354            i8::bitxor(a[9usize], &b[9usize]),
1355            i8::bitxor(a[10usize], &b[10usize]),
1356            i8::bitxor(a[11usize], &b[11usize]),
1357            i8::bitxor(a[12usize], &b[12usize]),
1358            i8::bitxor(a[13usize], &b[13usize]),
1359            i8::bitxor(a[14usize], &b[14usize]),
1360            i8::bitxor(a[15usize], &b[15usize]),
1361        ]
1362        .simd_into(self)
1363    }
1364    #[inline(always)]
1365    fn select_mask8x16(
1366        self,
1367        a: mask8x16<Self>,
1368        b: mask8x16<Self>,
1369        c: mask8x16<Self>,
1370    ) -> mask8x16<Self> {
1371        [
1372            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1373            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1374            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1375            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1376            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1377            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1378            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1379            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1380            if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1381            if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1382            if a[10usize] != 0 {
1383                b[10usize]
1384            } else {
1385                c[10usize]
1386            },
1387            if a[11usize] != 0 {
1388                b[11usize]
1389            } else {
1390                c[11usize]
1391            },
1392            if a[12usize] != 0 {
1393                b[12usize]
1394            } else {
1395                c[12usize]
1396            },
1397            if a[13usize] != 0 {
1398                b[13usize]
1399            } else {
1400                c[13usize]
1401            },
1402            if a[14usize] != 0 {
1403                b[14usize]
1404            } else {
1405                c[14usize]
1406            },
1407            if a[15usize] != 0 {
1408                b[15usize]
1409            } else {
1410                c[15usize]
1411            },
1412        ]
1413        .simd_into(self)
1414    }
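    // Comparisons produce canonical mask values: `-(cmp as i8)` yields -1
    // (all bits set) when the comparison is true and 0 when it is false.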
1415    #[inline(always)]
1416    fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1417        [
1418            -(i8::eq(&a[0usize], &b[0usize]) as i8),
1419            -(i8::eq(&a[1usize], &b[1usize]) as i8),
1420            -(i8::eq(&a[2usize], &b[2usize]) as i8),
1421            -(i8::eq(&a[3usize], &b[3usize]) as i8),
1422            -(i8::eq(&a[4usize], &b[4usize]) as i8),
1423            -(i8::eq(&a[5usize], &b[5usize]) as i8),
1424            -(i8::eq(&a[6usize], &b[6usize]) as i8),
1425            -(i8::eq(&a[7usize], &b[7usize]) as i8),
1426            -(i8::eq(&a[8usize], &b[8usize]) as i8),
1427            -(i8::eq(&a[9usize], &b[9usize]) as i8),
1428            -(i8::eq(&a[10usize], &b[10usize]) as i8),
1429            -(i8::eq(&a[11usize], &b[11usize]) as i8),
1430            -(i8::eq(&a[12usize], &b[12usize]) as i8),
1431            -(i8::eq(&a[13usize], &b[13usize]) as i8),
1432            -(i8::eq(&a[14usize], &b[14usize]) as i8),
1433            -(i8::eq(&a[15usize], &b[15usize]) as i8),
1434        ]
1435        .simd_into(self)
1436    }
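    // `combine_*` concatenates two vectors into the next wider type by
    // copying both lane arrays into a single buffer.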
1437    #[inline(always)]
1438    fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
1439        let mut result = [0; 32usize];
1440        result[0..16usize].copy_from_slice(&a.val);
1441        result[16usize..32usize].copy_from_slice(&b.val);
1442        result.simd_into(self)
1443    }
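    // i16x8: each operation is computed lane by lane on the underlying
    // [i16; 8] array, following the same pattern as the 8-bit types above.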
1444    #[inline(always)]
1445    fn splat_i16x8(self, val: i16) -> i16x8<Self> {
1446        [val; 8usize].simd_into(self)
1447    }
1448    #[inline(always)]
1449    fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1450        [
1451            i16::not(a[0usize]),
1452            i16::not(a[1usize]),
1453            i16::not(a[2usize]),
1454            i16::not(a[3usize]),
1455            i16::not(a[4usize]),
1456            i16::not(a[5usize]),
1457            i16::not(a[6usize]),
1458            i16::not(a[7usize]),
1459        ]
1460        .simd_into(self)
1461    }
1462    #[inline(always)]
1463    fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1464        [
1465            i16::add(a[0usize], &b[0usize]),
1466            i16::add(a[1usize], &b[1usize]),
1467            i16::add(a[2usize], &b[2usize]),
1468            i16::add(a[3usize], &b[3usize]),
1469            i16::add(a[4usize], &b[4usize]),
1470            i16::add(a[5usize], &b[5usize]),
1471            i16::add(a[6usize], &b[6usize]),
1472            i16::add(a[7usize], &b[7usize]),
1473        ]
1474        .simd_into(self)
1475    }
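    // Subtraction and multiplication use the wrapping forms, so lane
    // overflow wraps around rather than panicking in debug builds.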
1476    #[inline(always)]
1477    fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1478        [
1479            i16::wrapping_sub(a[0usize], b[0usize]),
1480            i16::wrapping_sub(a[1usize], b[1usize]),
1481            i16::wrapping_sub(a[2usize], b[2usize]),
1482            i16::wrapping_sub(a[3usize], b[3usize]),
1483            i16::wrapping_sub(a[4usize], b[4usize]),
1484            i16::wrapping_sub(a[5usize], b[5usize]),
1485            i16::wrapping_sub(a[6usize], b[6usize]),
1486            i16::wrapping_sub(a[7usize], b[7usize]),
1487        ]
1488        .simd_into(self)
1489    }
1490    #[inline(always)]
1491    fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1492        [
1493            i16::wrapping_mul(a[0usize], b[0usize]),
1494            i16::wrapping_mul(a[1usize], b[1usize]),
1495            i16::wrapping_mul(a[2usize], b[2usize]),
1496            i16::wrapping_mul(a[3usize], b[3usize]),
1497            i16::wrapping_mul(a[4usize], b[4usize]),
1498            i16::wrapping_mul(a[5usize], b[5usize]),
1499            i16::wrapping_mul(a[6usize], b[6usize]),
1500            i16::wrapping_mul(a[7usize], b[7usize]),
1501        ]
1502        .simd_into(self)
1503    }
1504    #[inline(always)]
1505    fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1506        [
1507            i16::bitand(a[0usize], &b[0usize]),
1508            i16::bitand(a[1usize], &b[1usize]),
1509            i16::bitand(a[2usize], &b[2usize]),
1510            i16::bitand(a[3usize], &b[3usize]),
1511            i16::bitand(a[4usize], &b[4usize]),
1512            i16::bitand(a[5usize], &b[5usize]),
1513            i16::bitand(a[6usize], &b[6usize]),
1514            i16::bitand(a[7usize], &b[7usize]),
1515        ]
1516        .simd_into(self)
1517    }
1518    #[inline(always)]
1519    fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1520        [
1521            i16::bitor(a[0usize], &b[0usize]),
1522            i16::bitor(a[1usize], &b[1usize]),
1523            i16::bitor(a[2usize], &b[2usize]),
1524            i16::bitor(a[3usize], &b[3usize]),
1525            i16::bitor(a[4usize], &b[4usize]),
1526            i16::bitor(a[5usize], &b[5usize]),
1527            i16::bitor(a[6usize], &b[6usize]),
1528            i16::bitor(a[7usize], &b[7usize]),
1529        ]
1530        .simd_into(self)
1531    }
1532    #[inline(always)]
1533    fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1534        [
1535            i16::bitxor(a[0usize], &b[0usize]),
1536            i16::bitxor(a[1usize], &b[1usize]),
1537            i16::bitxor(a[2usize], &b[2usize]),
1538            i16::bitxor(a[3usize], &b[3usize]),
1539            i16::bitxor(a[4usize], &b[4usize]),
1540            i16::bitxor(a[5usize], &b[5usize]),
1541            i16::bitxor(a[6usize], &b[6usize]),
1542            i16::bitxor(a[7usize], &b[7usize]),
1543        ]
1544        .simd_into(self)
1545    }
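    // Shift right by a uniform amount; `>>` on i16 is an arithmetic
    // (sign-extending) shift.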
1546    #[inline(always)]
1547    fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1548        [
1549            i16::shr(a[0usize], shift as i16),
1550            i16::shr(a[1usize], shift as i16),
1551            i16::shr(a[2usize], shift as i16),
1552            i16::shr(a[3usize], shift as i16),
1553            i16::shr(a[4usize], shift as i16),
1554            i16::shr(a[5usize], shift as i16),
1555            i16::shr(a[6usize], shift as i16),
1556            i16::shr(a[7usize], shift as i16),
1557        ]
1558        .simd_into(self)
1559    }
1560    #[inline(always)]
1561    fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1562        [
1563            -(i16::eq(&a[0usize], &b[0usize]) as i16),
1564            -(i16::eq(&a[1usize], &b[1usize]) as i16),
1565            -(i16::eq(&a[2usize], &b[2usize]) as i16),
1566            -(i16::eq(&a[3usize], &b[3usize]) as i16),
1567            -(i16::eq(&a[4usize], &b[4usize]) as i16),
1568            -(i16::eq(&a[5usize], &b[5usize]) as i16),
1569            -(i16::eq(&a[6usize], &b[6usize]) as i16),
1570            -(i16::eq(&a[7usize], &b[7usize]) as i16),
1571        ]
1572        .simd_into(self)
1573    }
1574    #[inline(always)]
1575    fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1576        [
1577            -(i16::lt(&a[0usize], &b[0usize]) as i16),
1578            -(i16::lt(&a[1usize], &b[1usize]) as i16),
1579            -(i16::lt(&a[2usize], &b[2usize]) as i16),
1580            -(i16::lt(&a[3usize], &b[3usize]) as i16),
1581            -(i16::lt(&a[4usize], &b[4usize]) as i16),
1582            -(i16::lt(&a[5usize], &b[5usize]) as i16),
1583            -(i16::lt(&a[6usize], &b[6usize]) as i16),
1584            -(i16::lt(&a[7usize], &b[7usize]) as i16),
1585        ]
1586        .simd_into(self)
1587    }
1588    #[inline(always)]
1589    fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1590        [
1591            -(i16::le(&a[0usize], &b[0usize]) as i16),
1592            -(i16::le(&a[1usize], &b[1usize]) as i16),
1593            -(i16::le(&a[2usize], &b[2usize]) as i16),
1594            -(i16::le(&a[3usize], &b[3usize]) as i16),
1595            -(i16::le(&a[4usize], &b[4usize]) as i16),
1596            -(i16::le(&a[5usize], &b[5usize]) as i16),
1597            -(i16::le(&a[6usize], &b[6usize]) as i16),
1598            -(i16::le(&a[7usize], &b[7usize]) as i16),
1599        ]
1600        .simd_into(self)
1601    }
1602    #[inline(always)]
1603    fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1604        [
1605            -(i16::ge(&a[0usize], &b[0usize]) as i16),
1606            -(i16::ge(&a[1usize], &b[1usize]) as i16),
1607            -(i16::ge(&a[2usize], &b[2usize]) as i16),
1608            -(i16::ge(&a[3usize], &b[3usize]) as i16),
1609            -(i16::ge(&a[4usize], &b[4usize]) as i16),
1610            -(i16::ge(&a[5usize], &b[5usize]) as i16),
1611            -(i16::ge(&a[6usize], &b[6usize]) as i16),
1612            -(i16::ge(&a[7usize], &b[7usize]) as i16),
1613        ]
1614        .simd_into(self)
1615    }
1616    #[inline(always)]
1617    fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1618        [
1619            -(i16::gt(&a[0usize], &b[0usize]) as i16),
1620            -(i16::gt(&a[1usize], &b[1usize]) as i16),
1621            -(i16::gt(&a[2usize], &b[2usize]) as i16),
1622            -(i16::gt(&a[3usize], &b[3usize]) as i16),
1623            -(i16::gt(&a[4usize], &b[4usize]) as i16),
1624            -(i16::gt(&a[5usize], &b[5usize]) as i16),
1625            -(i16::gt(&a[6usize], &b[6usize]) as i16),
1626            -(i16::gt(&a[7usize], &b[7usize]) as i16),
1627        ]
1628        .simd_into(self)
1629    }
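    // zip_* interleave lanes from the low/high halves of `a` and `b`;
    // unzip_* gather the even- and odd-indexed lanes back out.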
1630    #[inline(always)]
1631    fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1632        [
1633            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1634        ]
1635        .simd_into(self)
1636    }
1637    #[inline(always)]
1638    fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1639        [
1640            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1641        ]
1642        .simd_into(self)
1643    }
1644    #[inline(always)]
1645    fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1646        [
1647            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1648        ]
1649        .simd_into(self)
1650    }
1651    #[inline(always)]
1652    fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1653        [
1654            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1655        ]
1656        .simd_into(self)
1657    }
1658    #[inline(always)]
1659    fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
1660        [
1661            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1662            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1663            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1664            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1665            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1666            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1667            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1668            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1669        ]
1670        .simd_into(self)
1671    }
1672    #[inline(always)]
1673    fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1674        [
1675            i16::min(a[0usize], b[0usize]),
1676            i16::min(a[1usize], b[1usize]),
1677            i16::min(a[2usize], b[2usize]),
1678            i16::min(a[3usize], b[3usize]),
1679            i16::min(a[4usize], b[4usize]),
1680            i16::min(a[5usize], b[5usize]),
1681            i16::min(a[6usize], b[6usize]),
1682            i16::min(a[7usize], b[7usize]),
1683        ]
1684        .simd_into(self)
1685    }
1686    #[inline(always)]
1687    fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1688        [
1689            i16::max(a[0usize], b[0usize]),
1690            i16::max(a[1usize], b[1usize]),
1691            i16::max(a[2usize], b[2usize]),
1692            i16::max(a[3usize], b[3usize]),
1693            i16::max(a[4usize], b[4usize]),
1694            i16::max(a[5usize], b[5usize]),
1695            i16::max(a[6usize], b[6usize]),
1696            i16::max(a[7usize], b[7usize]),
1697        ]
1698        .simd_into(self)
1699    }
1700    #[inline(always)]
1701    fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
1702        let mut result = [0; 16usize];
1703        result[0..8usize].copy_from_slice(&a.val);
1704        result[8usize..16usize].copy_from_slice(&b.val);
1705        result.simd_into(self)
1706    }
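    // Reinterprets are bit-level casts of the lane array via `bytemuck::cast`;
    // no per-lane value conversion takes place.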
1707    #[inline(always)]
1708    fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
1709        u8x16 {
1710            val: bytemuck::cast(a.val),
1711            simd: a.simd,
1712        }
1713    }
1714    #[inline(always)]
1715    fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
1716        u32x4 {
1717            val: bytemuck::cast(a.val),
1718            simd: a.simd,
1719        }
1720    }
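    // u16x8: same lane-wise scalar implementation as i16x8 above.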
1721    #[inline(always)]
1722    fn splat_u16x8(self, val: u16) -> u16x8<Self> {
1723        [val; 8usize].simd_into(self)
1724    }
1725    #[inline(always)]
1726    fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
1727        [
1728            u16::not(a[0usize]),
1729            u16::not(a[1usize]),
1730            u16::not(a[2usize]),
1731            u16::not(a[3usize]),
1732            u16::not(a[4usize]),
1733            u16::not(a[5usize]),
1734            u16::not(a[6usize]),
1735            u16::not(a[7usize]),
1736        ]
1737        .simd_into(self)
1738    }
1739    #[inline(always)]
1740    fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1741        [
1742            u16::add(a[0usize], &b[0usize]),
1743            u16::add(a[1usize], &b[1usize]),
1744            u16::add(a[2usize], &b[2usize]),
1745            u16::add(a[3usize], &b[3usize]),
1746            u16::add(a[4usize], &b[4usize]),
1747            u16::add(a[5usize], &b[5usize]),
1748            u16::add(a[6usize], &b[6usize]),
1749            u16::add(a[7usize], &b[7usize]),
1750        ]
1751        .simd_into(self)
1752    }
1753    #[inline(always)]
1754    fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1755        [
1756            u16::wrapping_sub(a[0usize], b[0usize]),
1757            u16::wrapping_sub(a[1usize], b[1usize]),
1758            u16::wrapping_sub(a[2usize], b[2usize]),
1759            u16::wrapping_sub(a[3usize], b[3usize]),
1760            u16::wrapping_sub(a[4usize], b[4usize]),
1761            u16::wrapping_sub(a[5usize], b[5usize]),
1762            u16::wrapping_sub(a[6usize], b[6usize]),
1763            u16::wrapping_sub(a[7usize], b[7usize]),
1764        ]
1765        .simd_into(self)
1766    }
1767    #[inline(always)]
1768    fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1769        [
1770            u16::wrapping_mul(a[0usize], b[0usize]),
1771            u16::wrapping_mul(a[1usize], b[1usize]),
1772            u16::wrapping_mul(a[2usize], b[2usize]),
1773            u16::wrapping_mul(a[3usize], b[3usize]),
1774            u16::wrapping_mul(a[4usize], b[4usize]),
1775            u16::wrapping_mul(a[5usize], b[5usize]),
1776            u16::wrapping_mul(a[6usize], b[6usize]),
1777            u16::wrapping_mul(a[7usize], b[7usize]),
1778        ]
1779        .simd_into(self)
1780    }
1781    #[inline(always)]
1782    fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1783        [
1784            u16::bitand(a[0usize], &b[0usize]),
1785            u16::bitand(a[1usize], &b[1usize]),
1786            u16::bitand(a[2usize], &b[2usize]),
1787            u16::bitand(a[3usize], &b[3usize]),
1788            u16::bitand(a[4usize], &b[4usize]),
1789            u16::bitand(a[5usize], &b[5usize]),
1790            u16::bitand(a[6usize], &b[6usize]),
1791            u16::bitand(a[7usize], &b[7usize]),
1792        ]
1793        .simd_into(self)
1794    }
1795    #[inline(always)]
1796    fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1797        [
1798            u16::bitor(a[0usize], &b[0usize]),
1799            u16::bitor(a[1usize], &b[1usize]),
1800            u16::bitor(a[2usize], &b[2usize]),
1801            u16::bitor(a[3usize], &b[3usize]),
1802            u16::bitor(a[4usize], &b[4usize]),
1803            u16::bitor(a[5usize], &b[5usize]),
1804            u16::bitor(a[6usize], &b[6usize]),
1805            u16::bitor(a[7usize], &b[7usize]),
1806        ]
1807        .simd_into(self)
1808    }
1809    #[inline(always)]
1810    fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1811        [
1812            u16::bitxor(a[0usize], &b[0usize]),
1813            u16::bitxor(a[1usize], &b[1usize]),
1814            u16::bitxor(a[2usize], &b[2usize]),
1815            u16::bitxor(a[3usize], &b[3usize]),
1816            u16::bitxor(a[4usize], &b[4usize]),
1817            u16::bitxor(a[5usize], &b[5usize]),
1818            u16::bitxor(a[6usize], &b[6usize]),
1819            u16::bitxor(a[7usize], &b[7usize]),
1820        ]
1821        .simd_into(self)
1822    }
1823    #[inline(always)]
1824    fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
1825        [
1826            u16::shr(a[0usize], shift as u16),
1827            u16::shr(a[1usize], shift as u16),
1828            u16::shr(a[2usize], shift as u16),
1829            u16::shr(a[3usize], shift as u16),
1830            u16::shr(a[4usize], shift as u16),
1831            u16::shr(a[5usize], shift as u16),
1832            u16::shr(a[6usize], shift as u16),
1833            u16::shr(a[7usize], shift as u16),
1834        ]
1835        .simd_into(self)
1836    }
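    // Unsigned comparisons still produce signed mask lanes (-1 or 0) in the
    // corresponding mask16x8 type.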
1837    #[inline(always)]
1838    fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1839        [
1840            -(u16::eq(&a[0usize], &b[0usize]) as i16),
1841            -(u16::eq(&a[1usize], &b[1usize]) as i16),
1842            -(u16::eq(&a[2usize], &b[2usize]) as i16),
1843            -(u16::eq(&a[3usize], &b[3usize]) as i16),
1844            -(u16::eq(&a[4usize], &b[4usize]) as i16),
1845            -(u16::eq(&a[5usize], &b[5usize]) as i16),
1846            -(u16::eq(&a[6usize], &b[6usize]) as i16),
1847            -(u16::eq(&a[7usize], &b[7usize]) as i16),
1848        ]
1849        .simd_into(self)
1850    }
1851    #[inline(always)]
1852    fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1853        [
1854            -(u16::lt(&a[0usize], &b[0usize]) as i16),
1855            -(u16::lt(&a[1usize], &b[1usize]) as i16),
1856            -(u16::lt(&a[2usize], &b[2usize]) as i16),
1857            -(u16::lt(&a[3usize], &b[3usize]) as i16),
1858            -(u16::lt(&a[4usize], &b[4usize]) as i16),
1859            -(u16::lt(&a[5usize], &b[5usize]) as i16),
1860            -(u16::lt(&a[6usize], &b[6usize]) as i16),
1861            -(u16::lt(&a[7usize], &b[7usize]) as i16),
1862        ]
1863        .simd_into(self)
1864    }
1865    #[inline(always)]
1866    fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1867        [
1868            -(u16::le(&a[0usize], &b[0usize]) as i16),
1869            -(u16::le(&a[1usize], &b[1usize]) as i16),
1870            -(u16::le(&a[2usize], &b[2usize]) as i16),
1871            -(u16::le(&a[3usize], &b[3usize]) as i16),
1872            -(u16::le(&a[4usize], &b[4usize]) as i16),
1873            -(u16::le(&a[5usize], &b[5usize]) as i16),
1874            -(u16::le(&a[6usize], &b[6usize]) as i16),
1875            -(u16::le(&a[7usize], &b[7usize]) as i16),
1876        ]
1877        .simd_into(self)
1878    }
1879    #[inline(always)]
1880    fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1881        [
1882            -(u16::ge(&a[0usize], &b[0usize]) as i16),
1883            -(u16::ge(&a[1usize], &b[1usize]) as i16),
1884            -(u16::ge(&a[2usize], &b[2usize]) as i16),
1885            -(u16::ge(&a[3usize], &b[3usize]) as i16),
1886            -(u16::ge(&a[4usize], &b[4usize]) as i16),
1887            -(u16::ge(&a[5usize], &b[5usize]) as i16),
1888            -(u16::ge(&a[6usize], &b[6usize]) as i16),
1889            -(u16::ge(&a[7usize], &b[7usize]) as i16),
1890        ]
1891        .simd_into(self)
1892    }
1893    #[inline(always)]
1894    fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1895        [
1896            -(u16::gt(&a[0usize], &b[0usize]) as i16),
1897            -(u16::gt(&a[1usize], &b[1usize]) as i16),
1898            -(u16::gt(&a[2usize], &b[2usize]) as i16),
1899            -(u16::gt(&a[3usize], &b[3usize]) as i16),
1900            -(u16::gt(&a[4usize], &b[4usize]) as i16),
1901            -(u16::gt(&a[5usize], &b[5usize]) as i16),
1902            -(u16::gt(&a[6usize], &b[6usize]) as i16),
1903            -(u16::gt(&a[7usize], &b[7usize]) as i16),
1904        ]
1905        .simd_into(self)
1906    }
1907    #[inline(always)]
1908    fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1909        [
1910            a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1911        ]
1912        .simd_into(self)
1913    }
1914    #[inline(always)]
1915    fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1916        [
1917            a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1918        ]
1919        .simd_into(self)
1920    }
1921    #[inline(always)]
1922    fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1923        [
1924            a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1925        ]
1926        .simd_into(self)
1927    }
1928    #[inline(always)]
1929    fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1930        [
1931            a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1932        ]
1933        .simd_into(self)
1934    }
1935    #[inline(always)]
1936    fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
1937        [
1938            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1939            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1940            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1941            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1942            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1943            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1944            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1945            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1946        ]
1947        .simd_into(self)
1948    }
1949    #[inline(always)]
1950    fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1951        [
1952            u16::min(a[0usize], b[0usize]),
1953            u16::min(a[1usize], b[1usize]),
1954            u16::min(a[2usize], b[2usize]),
1955            u16::min(a[3usize], b[3usize]),
1956            u16::min(a[4usize], b[4usize]),
1957            u16::min(a[5usize], b[5usize]),
1958            u16::min(a[6usize], b[6usize]),
1959            u16::min(a[7usize], b[7usize]),
1960        ]
1961        .simd_into(self)
1962    }
1963    #[inline(always)]
1964    fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1965        [
1966            u16::max(a[0usize], b[0usize]),
1967            u16::max(a[1usize], b[1usize]),
1968            u16::max(a[2usize], b[2usize]),
1969            u16::max(a[3usize], b[3usize]),
1970            u16::max(a[4usize], b[4usize]),
1971            u16::max(a[5usize], b[5usize]),
1972            u16::max(a[6usize], b[6usize]),
1973            u16::max(a[7usize], b[7usize]),
1974        ]
1975        .simd_into(self)
1976    }
1977    #[inline(always)]
1978    fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
1979        let mut result = [0; 16usize];
1980        result[0..8usize].copy_from_slice(&a.val);
1981        result[8usize..16usize].copy_from_slice(&b.val);
1982        result.simd_into(self)
1983    }
1984    #[inline(always)]
1985    fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
1986        u8x16 {
1987            val: bytemuck::cast(a.val),
1988            simd: a.simd,
1989        }
1990    }
1991    #[inline(always)]
1992    fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
1993        u32x4 {
1994            val: bytemuck::cast(a.val),
1995            simd: a.simd,
1996        }
1997    }
1998    #[inline(always)]
1999    fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2000        [val; 8usize].simd_into(self)
2001    }
2002    #[inline(always)]
2003    fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2004        [
2005            i16::not(a[0usize]),
2006            i16::not(a[1usize]),
2007            i16::not(a[2usize]),
2008            i16::not(a[3usize]),
2009            i16::not(a[4usize]),
2010            i16::not(a[5usize]),
2011            i16::not(a[6usize]),
2012            i16::not(a[7usize]),
2013        ]
2014        .simd_into(self)
2015    }
2016    #[inline(always)]
2017    fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2018        [
2019            i16::bitand(a[0usize], &b[0usize]),
2020            i16::bitand(a[1usize], &b[1usize]),
2021            i16::bitand(a[2usize], &b[2usize]),
2022            i16::bitand(a[3usize], &b[3usize]),
2023            i16::bitand(a[4usize], &b[4usize]),
2024            i16::bitand(a[5usize], &b[5usize]),
2025            i16::bitand(a[6usize], &b[6usize]),
2026            i16::bitand(a[7usize], &b[7usize]),
2027        ]
2028        .simd_into(self)
2029    }
2030    #[inline(always)]
2031    fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2032        [
2033            i16::bitor(a[0usize], &b[0usize]),
2034            i16::bitor(a[1usize], &b[1usize]),
2035            i16::bitor(a[2usize], &b[2usize]),
2036            i16::bitor(a[3usize], &b[3usize]),
2037            i16::bitor(a[4usize], &b[4usize]),
2038            i16::bitor(a[5usize], &b[5usize]),
2039            i16::bitor(a[6usize], &b[6usize]),
2040            i16::bitor(a[7usize], &b[7usize]),
2041        ]
2042        .simd_into(self)
2043    }
2044    #[inline(always)]
2045    fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2046        [
2047            i16::bitxor(a[0usize], &b[0usize]),
2048            i16::bitxor(a[1usize], &b[1usize]),
2049            i16::bitxor(a[2usize], &b[2usize]),
2050            i16::bitxor(a[3usize], &b[3usize]),
2051            i16::bitxor(a[4usize], &b[4usize]),
2052            i16::bitxor(a[5usize], &b[5usize]),
2053            i16::bitxor(a[6usize], &b[6usize]),
2054            i16::bitxor(a[7usize], &b[7usize]),
2055        ]
2056        .simd_into(self)
2057    }
2058    #[inline(always)]
2059    fn select_mask16x8(
2060        self,
2061        a: mask16x8<Self>,
2062        b: mask16x8<Self>,
2063        c: mask16x8<Self>,
2064    ) -> mask16x8<Self> {
2065        [
2066            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2067            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2068            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2069            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2070            if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2071            if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2072            if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2073            if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2074        ]
2075        .simd_into(self)
2076    }
2077    #[inline(always)]
2078    fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2079        [
2080            -(i16::eq(&a[0usize], &b[0usize]) as i16),
2081            -(i16::eq(&a[1usize], &b[1usize]) as i16),
2082            -(i16::eq(&a[2usize], &b[2usize]) as i16),
2083            -(i16::eq(&a[3usize], &b[3usize]) as i16),
2084            -(i16::eq(&a[4usize], &b[4usize]) as i16),
2085            -(i16::eq(&a[5usize], &b[5usize]) as i16),
2086            -(i16::eq(&a[6usize], &b[6usize]) as i16),
2087            -(i16::eq(&a[7usize], &b[7usize]) as i16),
2088        ]
2089        .simd_into(self)
2090    }
2091    #[inline(always)]
2092    fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
2093        let mut result = [0; 16usize];
2094        result[0..8usize].copy_from_slice(&a.val);
2095        result[8usize..16usize].copy_from_slice(&b.val);
2096        result.simd_into(self)
2097    }
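    // i32x4 and u32x4 follow the same lane-wise scheme with four lanes.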
2098    #[inline(always)]
2099    fn splat_i32x4(self, val: i32) -> i32x4<Self> {
2100        [val; 4usize].simd_into(self)
2101    }
2102    #[inline(always)]
2103    fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2104        [
2105            i32::not(a[0usize]),
2106            i32::not(a[1usize]),
2107            i32::not(a[2usize]),
2108            i32::not(a[3usize]),
2109        ]
2110        .simd_into(self)
2111    }
2112    #[inline(always)]
2113    fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2114        [
2115            i32::add(a[0usize], &b[0usize]),
2116            i32::add(a[1usize], &b[1usize]),
2117            i32::add(a[2usize], &b[2usize]),
2118            i32::add(a[3usize], &b[3usize]),
2119        ]
2120        .simd_into(self)
2121    }
2122    #[inline(always)]
2123    fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2124        [
2125            i32::wrapping_sub(a[0usize], b[0usize]),
2126            i32::wrapping_sub(a[1usize], b[1usize]),
2127            i32::wrapping_sub(a[2usize], b[2usize]),
2128            i32::wrapping_sub(a[3usize], b[3usize]),
2129        ]
2130        .simd_into(self)
2131    }
2132    #[inline(always)]
2133    fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2134        [
2135            i32::wrapping_mul(a[0usize], b[0usize]),
2136            i32::wrapping_mul(a[1usize], b[1usize]),
2137            i32::wrapping_mul(a[2usize], b[2usize]),
2138            i32::wrapping_mul(a[3usize], b[3usize]),
2139        ]
2140        .simd_into(self)
2141    }
2142    #[inline(always)]
2143    fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2144        [
2145            i32::bitand(a[0usize], &b[0usize]),
2146            i32::bitand(a[1usize], &b[1usize]),
2147            i32::bitand(a[2usize], &b[2usize]),
2148            i32::bitand(a[3usize], &b[3usize]),
2149        ]
2150        .simd_into(self)
2151    }
2152    #[inline(always)]
2153    fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2154        [
2155            i32::bitor(a[0usize], &b[0usize]),
2156            i32::bitor(a[1usize], &b[1usize]),
2157            i32::bitor(a[2usize], &b[2usize]),
2158            i32::bitor(a[3usize], &b[3usize]),
2159        ]
2160        .simd_into(self)
2161    }
2162    #[inline(always)]
2163    fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2164        [
2165            i32::bitxor(a[0usize], &b[0usize]),
2166            i32::bitxor(a[1usize], &b[1usize]),
2167            i32::bitxor(a[2usize], &b[2usize]),
2168            i32::bitxor(a[3usize], &b[3usize]),
2169        ]
2170        .simd_into(self)
2171    }
2172    #[inline(always)]
2173    fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2174        [
2175            i32::shr(a[0usize], shift as i32),
2176            i32::shr(a[1usize], shift as i32),
2177            i32::shr(a[2usize], shift as i32),
2178            i32::shr(a[3usize], shift as i32),
2179        ]
2180        .simd_into(self)
2181    }
2182    #[inline(always)]
2183    fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2184        [
2185            -(i32::eq(&a[0usize], &b[0usize]) as i32),
2186            -(i32::eq(&a[1usize], &b[1usize]) as i32),
2187            -(i32::eq(&a[2usize], &b[2usize]) as i32),
2188            -(i32::eq(&a[3usize], &b[3usize]) as i32),
2189        ]
2190        .simd_into(self)
2191    }
2192    #[inline(always)]
2193    fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2194        [
2195            -(i32::lt(&a[0usize], &b[0usize]) as i32),
2196            -(i32::lt(&a[1usize], &b[1usize]) as i32),
2197            -(i32::lt(&a[2usize], &b[2usize]) as i32),
2198            -(i32::lt(&a[3usize], &b[3usize]) as i32),
2199        ]
2200        .simd_into(self)
2201    }
2202    #[inline(always)]
2203    fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2204        [
2205            -(i32::le(&a[0usize], &b[0usize]) as i32),
2206            -(i32::le(&a[1usize], &b[1usize]) as i32),
2207            -(i32::le(&a[2usize], &b[2usize]) as i32),
2208            -(i32::le(&a[3usize], &b[3usize]) as i32),
2209        ]
2210        .simd_into(self)
2211    }
2212    #[inline(always)]
2213    fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2214        [
2215            -(i32::ge(&a[0usize], &b[0usize]) as i32),
2216            -(i32::ge(&a[1usize], &b[1usize]) as i32),
2217            -(i32::ge(&a[2usize], &b[2usize]) as i32),
2218            -(i32::ge(&a[3usize], &b[3usize]) as i32),
2219        ]
2220        .simd_into(self)
2221    }
2222    #[inline(always)]
2223    fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2224        [
2225            -(i32::gt(&a[0usize], &b[0usize]) as i32),
2226            -(i32::gt(&a[1usize], &b[1usize]) as i32),
2227            -(i32::gt(&a[2usize], &b[2usize]) as i32),
2228            -(i32::gt(&a[3usize], &b[3usize]) as i32),
2229        ]
2230        .simd_into(self)
2231    }
2232    #[inline(always)]
2233    fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2234        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2235    }
2236    #[inline(always)]
2237    fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2238        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2239    }
2240    #[inline(always)]
2241    fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2242        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2243    }
2244    #[inline(always)]
2245    fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2246        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2247    }
2248    #[inline(always)]
2249    fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
2250        [
2251            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2252            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2253            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2254            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2255        ]
2256        .simd_into(self)
2257    }
2258    #[inline(always)]
2259    fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2260        [
2261            i32::min(a[0usize], b[0usize]),
2262            i32::min(a[1usize], b[1usize]),
2263            i32::min(a[2usize], b[2usize]),
2264            i32::min(a[3usize], b[3usize]),
2265        ]
2266        .simd_into(self)
2267    }
2268    #[inline(always)]
2269    fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2270        [
2271            i32::max(a[0usize], b[0usize]),
2272            i32::max(a[1usize], b[1usize]),
2273            i32::max(a[2usize], b[2usize]),
2274            i32::max(a[3usize], b[3usize]),
2275        ]
2276        .simd_into(self)
2277    }
2278    #[inline(always)]
2279    fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
2280        let mut result = [0; 8usize];
2281        result[0..4usize].copy_from_slice(&a.val);
2282        result[4usize..8usize].copy_from_slice(&b.val);
2283        result.simd_into(self)
2284    }
2285    #[inline(always)]
2286    fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
2287        u8x16 {
2288            val: bytemuck::cast(a.val),
2289            simd: a.simd,
2290        }
2291    }
2292    #[inline(always)]
2293    fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
2294        u32x4 {
2295            val: bytemuck::cast(a.val),
2296            simd: a.simd,
2297        }
2298    }
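    // Lane-wise integer-to-float conversion using `as f32` casts.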
2299    #[inline(always)]
2300    fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
2301        [
2302            a[0usize] as f32,
2303            a[1usize] as f32,
2304            a[2usize] as f32,
2305            a[3usize] as f32,
2306        ]
2307        .simd_into(self)
2308    }
2309    #[inline(always)]
2310    fn splat_u32x4(self, val: u32) -> u32x4<Self> {
2311        [val; 4usize].simd_into(self)
2312    }
2313    #[inline(always)]
2314    fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
2315        [
2316            u32::not(a[0usize]),
2317            u32::not(a[1usize]),
2318            u32::not(a[2usize]),
2319            u32::not(a[3usize]),
2320        ]
2321        .simd_into(self)
2322    }
2323    #[inline(always)]
2324    fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2325        [
2326            u32::add(a[0usize], &b[0usize]),
2327            u32::add(a[1usize], &b[1usize]),
2328            u32::add(a[2usize], &b[2usize]),
2329            u32::add(a[3usize], &b[3usize]),
2330        ]
2331        .simd_into(self)
2332    }
2333    #[inline(always)]
2334    fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2335        [
2336            u32::wrapping_sub(a[0usize], b[0usize]),
2337            u32::wrapping_sub(a[1usize], b[1usize]),
2338            u32::wrapping_sub(a[2usize], b[2usize]),
2339            u32::wrapping_sub(a[3usize], b[3usize]),
2340        ]
2341        .simd_into(self)
2342    }
2343    #[inline(always)]
2344    fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2345        [
2346            u32::wrapping_mul(a[0usize], b[0usize]),
2347            u32::wrapping_mul(a[1usize], b[1usize]),
2348            u32::wrapping_mul(a[2usize], b[2usize]),
2349            u32::wrapping_mul(a[3usize], b[3usize]),
2350        ]
2351        .simd_into(self)
2352    }
2353    #[inline(always)]
2354    fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2355        [
2356            u32::bitand(a[0usize], &b[0usize]),
2357            u32::bitand(a[1usize], &b[1usize]),
2358            u32::bitand(a[2usize], &b[2usize]),
2359            u32::bitand(a[3usize], &b[3usize]),
2360        ]
2361        .simd_into(self)
2362    }
2363    #[inline(always)]
2364    fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2365        [
2366            u32::bitor(a[0usize], &b[0usize]),
2367            u32::bitor(a[1usize], &b[1usize]),
2368            u32::bitor(a[2usize], &b[2usize]),
2369            u32::bitor(a[3usize], &b[3usize]),
2370        ]
2371        .simd_into(self)
2372    }
2373    #[inline(always)]
2374    fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2375        [
2376            u32::bitxor(a[0usize], &b[0usize]),
2377            u32::bitxor(a[1usize], &b[1usize]),
2378            u32::bitxor(a[2usize], &b[2usize]),
2379            u32::bitxor(a[3usize], &b[3usize]),
2380        ]
2381        .simd_into(self)
2382    }
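    // `>>` on u32 is a logical (zero-filling) shift.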
2383    #[inline(always)]
2384    fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2385        [
2386            u32::shr(a[0usize], shift as u32),
2387            u32::shr(a[1usize], shift as u32),
2388            u32::shr(a[2usize], shift as u32),
2389            u32::shr(a[3usize], shift as u32),
2390        ]
2391        .simd_into(self)
2392    }
2393    #[inline(always)]
2394    fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2395        [
2396            -(u32::eq(&a[0usize], &b[0usize]) as i32),
2397            -(u32::eq(&a[1usize], &b[1usize]) as i32),
2398            -(u32::eq(&a[2usize], &b[2usize]) as i32),
2399            -(u32::eq(&a[3usize], &b[3usize]) as i32),
2400        ]
2401        .simd_into(self)
2402    }
2403    #[inline(always)]
2404    fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2405        [
2406            -(u32::lt(&a[0usize], &b[0usize]) as i32),
2407            -(u32::lt(&a[1usize], &b[1usize]) as i32),
2408            -(u32::lt(&a[2usize], &b[2usize]) as i32),
2409            -(u32::lt(&a[3usize], &b[3usize]) as i32),
2410        ]
2411        .simd_into(self)
2412    }
2413    #[inline(always)]
2414    fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2415        [
2416            -(u32::le(&a[0usize], &b[0usize]) as i32),
2417            -(u32::le(&a[1usize], &b[1usize]) as i32),
2418            -(u32::le(&a[2usize], &b[2usize]) as i32),
2419            -(u32::le(&a[3usize], &b[3usize]) as i32),
2420        ]
2421        .simd_into(self)
2422    }
2423    #[inline(always)]
2424    fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2425        [
2426            -(u32::ge(&a[0usize], &b[0usize]) as i32),
2427            -(u32::ge(&a[1usize], &b[1usize]) as i32),
2428            -(u32::ge(&a[2usize], &b[2usize]) as i32),
2429            -(u32::ge(&a[3usize], &b[3usize]) as i32),
2430        ]
2431        .simd_into(self)
2432    }
2433    #[inline(always)]
2434    fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2435        [
2436            -(u32::gt(&a[0usize], &b[0usize]) as i32),
2437            -(u32::gt(&a[1usize], &b[1usize]) as i32),
2438            -(u32::gt(&a[2usize], &b[2usize]) as i32),
2439            -(u32::gt(&a[3usize], &b[3usize]) as i32),
2440        ]
2441        .simd_into(self)
2442    }
2443    #[inline(always)]
2444    fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2445        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2446    }
2447    #[inline(always)]
2448    fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2449        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2450    }
2451    #[inline(always)]
2452    fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2453        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2454    }
2455    #[inline(always)]
2456    fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2457        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2458    }
2459    #[inline(always)]
2460    fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
2461        [
2462            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2463            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2464            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2465            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2466        ]
2467        .simd_into(self)
2468    }
2469    #[inline(always)]
2470    fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2471        [
2472            u32::min(a[0usize], b[0usize]),
2473            u32::min(a[1usize], b[1usize]),
2474            u32::min(a[2usize], b[2usize]),
2475            u32::min(a[3usize], b[3usize]),
2476        ]
2477        .simd_into(self)
2478    }
2479    #[inline(always)]
2480    fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2481        [
2482            u32::max(a[0usize], b[0usize]),
2483            u32::max(a[1usize], b[1usize]),
2484            u32::max(a[2usize], b[2usize]),
2485            u32::max(a[3usize], b[3usize]),
2486        ]
2487        .simd_into(self)
2488    }
2489    #[inline(always)]
2490    fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
2491        let mut result = [0; 8usize];
2492        result[0..4usize].copy_from_slice(&a.val);
2493        result[4usize..8usize].copy_from_slice(&b.val);
2494        result.simd_into(self)
2495    }
2496    #[inline(always)]
2497    fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
2498        u8x16 {
2499            val: bytemuck::cast(a.val),
2500            simd: a.simd,
2501        }
2502    }
2503    #[inline(always)]
2504    fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
2505        [
2506            a[0usize] as f32,
2507            a[1usize] as f32,
2508            a[2usize] as f32,
2509            a[3usize] as f32,
2510        ]
2511        .simd_into(self)
2512    }
2513    #[inline(always)]
2514    fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
2515        [val; 4usize].simd_into(self)
2516    }
2517    #[inline(always)]
2518    fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
2519        [
2520            i32::not(a[0usize]),
2521            i32::not(a[1usize]),
2522            i32::not(a[2usize]),
2523            i32::not(a[3usize]),
2524        ]
2525        .simd_into(self)
2526    }
2527    #[inline(always)]
2528    fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2529        [
2530            i32::bitand(a[0usize], &b[0usize]),
2531            i32::bitand(a[1usize], &b[1usize]),
2532            i32::bitand(a[2usize], &b[2usize]),
2533            i32::bitand(a[3usize], &b[3usize]),
2534        ]
2535        .simd_into(self)
2536    }
2537    #[inline(always)]
2538    fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2539        [
2540            i32::bitor(a[0usize], &b[0usize]),
2541            i32::bitor(a[1usize], &b[1usize]),
2542            i32::bitor(a[2usize], &b[2usize]),
2543            i32::bitor(a[3usize], &b[3usize]),
2544        ]
2545        .simd_into(self)
2546    }
2547    #[inline(always)]
2548    fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2549        [
2550            i32::bitxor(a[0usize], &b[0usize]),
2551            i32::bitxor(a[1usize], &b[1usize]),
2552            i32::bitxor(a[2usize], &b[2usize]),
2553            i32::bitxor(a[3usize], &b[3usize]),
2554        ]
2555        .simd_into(self)
2556    }
2557    #[inline(always)]
2558    fn select_mask32x4(
2559        self,
2560        a: mask32x4<Self>,
2561        b: mask32x4<Self>,
2562        c: mask32x4<Self>,
2563    ) -> mask32x4<Self> {
2564        [
2565            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2566            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2567            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2568            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2569        ]
2570        .simd_into(self)
2571    }
2572    #[inline(always)]
2573    fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2574        [
2575            -(i32::eq(&a[0usize], &b[0usize]) as i32),
2576            -(i32::eq(&a[1usize], &b[1usize]) as i32),
2577            -(i32::eq(&a[2usize], &b[2usize]) as i32),
2578            -(i32::eq(&a[3usize], &b[3usize]) as i32),
2579        ]
2580        .simd_into(self)
2581    }
2582    #[inline(always)]
2583    fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
2584        let mut result = [0; 8usize];
2585        result[0..4usize].copy_from_slice(&a.val);
2586        result[4usize..8usize].copy_from_slice(&b.val);
2587        result.simd_into(self)
2588    }
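    // f64x2: two-lane double-precision operations, again computed per lane
    // with scalar f64 arithmetic.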
2589    #[inline(always)]
2590    fn splat_f64x2(self, val: f64) -> f64x2<Self> {
2591        [val; 2usize].simd_into(self)
2592    }
2593    #[inline(always)]
2594    fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2595        [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
2596    }
2597    #[inline(always)]
2598    fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2599        [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
2600    }
2601    #[inline(always)]
2602    fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2603        [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
2604    }
2605    #[inline(always)]
2606    fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2607        [
2608            f64::add(a[0usize], &b[0usize]),
2609            f64::add(a[1usize], &b[1usize]),
2610        ]
2611        .simd_into(self)
2612    }
2613    #[inline(always)]
2614    fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2615        [
2616            f64::sub(a[0usize], &b[0usize]),
2617            f64::sub(a[1usize], &b[1usize]),
2618        ]
2619        .simd_into(self)
2620    }
2621    #[inline(always)]
2622    fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2623        [
2624            f64::mul(a[0usize], &b[0usize]),
2625            f64::mul(a[1usize], &b[1usize]),
2626        ]
2627        .simd_into(self)
2628    }
2629    #[inline(always)]
2630    fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2631        [
2632            f64::div(a[0usize], &b[0usize]),
2633            f64::div(a[1usize], &b[1usize]),
2634        ]
2635        .simd_into(self)
2636    }
2637    #[inline(always)]
2638    fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2639        [
2640            f64::copysign(a[0usize], b[0usize]),
2641            f64::copysign(a[1usize], b[1usize]),
2642        ]
2643        .simd_into(self)
2644    }
2645    #[inline(always)]
2646    fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2647        [
2648            -(f64::eq(&a[0usize], &b[0usize]) as i64),
2649            -(f64::eq(&a[1usize], &b[1usize]) as i64),
2650        ]
2651        .simd_into(self)
2652    }
2653    #[inline(always)]
2654    fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2655        [
2656            -(f64::lt(&a[0usize], &b[0usize]) as i64),
2657            -(f64::lt(&a[1usize], &b[1usize]) as i64),
2658        ]
2659        .simd_into(self)
2660    }
2661    #[inline(always)]
2662    fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2663        [
2664            -(f64::le(&a[0usize], &b[0usize]) as i64),
2665            -(f64::le(&a[1usize], &b[1usize]) as i64),
2666        ]
2667        .simd_into(self)
2668    }
2669    #[inline(always)]
2670    fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2671        [
2672            -(f64::ge(&a[0usize], &b[0usize]) as i64),
2673            -(f64::ge(&a[1usize], &b[1usize]) as i64),
2674        ]
2675        .simd_into(self)
2676    }
2677    #[inline(always)]
2678    fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2679        [
2680            -(f64::gt(&a[0usize], &b[0usize]) as i64),
2681            -(f64::gt(&a[1usize], &b[1usize]) as i64),
2682        ]
2683        .simd_into(self)
2684    }
2685    #[inline(always)]
2686    fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2687        [a[0usize], b[0usize]].simd_into(self)
2688    }
2689    #[inline(always)]
2690    fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2691        [a[1usize], b[1usize]].simd_into(self)
2692    }
2693    #[inline(always)]
2694    fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2695        [a[0usize], b[0usize]].simd_into(self)
2696    }
2697    #[inline(always)]
2698    fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2699        [a[1usize], b[1usize]].simd_into(self)
2700    }
2701    #[inline(always)]
2702    fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2703        [
2704            f64::max(a[0usize], b[0usize]),
2705            f64::max(a[1usize], b[1usize]),
2706        ]
2707        .simd_into(self)
2708    }
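    // The fallback draws no distinction between the plain and *_precise
    // min/max variants; both defer to the scalar f64::min / f64::max.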
2709    #[inline(always)]
2710    fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2711        [
2712            f64::max(a[0usize], b[0usize]),
2713            f64::max(a[1usize], b[1usize]),
2714        ]
2715        .simd_into(self)
2716    }
2717    #[inline(always)]
2718    fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2719        [
2720            f64::min(a[0usize], b[0usize]),
2721            f64::min(a[1usize], b[1usize]),
2722        ]
2723        .simd_into(self)
2724    }
2725    #[inline(always)]
2726    fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2727        [
2728            f64::min(a[0usize], b[0usize]),
2729            f64::min(a[1usize], b[1usize]),
2730        ]
2731        .simd_into(self)
2732    }
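    // madd/msub are emulated as a separate multiply followed by an add or
    // subtract (a + b * c and a - b * c); no fused operation is performed.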
2733    #[inline(always)]
2734    fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2735        a.add(b.mul(c))
2736    }
2737    #[inline(always)]
2738    fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2739        a.sub(b.mul(c))
2740    }
2741    #[inline(always)]
2742    fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2743        [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
2744    }
2745    #[inline(always)]
2746    fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2747        [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
2748    }
2749    #[inline(always)]
2750    fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2751        [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
2752    }
2753    #[inline(always)]
2754    fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2755        [
2756            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2757            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2758        ]
2759        .simd_into(self)
2760    }
2761    #[inline(always)]
2762    fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
2763        let mut result = [0.0; 4usize];
2764        result[0..2usize].copy_from_slice(&a.val);
2765        result[2usize..4usize].copy_from_slice(&b.val);
2766        result.simd_into(self)
2767    }
2768    #[inline(always)]
2769    fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
2770        f32x4 {
2771            val: bytemuck::cast(a.val),
2772            simd: a.simd,
2773        }
2774    }
2775    #[inline(always)]
2776    fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
2777        [val; 2usize].simd_into(self)
2778    }
2779    #[inline(always)]
2780    fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
2781        [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
2782    }
2783    #[inline(always)]
2784    fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2785        [
2786            i64::bitand(a[0usize], &b[0usize]),
2787            i64::bitand(a[1usize], &b[1usize]),
2788        ]
2789        .simd_into(self)
2790    }
2791    #[inline(always)]
2792    fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2793        [
2794            i64::bitor(a[0usize], &b[0usize]),
2795            i64::bitor(a[1usize], &b[1usize]),
2796        ]
2797        .simd_into(self)
2798    }
2799    #[inline(always)]
2800    fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2801        [
2802            i64::bitxor(a[0usize], &b[0usize]),
2803            i64::bitxor(a[1usize], &b[1usize]),
2804        ]
2805        .simd_into(self)
2806    }
2807    #[inline(always)]
2808    fn select_mask64x2(
2809        self,
2810        a: mask64x2<Self>,
2811        b: mask64x2<Self>,
2812        c: mask64x2<Self>,
2813    ) -> mask64x2<Self> {
2814        [
2815            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2816            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2817        ]
2818        .simd_into(self)
2819    }
2820    #[inline(always)]
2821    fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2822        [
2823            -(i64::eq(&a[0usize], &b[0usize]) as i64),
2824            -(i64::eq(&a[1usize], &b[1usize]) as i64),
2825        ]
2826        .simd_into(self)
2827    }
2828    #[inline(always)]
2829    fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
2830        let mut result = [0; 4usize];
2831        result[0..2usize].copy_from_slice(&a.val);
2832        result[2usize..4usize].copy_from_slice(&b.val);
2833        result.simd_into(self)
2834    }
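    // The wider vectors below (f32x8, i8x32, u8x32, mask8x32, i16x16, u16x16,
    // mask16x16, i32x8, u32x8, ...) are implemented by delegation: split each
    // operand into two halves with `split_*`, apply the half-width operation to
    // each half, and concatenate the results with `combine_*`.
    //
    // A minimal usage sketch (hypothetical caller code, not part of this file):
    //     let s = Fallback::new();
    //     let v = s.splat_f32x8(1.0);        // eight 1.0 lanes
    //     let (lo, hi) = s.split_f32x8(v);   // two f32x4 halves
    //     let w = s.combine_f32x4(lo, hi);   // back to the original f32x8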
2835    #[inline(always)]
2836    fn splat_f32x8(self, a: f32) -> f32x8<Self> {
2837        let half = self.splat_f32x4(a);
2838        self.combine_f32x4(half, half)
2839    }
2840    #[inline(always)]
2841    fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2842        let (a0, a1) = self.split_f32x8(a);
2843        self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
2844    }
2845    #[inline(always)]
2846    fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2847        let (a0, a1) = self.split_f32x8(a);
2848        self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
2849    }
2850    #[inline(always)]
2851    fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2852        let (a0, a1) = self.split_f32x8(a);
2853        self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
2854    }
2855    #[inline(always)]
2856    fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2857        let (a0, a1) = self.split_f32x8(a);
2858        let (b0, b1) = self.split_f32x8(b);
2859        self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
2860    }
2861    #[inline(always)]
2862    fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2863        let (a0, a1) = self.split_f32x8(a);
2864        let (b0, b1) = self.split_f32x8(b);
2865        self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
2866    }
2867    #[inline(always)]
2868    fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2869        let (a0, a1) = self.split_f32x8(a);
2870        let (b0, b1) = self.split_f32x8(b);
2871        self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
2872    }
2873    #[inline(always)]
2874    fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2875        let (a0, a1) = self.split_f32x8(a);
2876        let (b0, b1) = self.split_f32x8(b);
2877        self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
2878    }
2879    #[inline(always)]
2880    fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2881        let (a0, a1) = self.split_f32x8(a);
2882        let (b0, b1) = self.split_f32x8(b);
2883        self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
2884    }
2885    #[inline(always)]
2886    fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2887        let (a0, a1) = self.split_f32x8(a);
2888        let (b0, b1) = self.split_f32x8(b);
2889        self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
2890    }
2891    #[inline(always)]
2892    fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2893        let (a0, a1) = self.split_f32x8(a);
2894        let (b0, b1) = self.split_f32x8(b);
2895        self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
2896    }
2897    #[inline(always)]
2898    fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2899        let (a0, a1) = self.split_f32x8(a);
2900        let (b0, b1) = self.split_f32x8(b);
2901        self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
2902    }
2903    #[inline(always)]
2904    fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2905        let (a0, a1) = self.split_f32x8(a);
2906        let (b0, b1) = self.split_f32x8(b);
2907        self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
2908    }
2909    #[inline(always)]
2910    fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2911        let (a0, a1) = self.split_f32x8(a);
2912        let (b0, b1) = self.split_f32x8(b);
2913        self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
2914    }
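    // Interleaving the two low halves already fills all eight output lanes, so
    // `zip_low_f32x8` combines the x4 `zip_low` and `zip_high` of the low halves
    // of `a` and `b`; `zip_high_f32x8` does the same with the high halves.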
2915    #[inline(always)]
2916    fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2917        let (a0, _) = self.split_f32x8(a);
2918        let (b0, _) = self.split_f32x8(b);
2919        self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
2920    }
2921    #[inline(always)]
2922    fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2923        let (_, a1) = self.split_f32x8(a);
2924        let (_, b1) = self.split_f32x8(b);
2925        self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
2926    }
2927    #[inline(always)]
2928    fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2929        let (a0, a1) = self.split_f32x8(a);
2930        let (b0, b1) = self.split_f32x8(b);
2931        self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
2932    }
2933    #[inline(always)]
2934    fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2935        let (a0, a1) = self.split_f32x8(a);
2936        let (b0, b1) = self.split_f32x8(b);
2937        self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
2938    }
2939    #[inline(always)]
2940    fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2941        let (a0, a1) = self.split_f32x8(a);
2942        let (b0, b1) = self.split_f32x8(b);
2943        self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
2944    }
2945    #[inline(always)]
2946    fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2947        let (a0, a1) = self.split_f32x8(a);
2948        let (b0, b1) = self.split_f32x8(b);
2949        self.combine_f32x4(
2950            self.max_precise_f32x4(a0, b0),
2951            self.max_precise_f32x4(a1, b1),
2952        )
2953    }
2954    #[inline(always)]
2955    fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2956        let (a0, a1) = self.split_f32x8(a);
2957        let (b0, b1) = self.split_f32x8(b);
2958        self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
2959    }
2960    #[inline(always)]
2961    fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2962        let (a0, a1) = self.split_f32x8(a);
2963        let (b0, b1) = self.split_f32x8(b);
2964        self.combine_f32x4(
2965            self.min_precise_f32x4(a0, b0),
2966            self.min_precise_f32x4(a1, b1),
2967        )
2968    }
2969    #[inline(always)]
2970    fn madd_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
2971        let (a0, a1) = self.split_f32x8(a);
2972        let (b0, b1) = self.split_f32x8(b);
2973        let (c0, c1) = self.split_f32x8(c);
2974        self.combine_f32x4(self.madd_f32x4(a0, b0, c0), self.madd_f32x4(a1, b1, c1))
2975    }
2976    #[inline(always)]
2977    fn msub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
2978        let (a0, a1) = self.split_f32x8(a);
2979        let (b0, b1) = self.split_f32x8(b);
2980        let (c0, c1) = self.split_f32x8(c);
2981        self.combine_f32x4(self.msub_f32x4(a0, b0, c0), self.msub_f32x4(a1, b1, c1))
2982    }
2983    #[inline(always)]
2984    fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2985        let (a0, a1) = self.split_f32x8(a);
2986        self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
2987    }
2988    #[inline(always)]
2989    fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2990        let (a0, a1) = self.split_f32x8(a);
2991        self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
2992    }
2993    #[inline(always)]
2994    fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2995        let (a0, a1) = self.split_f32x8(a);
2996        self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
2997    }
2998    #[inline(always)]
2999    fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3000        let (a0, a1) = self.split_mask32x8(a);
3001        let (b0, b1) = self.split_f32x8(b);
3002        let (c0, c1) = self.split_f32x8(c);
3003        self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
3004    }
3005    #[inline(always)]
3006    fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
3007        let mut result = [0.0; 16usize];
3008        result[0..8usize].copy_from_slice(&a.val);
3009        result[8usize..16usize].copy_from_slice(&b.val);
3010        result.simd_into(self)
3011    }
3012    #[inline(always)]
3013    fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
3014        let mut b0 = [0.0; 4usize];
3015        let mut b1 = [0.0; 4usize];
3016        b0.copy_from_slice(&a.val[0..4usize]);
3017        b1.copy_from_slice(&a.val[4usize..8usize]);
3018        (b0.simd_into(self), b1.simd_into(self))
3019    }
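    // The `reinterpret_*` conversions for the wide vectors delegate to their
    // half-width counterparts on each half and concatenate the results.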
3020    #[inline(always)]
3021    fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
3022        let (a0, a1) = self.split_f32x8(a);
3023        self.combine_f64x2(
3024            self.reinterpret_f64_f32x4(a0),
3025            self.reinterpret_f64_f32x4(a1),
3026        )
3027    }
3028    #[inline(always)]
3029    fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3030        let (a0, a1) = self.split_f32x8(a);
3031        self.combine_i32x4(
3032            self.reinterpret_i32_f32x4(a0),
3033            self.reinterpret_i32_f32x4(a1),
3034        )
3035    }
3036    #[inline(always)]
3037    fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
3038        let (a0, a1) = self.split_f32x8(a);
3039        self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
3040    }
3041    #[inline(always)]
3042    fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3043        let (a0, a1) = self.split_f32x8(a);
3044        self.combine_u32x4(
3045            self.reinterpret_u32_f32x4(a0),
3046            self.reinterpret_u32_f32x4(a1),
3047        )
3048    }
3049    #[inline(always)]
3050    fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3051        let (a0, a1) = self.split_f32x8(a);
3052        self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
3053    }
3054    #[inline(always)]
3055    fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3056        let (a0, a1) = self.split_f32x8(a);
3057        self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
3058    }
3059    #[inline(always)]
3060    fn splat_i8x32(self, a: i8) -> i8x32<Self> {
3061        let half = self.splat_i8x16(a);
3062        self.combine_i8x16(half, half)
3063    }
3064    #[inline(always)]
3065    fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3066        let (a0, a1) = self.split_i8x32(a);
3067        self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
3068    }
3069    #[inline(always)]
3070    fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3071        let (a0, a1) = self.split_i8x32(a);
3072        let (b0, b1) = self.split_i8x32(b);
3073        self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
3074    }
3075    #[inline(always)]
3076    fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3077        let (a0, a1) = self.split_i8x32(a);
3078        let (b0, b1) = self.split_i8x32(b);
3079        self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
3080    }
3081    #[inline(always)]
3082    fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3083        let (a0, a1) = self.split_i8x32(a);
3084        let (b0, b1) = self.split_i8x32(b);
3085        self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
3086    }
3087    #[inline(always)]
3088    fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3089        let (a0, a1) = self.split_i8x32(a);
3090        let (b0, b1) = self.split_i8x32(b);
3091        self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
3092    }
3093    #[inline(always)]
3094    fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3095        let (a0, a1) = self.split_i8x32(a);
3096        let (b0, b1) = self.split_i8x32(b);
3097        self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
3098    }
3099    #[inline(always)]
3100    fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3101        let (a0, a1) = self.split_i8x32(a);
3102        let (b0, b1) = self.split_i8x32(b);
3103        self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
3104    }
3105    #[inline(always)]
3106    fn shr_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3107        let (a0, a1) = self.split_i8x32(a);
3108        self.combine_i8x16(self.shr_i8x16(a0, b), self.shr_i8x16(a1, b))
3109    }
3110    #[inline(always)]
3111    fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3112        let (a0, a1) = self.split_i8x32(a);
3113        let (b0, b1) = self.split_i8x32(b);
3114        self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
3115    }
3116    #[inline(always)]
3117    fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3118        let (a0, a1) = self.split_i8x32(a);
3119        let (b0, b1) = self.split_i8x32(b);
3120        self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
3121    }
3122    #[inline(always)]
3123    fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3124        let (a0, a1) = self.split_i8x32(a);
3125        let (b0, b1) = self.split_i8x32(b);
3126        self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
3127    }
3128    #[inline(always)]
3129    fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3130        let (a0, a1) = self.split_i8x32(a);
3131        let (b0, b1) = self.split_i8x32(b);
3132        self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
3133    }
3134    #[inline(always)]
3135    fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3136        let (a0, a1) = self.split_i8x32(a);
3137        let (b0, b1) = self.split_i8x32(b);
3138        self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
3139    }
3140    #[inline(always)]
3141    fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3142        let (a0, _) = self.split_i8x32(a);
3143        let (b0, _) = self.split_i8x32(b);
3144        self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
3145    }
3146    #[inline(always)]
3147    fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3148        let (_, a1) = self.split_i8x32(a);
3149        let (_, b1) = self.split_i8x32(b);
3150        self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
3151    }
3152    #[inline(always)]
3153    fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3154        let (a0, a1) = self.split_i8x32(a);
3155        let (b0, b1) = self.split_i8x32(b);
3156        self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
3157    }
3158    #[inline(always)]
3159    fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3160        let (a0, a1) = self.split_i8x32(a);
3161        let (b0, b1) = self.split_i8x32(b);
3162        self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
3163    }
3164    #[inline(always)]
3165    fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
3166        let (a0, a1) = self.split_mask8x32(a);
3167        let (b0, b1) = self.split_i8x32(b);
3168        let (c0, c1) = self.split_i8x32(c);
3169        self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
3170    }
3171    #[inline(always)]
3172    fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3173        let (a0, a1) = self.split_i8x32(a);
3174        let (b0, b1) = self.split_i8x32(b);
3175        self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
3176    }
3177    #[inline(always)]
3178    fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3179        let (a0, a1) = self.split_i8x32(a);
3180        let (b0, b1) = self.split_i8x32(b);
3181        self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
3182    }
3183    #[inline(always)]
3184    fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
3185        let mut result = [0; 64usize];
3186        result[0..32usize].copy_from_slice(&a.val);
3187        result[32usize..64usize].copy_from_slice(&b.val);
3188        result.simd_into(self)
3189    }
3190    #[inline(always)]
3191    fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
3192        let mut b0 = [0; 16usize];
3193        let mut b1 = [0; 16usize];
3194        b0.copy_from_slice(&a.val[0..16usize]);
3195        b1.copy_from_slice(&a.val[16usize..32usize]);
3196        (b0.simd_into(self), b1.simd_into(self))
3197    }
3198    #[inline(always)]
3199    fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
3200        let (a0, a1) = self.split_i8x32(a);
3201        self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
3202    }
3203    #[inline(always)]
3204    fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
3205        let (a0, a1) = self.split_i8x32(a);
3206        self.combine_u32x4(
3207            self.reinterpret_u32_i8x16(a0),
3208            self.reinterpret_u32_i8x16(a1),
3209        )
3210    }
3211    #[inline(always)]
3212    fn splat_u8x32(self, a: u8) -> u8x32<Self> {
3213        let half = self.splat_u8x16(a);
3214        self.combine_u8x16(half, half)
3215    }
3216    #[inline(always)]
3217    fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
3218        let (a0, a1) = self.split_u8x32(a);
3219        self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
3220    }
3221    #[inline(always)]
3222    fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3223        let (a0, a1) = self.split_u8x32(a);
3224        let (b0, b1) = self.split_u8x32(b);
3225        self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
3226    }
3227    #[inline(always)]
3228    fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3229        let (a0, a1) = self.split_u8x32(a);
3230        let (b0, b1) = self.split_u8x32(b);
3231        self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
3232    }
3233    #[inline(always)]
3234    fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3235        let (a0, a1) = self.split_u8x32(a);
3236        let (b0, b1) = self.split_u8x32(b);
3237        self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
3238    }
3239    #[inline(always)]
3240    fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3241        let (a0, a1) = self.split_u8x32(a);
3242        let (b0, b1) = self.split_u8x32(b);
3243        self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
3244    }
3245    #[inline(always)]
3246    fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3247        let (a0, a1) = self.split_u8x32(a);
3248        let (b0, b1) = self.split_u8x32(b);
3249        self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
3250    }
3251    #[inline(always)]
3252    fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3253        let (a0, a1) = self.split_u8x32(a);
3254        let (b0, b1) = self.split_u8x32(b);
3255        self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
3256    }
3257    #[inline(always)]
3258    fn shr_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3259        let (a0, a1) = self.split_u8x32(a);
3260        self.combine_u8x16(self.shr_u8x16(a0, b), self.shr_u8x16(a1, b))
3261    }
3262    #[inline(always)]
3263    fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3264        let (a0, a1) = self.split_u8x32(a);
3265        let (b0, b1) = self.split_u8x32(b);
3266        self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
3267    }
3268    #[inline(always)]
3269    fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3270        let (a0, a1) = self.split_u8x32(a);
3271        let (b0, b1) = self.split_u8x32(b);
3272        self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
3273    }
3274    #[inline(always)]
3275    fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3276        let (a0, a1) = self.split_u8x32(a);
3277        let (b0, b1) = self.split_u8x32(b);
3278        self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
3279    }
3280    #[inline(always)]
3281    fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3282        let (a0, a1) = self.split_u8x32(a);
3283        let (b0, b1) = self.split_u8x32(b);
3284        self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
3285    }
3286    #[inline(always)]
3287    fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3288        let (a0, a1) = self.split_u8x32(a);
3289        let (b0, b1) = self.split_u8x32(b);
3290        self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
3291    }
3292    #[inline(always)]
3293    fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3294        let (a0, _) = self.split_u8x32(a);
3295        let (b0, _) = self.split_u8x32(b);
3296        self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
3297    }
3298    #[inline(always)]
3299    fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3300        let (_, a1) = self.split_u8x32(a);
3301        let (_, b1) = self.split_u8x32(b);
3302        self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
3303    }
3304    #[inline(always)]
3305    fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3306        let (a0, a1) = self.split_u8x32(a);
3307        let (b0, b1) = self.split_u8x32(b);
3308        self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
3309    }
3310    #[inline(always)]
3311    fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3312        let (a0, a1) = self.split_u8x32(a);
3313        let (b0, b1) = self.split_u8x32(b);
3314        self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
3315    }
3316    #[inline(always)]
3317    fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
3318        let (a0, a1) = self.split_mask8x32(a);
3319        let (b0, b1) = self.split_u8x32(b);
3320        let (c0, c1) = self.split_u8x32(c);
3321        self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
3322    }
3323    #[inline(always)]
3324    fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3325        let (a0, a1) = self.split_u8x32(a);
3326        let (b0, b1) = self.split_u8x32(b);
3327        self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
3328    }
3329    #[inline(always)]
3330    fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3331        let (a0, a1) = self.split_u8x32(a);
3332        let (b0, b1) = self.split_u8x32(b);
3333        self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
3334    }
3335    #[inline(always)]
3336    fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
3337        let mut result = [0; 64usize];
3338        result[0..32usize].copy_from_slice(&a.val);
3339        result[32usize..64usize].copy_from_slice(&b.val);
3340        result.simd_into(self)
3341    }
3342    #[inline(always)]
3343    fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
3344        let mut b0 = [0; 16usize];
3345        let mut b1 = [0; 16usize];
3346        b0.copy_from_slice(&a.val[0..16usize]);
3347        b1.copy_from_slice(&a.val[16usize..32usize]);
3348        (b0.simd_into(self), b1.simd_into(self))
3349    }
3350    #[inline(always)]
3351    fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
3352        let (a0, a1) = self.split_u8x32(a);
3353        self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
3354    }
3355    #[inline(always)]
3356    fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
3357        let (a0, a1) = self.split_u8x32(a);
3358        self.combine_u32x4(
3359            self.reinterpret_u32_u8x16(a0),
3360            self.reinterpret_u32_u8x16(a1),
3361        )
3362    }
3363    #[inline(always)]
3364    fn splat_mask8x32(self, a: i8) -> mask8x32<Self> {
3365        let half = self.splat_mask8x16(a);
3366        self.combine_mask8x16(half, half)
3367    }
3368    #[inline(always)]
3369    fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
3370        let (a0, a1) = self.split_mask8x32(a);
3371        self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
3372    }
3373    #[inline(always)]
3374    fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3375        let (a0, a1) = self.split_mask8x32(a);
3376        let (b0, b1) = self.split_mask8x32(b);
3377        self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
3378    }
3379    #[inline(always)]
3380    fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3381        let (a0, a1) = self.split_mask8x32(a);
3382        let (b0, b1) = self.split_mask8x32(b);
3383        self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
3384    }
3385    #[inline(always)]
3386    fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3387        let (a0, a1) = self.split_mask8x32(a);
3388        let (b0, b1) = self.split_mask8x32(b);
3389        self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
3390    }
3391    #[inline(always)]
3392    fn select_mask8x32(
3393        self,
3394        a: mask8x32<Self>,
3395        b: mask8x32<Self>,
3396        c: mask8x32<Self>,
3397    ) -> mask8x32<Self> {
3398        let (a0, a1) = self.split_mask8x32(a);
3399        let (b0, b1) = self.split_mask8x32(b);
3400        let (c0, c1) = self.split_mask8x32(c);
3401        self.combine_mask8x16(
3402            self.select_mask8x16(a0, b0, c0),
3403            self.select_mask8x16(a1, b1, c1),
3404        )
3405    }
3406    #[inline(always)]
3407    fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3408        let (a0, a1) = self.split_mask8x32(a);
3409        let (b0, b1) = self.split_mask8x32(b);
3410        self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
3411    }
3412    #[inline(always)]
3413    fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
3414        let mut result = [0; 64usize];
3415        result[0..32usize].copy_from_slice(&a.val);
3416        result[32usize..64usize].copy_from_slice(&b.val);
3417        result.simd_into(self)
3418    }
3419    #[inline(always)]
3420    fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
3421        let mut b0 = [0; 16usize];
3422        let mut b1 = [0; 16usize];
3423        b0.copy_from_slice(&a.val[0..16usize]);
3424        b1.copy_from_slice(&a.val[16usize..32usize]);
3425        (b0.simd_into(self), b1.simd_into(self))
3426    }
3427    #[inline(always)]
3428    fn splat_i16x16(self, a: i16) -> i16x16<Self> {
3429        let half = self.splat_i16x8(a);
3430        self.combine_i16x8(half, half)
3431    }
3432    #[inline(always)]
3433    fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3434        let (a0, a1) = self.split_i16x16(a);
3435        self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
3436    }
3437    #[inline(always)]
3438    fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3439        let (a0, a1) = self.split_i16x16(a);
3440        let (b0, b1) = self.split_i16x16(b);
3441        self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
3442    }
3443    #[inline(always)]
3444    fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3445        let (a0, a1) = self.split_i16x16(a);
3446        let (b0, b1) = self.split_i16x16(b);
3447        self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
3448    }
3449    #[inline(always)]
3450    fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3451        let (a0, a1) = self.split_i16x16(a);
3452        let (b0, b1) = self.split_i16x16(b);
3453        self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
3454    }
3455    #[inline(always)]
3456    fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3457        let (a0, a1) = self.split_i16x16(a);
3458        let (b0, b1) = self.split_i16x16(b);
3459        self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
3460    }
3461    #[inline(always)]
3462    fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3463        let (a0, a1) = self.split_i16x16(a);
3464        let (b0, b1) = self.split_i16x16(b);
3465        self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
3466    }
3467    #[inline(always)]
3468    fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3469        let (a0, a1) = self.split_i16x16(a);
3470        let (b0, b1) = self.split_i16x16(b);
3471        self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
3472    }
3473    #[inline(always)]
3474    fn shr_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3475        let (a0, a1) = self.split_i16x16(a);
3476        self.combine_i16x8(self.shr_i16x8(a0, b), self.shr_i16x8(a1, b))
3477    }
3478    #[inline(always)]
3479    fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3480        let (a0, a1) = self.split_i16x16(a);
3481        let (b0, b1) = self.split_i16x16(b);
3482        self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
3483    }
3484    #[inline(always)]
3485    fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3486        let (a0, a1) = self.split_i16x16(a);
3487        let (b0, b1) = self.split_i16x16(b);
3488        self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
3489    }
3490    #[inline(always)]
3491    fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3492        let (a0, a1) = self.split_i16x16(a);
3493        let (b0, b1) = self.split_i16x16(b);
3494        self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
3495    }
3496    #[inline(always)]
3497    fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3498        let (a0, a1) = self.split_i16x16(a);
3499        let (b0, b1) = self.split_i16x16(b);
3500        self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
3501    }
3502    #[inline(always)]
3503    fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3504        let (a0, a1) = self.split_i16x16(a);
3505        let (b0, b1) = self.split_i16x16(b);
3506        self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
3507    }
3508    #[inline(always)]
3509    fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3510        let (a0, _) = self.split_i16x16(a);
3511        let (b0, _) = self.split_i16x16(b);
3512        self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
3513    }
3514    #[inline(always)]
3515    fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3516        let (_, a1) = self.split_i16x16(a);
3517        let (_, b1) = self.split_i16x16(b);
3518        self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
3519    }
3520    #[inline(always)]
3521    fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3522        let (a0, a1) = self.split_i16x16(a);
3523        let (b0, b1) = self.split_i16x16(b);
3524        self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
3525    }
3526    #[inline(always)]
3527    fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3528        let (a0, a1) = self.split_i16x16(a);
3529        let (b0, b1) = self.split_i16x16(b);
3530        self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
3531    }
3532    #[inline(always)]
3533    fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
3534        let (a0, a1) = self.split_mask16x16(a);
3535        let (b0, b1) = self.split_i16x16(b);
3536        let (c0, c1) = self.split_i16x16(c);
3537        self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
3538    }
3539    #[inline(always)]
3540    fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3541        let (a0, a1) = self.split_i16x16(a);
3542        let (b0, b1) = self.split_i16x16(b);
3543        self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
3544    }
3545    #[inline(always)]
3546    fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3547        let (a0, a1) = self.split_i16x16(a);
3548        let (b0, b1) = self.split_i16x16(b);
3549        self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
3550    }
3551    #[inline(always)]
3552    fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
3553        let mut result = [0; 32usize];
3554        result[0..16usize].copy_from_slice(&a.val);
3555        result[16usize..32usize].copy_from_slice(&b.val);
3556        result.simd_into(self)
3557    }
3558    #[inline(always)]
3559    fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
3560        let mut b0 = [0; 8usize];
3561        let mut b1 = [0; 8usize];
3562        b0.copy_from_slice(&a.val[0..8usize]);
3563        b1.copy_from_slice(&a.val[8usize..16usize]);
3564        (b0.simd_into(self), b1.simd_into(self))
3565    }
3566    #[inline(always)]
3567    fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
3568        let (a0, a1) = self.split_i16x16(a);
3569        self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
3570    }
3571    #[inline(always)]
3572    fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
3573        let (a0, a1) = self.split_i16x16(a);
3574        self.combine_u32x4(
3575            self.reinterpret_u32_i16x8(a0),
3576            self.reinterpret_u32_i16x8(a1),
3577        )
3578    }
3579    #[inline(always)]
3580    fn splat_u16x16(self, a: u16) -> u16x16<Self> {
3581        let half = self.splat_u16x8(a);
3582        self.combine_u16x8(half, half)
3583    }
3584    #[inline(always)]
3585    fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
3586        let (a0, a1) = self.split_u16x16(a);
3587        self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
3588    }
3589    #[inline(always)]
3590    fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3591        let (a0, a1) = self.split_u16x16(a);
3592        let (b0, b1) = self.split_u16x16(b);
3593        self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
3594    }
3595    #[inline(always)]
3596    fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3597        let (a0, a1) = self.split_u16x16(a);
3598        let (b0, b1) = self.split_u16x16(b);
3599        self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
3600    }
3601    #[inline(always)]
3602    fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3603        let (a0, a1) = self.split_u16x16(a);
3604        let (b0, b1) = self.split_u16x16(b);
3605        self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
3606    }
3607    #[inline(always)]
3608    fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3609        let (a0, a1) = self.split_u16x16(a);
3610        let (b0, b1) = self.split_u16x16(b);
3611        self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
3612    }
3613    #[inline(always)]
3614    fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3615        let (a0, a1) = self.split_u16x16(a);
3616        let (b0, b1) = self.split_u16x16(b);
3617        self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
3618    }
3619    #[inline(always)]
3620    fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3621        let (a0, a1) = self.split_u16x16(a);
3622        let (b0, b1) = self.split_u16x16(b);
3623        self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
3624    }
3625    #[inline(always)]
3626    fn shr_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3627        let (a0, a1) = self.split_u16x16(a);
3628        self.combine_u16x8(self.shr_u16x8(a0, b), self.shr_u16x8(a1, b))
3629    }
3630    #[inline(always)]
3631    fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3632        let (a0, a1) = self.split_u16x16(a);
3633        let (b0, b1) = self.split_u16x16(b);
3634        self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
3635    }
3636    #[inline(always)]
3637    fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3638        let (a0, a1) = self.split_u16x16(a);
3639        let (b0, b1) = self.split_u16x16(b);
3640        self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
3641    }
3642    #[inline(always)]
3643    fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3644        let (a0, a1) = self.split_u16x16(a);
3645        let (b0, b1) = self.split_u16x16(b);
3646        self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
3647    }
3648    #[inline(always)]
3649    fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3650        let (a0, a1) = self.split_u16x16(a);
3651        let (b0, b1) = self.split_u16x16(b);
3652        self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
3653    }
3654    #[inline(always)]
3655    fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3656        let (a0, a1) = self.split_u16x16(a);
3657        let (b0, b1) = self.split_u16x16(b);
3658        self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
3659    }
3660    #[inline(always)]
3661    fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3662        let (a0, _) = self.split_u16x16(a);
3663        let (b0, _) = self.split_u16x16(b);
3664        self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
3665    }
3666    #[inline(always)]
3667    fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3668        let (_, a1) = self.split_u16x16(a);
3669        let (_, b1) = self.split_u16x16(b);
3670        self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
3671    }
3672    #[inline(always)]
3673    fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3674        let (a0, a1) = self.split_u16x16(a);
3675        let (b0, b1) = self.split_u16x16(b);
3676        self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
3677    }
3678    #[inline(always)]
3679    fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3680        let (a0, a1) = self.split_u16x16(a);
3681        let (b0, b1) = self.split_u16x16(b);
3682        self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
3683    }
3684    #[inline(always)]
3685    fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
3686        let (a0, a1) = self.split_mask16x16(a);
3687        let (b0, b1) = self.split_u16x16(b);
3688        let (c0, c1) = self.split_u16x16(c);
3689        self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
3690    }
3691    #[inline(always)]
3692    fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3693        let (a0, a1) = self.split_u16x16(a);
3694        let (b0, b1) = self.split_u16x16(b);
3695        self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
3696    }
3697    #[inline(always)]
3698    fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3699        let (a0, a1) = self.split_u16x16(a);
3700        let (b0, b1) = self.split_u16x16(b);
3701        self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
3702    }
3703    #[inline(always)]
3704    fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
3705        let mut result = [0; 32usize];
3706        result[0..16usize].copy_from_slice(&a.val);
3707        result[16usize..32usize].copy_from_slice(&b.val);
3708        result.simd_into(self)
3709    }
3710    #[inline(always)]
3711    fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
3712        let mut b0 = [0; 8usize];
3713        let mut b1 = [0; 8usize];
3714        b0.copy_from_slice(&a.val[0..8usize]);
3715        b1.copy_from_slice(&a.val[8usize..16usize]);
3716        (b0.simd_into(self), b1.simd_into(self))
3717    }
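    // `narrow_u16x16` truncates each 16-bit lane to its low 8 bits (`as u8`),
    // i.e. it wraps rather than saturates.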
3718    #[inline(always)]
3719    fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
3720        [
3721            a[0usize] as u8,
3722            a[1usize] as u8,
3723            a[2usize] as u8,
3724            a[3usize] as u8,
3725            a[4usize] as u8,
3726            a[5usize] as u8,
3727            a[6usize] as u8,
3728            a[7usize] as u8,
3729            a[8usize] as u8,
3730            a[9usize] as u8,
3731            a[10usize] as u8,
3732            a[11usize] as u8,
3733            a[12usize] as u8,
3734            a[13usize] as u8,
3735            a[14usize] as u8,
3736            a[15usize] as u8,
3737        ]
3738        .simd_into(self)
3739    }
3740    #[inline(always)]
3741    fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
3742        let (a0, a1) = self.split_u16x16(a);
3743        self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
3744    }
3745    #[inline(always)]
3746    fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
3747        let (a0, a1) = self.split_u16x16(a);
3748        self.combine_u32x4(
3749            self.reinterpret_u32_u16x8(a0),
3750            self.reinterpret_u32_u16x8(a1),
3751        )
3752    }
3753    #[inline(always)]
3754    fn splat_mask16x16(self, a: i16) -> mask16x16<Self> {
3755        let half = self.splat_mask16x8(a);
3756        self.combine_mask16x8(half, half)
3757    }
3758    #[inline(always)]
3759    fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
3760        let (a0, a1) = self.split_mask16x16(a);
3761        self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
3762    }
3763    #[inline(always)]
3764    fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3765        let (a0, a1) = self.split_mask16x16(a);
3766        let (b0, b1) = self.split_mask16x16(b);
3767        self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
3768    }
3769    #[inline(always)]
3770    fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3771        let (a0, a1) = self.split_mask16x16(a);
3772        let (b0, b1) = self.split_mask16x16(b);
3773        self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
3774    }
3775    #[inline(always)]
3776    fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3777        let (a0, a1) = self.split_mask16x16(a);
3778        let (b0, b1) = self.split_mask16x16(b);
3779        self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
3780    }
3781    #[inline(always)]
3782    fn select_mask16x16(
3783        self,
3784        a: mask16x16<Self>,
3785        b: mask16x16<Self>,
3786        c: mask16x16<Self>,
3787    ) -> mask16x16<Self> {
3788        let (a0, a1) = self.split_mask16x16(a);
3789        let (b0, b1) = self.split_mask16x16(b);
3790        let (c0, c1) = self.split_mask16x16(c);
3791        self.combine_mask16x8(
3792            self.select_mask16x8(a0, b0, c0),
3793            self.select_mask16x8(a1, b1, c1),
3794        )
3795    }
3796    #[inline(always)]
3797    fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3798        let (a0, a1) = self.split_mask16x16(a);
3799        let (b0, b1) = self.split_mask16x16(b);
3800        self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
3801    }
3802    #[inline(always)]
3803    fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
3804        let mut result = [0; 32usize];
3805        result[0..16usize].copy_from_slice(&a.val);
3806        result[16usize..32usize].copy_from_slice(&b.val);
3807        result.simd_into(self)
3808    }
3809    #[inline(always)]
3810    fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
3811        let mut b0 = [0; 8usize];
3812        let mut b1 = [0; 8usize];
3813        b0.copy_from_slice(&a.val[0..8usize]);
3814        b1.copy_from_slice(&a.val[8usize..16usize]);
3815        (b0.simd_into(self), b1.simd_into(self))
3816    }
3817    #[inline(always)]
3818    fn splat_i32x8(self, a: i32) -> i32x8<Self> {
3819        let half = self.splat_i32x4(a);
3820        self.combine_i32x4(half, half)
3821    }
3822    #[inline(always)]
3823    fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
3824        let (a0, a1) = self.split_i32x8(a);
3825        self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
3826    }
3827    #[inline(always)]
3828    fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3829        let (a0, a1) = self.split_i32x8(a);
3830        let (b0, b1) = self.split_i32x8(b);
3831        self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
3832    }
3833    #[inline(always)]
3834    fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3835        let (a0, a1) = self.split_i32x8(a);
3836        let (b0, b1) = self.split_i32x8(b);
3837        self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
3838    }
3839    #[inline(always)]
3840    fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3841        let (a0, a1) = self.split_i32x8(a);
3842        let (b0, b1) = self.split_i32x8(b);
3843        self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
3844    }
3845    #[inline(always)]
3846    fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3847        let (a0, a1) = self.split_i32x8(a);
3848        let (b0, b1) = self.split_i32x8(b);
3849        self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
3850    }
3851    #[inline(always)]
3852    fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3853        let (a0, a1) = self.split_i32x8(a);
3854        let (b0, b1) = self.split_i32x8(b);
3855        self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
3856    }
3857    #[inline(always)]
3858    fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3859        let (a0, a1) = self.split_i32x8(a);
3860        let (b0, b1) = self.split_i32x8(b);
3861        self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
3862    }
3863    #[inline(always)]
3864    fn shr_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
3865        let (a0, a1) = self.split_i32x8(a);
3866        self.combine_i32x4(self.shr_i32x4(a0, b), self.shr_i32x4(a1, b))
3867    }
3868    #[inline(always)]
3869    fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3870        let (a0, a1) = self.split_i32x8(a);
3871        let (b0, b1) = self.split_i32x8(b);
3872        self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
3873    }
3874    #[inline(always)]
3875    fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3876        let (a0, a1) = self.split_i32x8(a);
3877        let (b0, b1) = self.split_i32x8(b);
3878        self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
3879    }
3880    #[inline(always)]
3881    fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3882        let (a0, a1) = self.split_i32x8(a);
3883        let (b0, b1) = self.split_i32x8(b);
3884        self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
3885    }
3886    #[inline(always)]
3887    fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3888        let (a0, a1) = self.split_i32x8(a);
3889        let (b0, b1) = self.split_i32x8(b);
3890        self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
3891    }
3892    #[inline(always)]
3893    fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3894        let (a0, a1) = self.split_i32x8(a);
3895        let (b0, b1) = self.split_i32x8(b);
3896        self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
3897    }
3898    #[inline(always)]
3899    fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3900        let (a0, _) = self.split_i32x8(a);
3901        let (b0, _) = self.split_i32x8(b);
3902        self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
3903    }
3904    #[inline(always)]
3905    fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3906        let (_, a1) = self.split_i32x8(a);
3907        let (_, b1) = self.split_i32x8(b);
3908        self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
3909    }
3910    #[inline(always)]
3911    fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3912        let (a0, a1) = self.split_i32x8(a);
3913        let (b0, b1) = self.split_i32x8(b);
3914        self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
3915    }
3916    #[inline(always)]
3917    fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3918        let (a0, a1) = self.split_i32x8(a);
3919        let (b0, b1) = self.split_i32x8(b);
3920        self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
3921    }
3922    #[inline(always)]
3923    fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
3924        let (a0, a1) = self.split_mask32x8(a);
3925        let (b0, b1) = self.split_i32x8(b);
3926        let (c0, c1) = self.split_i32x8(c);
3927        self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
3928    }
3929    #[inline(always)]
3930    fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3931        let (a0, a1) = self.split_i32x8(a);
3932        let (b0, b1) = self.split_i32x8(b);
3933        self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
3934    }
3935    #[inline(always)]
3936    fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3937        let (a0, a1) = self.split_i32x8(a);
3938        let (b0, b1) = self.split_i32x8(b);
3939        self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
3940    }
3941    #[inline(always)]
3942    fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
3943        let mut result = [0; 16usize];
3944        result[0..8usize].copy_from_slice(&a.val);
3945        result[8usize..16usize].copy_from_slice(&b.val);
3946        result.simd_into(self)
3947    }
3948    #[inline(always)]
3949    fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
3950        let mut b0 = [0; 4usize];
3951        let mut b1 = [0; 4usize];
3952        b0.copy_from_slice(&a.val[0..4usize]);
3953        b1.copy_from_slice(&a.val[4usize..8usize]);
3954        (b0.simd_into(self), b1.simd_into(self))
3955    }
3956    #[inline(always)]
3957    fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
3958        let (a0, a1) = self.split_i32x8(a);
3959        self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
3960    }
3961    #[inline(always)]
3962    fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
3963        let (a0, a1) = self.split_i32x8(a);
3964        self.combine_u32x4(
3965            self.reinterpret_u32_i32x4(a0),
3966            self.reinterpret_u32_i32x4(a1),
3967        )
3968    }
3969    #[inline(always)]
3970    fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
3971        let (a0, a1) = self.split_i32x8(a);
3972        self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
3973    }
3974    #[inline(always)]
3975    fn splat_u32x8(self, a: u32) -> u32x8<Self> {
3976        let half = self.splat_u32x4(a);
3977        self.combine_u32x4(half, half)
3978    }
3979    #[inline(always)]
3980    fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
3981        let (a0, a1) = self.split_u32x8(a);
3982        self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
3983    }
3984    #[inline(always)]
3985    fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3986        let (a0, a1) = self.split_u32x8(a);
3987        let (b0, b1) = self.split_u32x8(b);
3988        self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
3989    }
3990    #[inline(always)]
3991    fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3992        let (a0, a1) = self.split_u32x8(a);
3993        let (b0, b1) = self.split_u32x8(b);
3994        self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
3995    }
3996    #[inline(always)]
3997    fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3998        let (a0, a1) = self.split_u32x8(a);
3999        let (b0, b1) = self.split_u32x8(b);
4000        self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
4001    }
4002    #[inline(always)]
4003    fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4004        let (a0, a1) = self.split_u32x8(a);
4005        let (b0, b1) = self.split_u32x8(b);
4006        self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
4007    }
4008    #[inline(always)]
4009    fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4010        let (a0, a1) = self.split_u32x8(a);
4011        let (b0, b1) = self.split_u32x8(b);
4012        self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
4013    }
4014    #[inline(always)]
4015    fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4016        let (a0, a1) = self.split_u32x8(a);
4017        let (b0, b1) = self.split_u32x8(b);
4018        self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
4019    }
4020    #[inline(always)]
4021    fn shr_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4022        let (a0, a1) = self.split_u32x8(a);
4023        self.combine_u32x4(self.shr_u32x4(a0, b), self.shr_u32x4(a1, b))
4024    }
4025    #[inline(always)]
4026    fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4027        let (a0, a1) = self.split_u32x8(a);
4028        let (b0, b1) = self.split_u32x8(b);
4029        self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
4030    }
4031    #[inline(always)]
4032    fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4033        let (a0, a1) = self.split_u32x8(a);
4034        let (b0, b1) = self.split_u32x8(b);
4035        self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
4036    }
4037    #[inline(always)]
4038    fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4039        let (a0, a1) = self.split_u32x8(a);
4040        let (b0, b1) = self.split_u32x8(b);
4041        self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
4042    }
4043    #[inline(always)]
4044    fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4045        let (a0, a1) = self.split_u32x8(a);
4046        let (b0, b1) = self.split_u32x8(b);
4047        self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
4048    }
4049    #[inline(always)]
4050    fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4051        let (a0, a1) = self.split_u32x8(a);
4052        let (b0, b1) = self.split_u32x8(b);
4053        self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
4054    }
4055    #[inline(always)]
4056    fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4057        let (a0, _) = self.split_u32x8(a);
4058        let (b0, _) = self.split_u32x8(b);
4059        self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
4060    }
4061    #[inline(always)]
4062    fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4063        let (_, a1) = self.split_u32x8(a);
4064        let (_, b1) = self.split_u32x8(b);
4065        self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
4066    }
4067    #[inline(always)]
4068    fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4069        let (a0, a1) = self.split_u32x8(a);
4070        let (b0, b1) = self.split_u32x8(b);
4071        self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
4072    }
4073    #[inline(always)]
4074    fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4075        let (a0, a1) = self.split_u32x8(a);
4076        let (b0, b1) = self.split_u32x8(b);
4077        self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
4078    }
4079    #[inline(always)]
4080    fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
4081        let (a0, a1) = self.split_mask32x8(a);
4082        let (b0, b1) = self.split_u32x8(b);
4083        let (c0, c1) = self.split_u32x8(c);
4084        self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
4085    }
4086    #[inline(always)]
4087    fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4088        let (a0, a1) = self.split_u32x8(a);
4089        let (b0, b1) = self.split_u32x8(b);
4090        self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
4091    }
4092    #[inline(always)]
4093    fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4094        let (a0, a1) = self.split_u32x8(a);
4095        let (b0, b1) = self.split_u32x8(b);
4096        self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
4097    }
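    // Width-changing helpers: `combine_u32x8` concatenates two 8-lane vectors
    // into a u32x16, and `split_u32x8` is the inverse, returning the low and
    // high 4-lane halves.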
4098    #[inline(always)]
4099    fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
4100        let mut result = [0; 16usize];
4101        result[0..8usize].copy_from_slice(&a.val);
4102        result[8usize..16usize].copy_from_slice(&b.val);
4103        result.simd_into(self)
4104    }
4105    #[inline(always)]
4106    fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
4107        let mut b0 = [0; 4usize];
4108        let mut b1 = [0; 4usize];
4109        b0.copy_from_slice(&a.val[0..4usize]);
4110        b1.copy_from_slice(&a.val[4usize..8usize]);
4111        (b0.simd_into(self), b1.simd_into(self))
4112    }
4113    #[inline(always)]
4114    fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
4115        let (a0, a1) = self.split_u32x8(a);
4116        self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
4117    }
4118    #[inline(always)]
4119    fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
4120        let (a0, a1) = self.split_u32x8(a);
4121        self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
4122    }
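    // mask32x8 operations, using the same split/combine strategy on the two
    // mask32x4 halves.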
4123    #[inline(always)]
4124    fn splat_mask32x8(self, a: i32) -> mask32x8<Self> {
4125        let half = self.splat_mask32x4(a);
4126        self.combine_mask32x4(half, half)
4127    }
4128    #[inline(always)]
4129    fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
4130        let (a0, a1) = self.split_mask32x8(a);
4131        self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
4132    }
4133    #[inline(always)]
4134    fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4135        let (a0, a1) = self.split_mask32x8(a);
4136        let (b0, b1) = self.split_mask32x8(b);
4137        self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
4138    }
4139    #[inline(always)]
4140    fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4141        let (a0, a1) = self.split_mask32x8(a);
4142        let (b0, b1) = self.split_mask32x8(b);
4143        self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
4144    }
4145    #[inline(always)]
4146    fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4147        let (a0, a1) = self.split_mask32x8(a);
4148        let (b0, b1) = self.split_mask32x8(b);
4149        self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
4150    }
4151    #[inline(always)]
4152    fn select_mask32x8(
4153        self,
4154        a: mask32x8<Self>,
4155        b: mask32x8<Self>,
4156        c: mask32x8<Self>,
4157    ) -> mask32x8<Self> {
4158        let (a0, a1) = self.split_mask32x8(a);
4159        let (b0, b1) = self.split_mask32x8(b);
4160        let (c0, c1) = self.split_mask32x8(c);
4161        self.combine_mask32x4(
4162            self.select_mask32x4(a0, b0, c0),
4163            self.select_mask32x4(a1, b1, c1),
4164        )
4165    }
4166    #[inline(always)]
4167    fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4168        let (a0, a1) = self.split_mask32x8(a);
4169        let (b0, b1) = self.split_mask32x8(b);
4170        self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
4171    }
4172    #[inline(always)]
4173    fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
4174        let mut result = [0; 16usize];
4175        result[0..8usize].copy_from_slice(&a.val);
4176        result[8usize..16usize].copy_from_slice(&b.val);
4177        result.simd_into(self)
4178    }
4179    #[inline(always)]
4180    fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
4181        let mut b0 = [0; 4usize];
4182        let mut b1 = [0; 4usize];
4183        b0.copy_from_slice(&a.val[0..4usize]);
4184        b1.copy_from_slice(&a.val[4usize..8usize]);
4185        (b0.simd_into(self), b1.simd_into(self))
4186    }
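    // f64x4 operations: 4-lane f64 vectors are processed as two f64x2 halves.
    //
    // Illustrative usage (hypothetical; assumes a fallback token `s`):
    //     let y = s.sqrt_f64x4(x); // lane-wise square root of `x`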
4187    #[inline(always)]
4188    fn splat_f64x4(self, a: f64) -> f64x4<Self> {
4189        let half = self.splat_f64x2(a);
4190        self.combine_f64x2(half, half)
4191    }
4192    #[inline(always)]
4193    fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4194        let (a0, a1) = self.split_f64x4(a);
4195        self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
4196    }
4197    #[inline(always)]
4198    fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4199        let (a0, a1) = self.split_f64x4(a);
4200        self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
4201    }
4202    #[inline(always)]
4203    fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4204        let (a0, a1) = self.split_f64x4(a);
4205        self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
4206    }
4207    #[inline(always)]
4208    fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4209        let (a0, a1) = self.split_f64x4(a);
4210        let (b0, b1) = self.split_f64x4(b);
4211        self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
4212    }
4213    #[inline(always)]
4214    fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4215        let (a0, a1) = self.split_f64x4(a);
4216        let (b0, b1) = self.split_f64x4(b);
4217        self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
4218    }
4219    #[inline(always)]
4220    fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4221        let (a0, a1) = self.split_f64x4(a);
4222        let (b0, b1) = self.split_f64x4(b);
4223        self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
4224    }
4225    #[inline(always)]
4226    fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4227        let (a0, a1) = self.split_f64x4(a);
4228        let (b0, b1) = self.split_f64x4(b);
4229        self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
4230    }
4231    #[inline(always)]
4232    fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4233        let (a0, a1) = self.split_f64x4(a);
4234        let (b0, b1) = self.split_f64x4(b);
4235        self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
4236    }
4237    #[inline(always)]
4238    fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4239        let (a0, a1) = self.split_f64x4(a);
4240        let (b0, b1) = self.split_f64x4(b);
4241        self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
4242    }
4243    #[inline(always)]
4244    fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4245        let (a0, a1) = self.split_f64x4(a);
4246        let (b0, b1) = self.split_f64x4(b);
4247        self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
4248    }
4249    #[inline(always)]
4250    fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4251        let (a0, a1) = self.split_f64x4(a);
4252        let (b0, b1) = self.split_f64x4(b);
4253        self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
4254    }
4255    #[inline(always)]
4256    fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4257        let (a0, a1) = self.split_f64x4(a);
4258        let (b0, b1) = self.split_f64x4(b);
4259        self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
4260    }
4261    #[inline(always)]
4262    fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4263        let (a0, a1) = self.split_f64x4(a);
4264        let (b0, b1) = self.split_f64x4(b);
4265        self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
4266    }
4267    #[inline(always)]
4268    fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4269        let (a0, _) = self.split_f64x4(a);
4270        let (b0, _) = self.split_f64x4(b);
4271        self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
4272    }
4273    #[inline(always)]
4274    fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4275        let (_, a1) = self.split_f64x4(a);
4276        let (_, b1) = self.split_f64x4(b);
4277        self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
4278    }
4279    #[inline(always)]
4280    fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4281        let (a0, a1) = self.split_f64x4(a);
4282        let (b0, b1) = self.split_f64x4(b);
4283        self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
4284    }
4285    #[inline(always)]
4286    fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4287        let (a0, a1) = self.split_f64x4(a);
4288        let (b0, b1) = self.split_f64x4(b);
4289        self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
4290    }
4291    #[inline(always)]
4292    fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4293        let (a0, a1) = self.split_f64x4(a);
4294        let (b0, b1) = self.split_f64x4(b);
4295        self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
4296    }
4297    #[inline(always)]
4298    fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4299        let (a0, a1) = self.split_f64x4(a);
4300        let (b0, b1) = self.split_f64x4(b);
4301        self.combine_f64x2(
4302            self.max_precise_f64x2(a0, b0),
4303            self.max_precise_f64x2(a1, b1),
4304        )
4305    }
4306    #[inline(always)]
4307    fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4308        let (a0, a1) = self.split_f64x4(a);
4309        let (b0, b1) = self.split_f64x4(b);
4310        self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
4311    }
4312    #[inline(always)]
4313    fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4314        let (a0, a1) = self.split_f64x4(a);
4315        let (b0, b1) = self.split_f64x4(b);
4316        self.combine_f64x2(
4317            self.min_precise_f64x2(a0, b0),
4318            self.min_precise_f64x2(a1, b1),
4319        )
4320    }
4321    #[inline(always)]
4322    fn madd_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4323        let (a0, a1) = self.split_f64x4(a);
4324        let (b0, b1) = self.split_f64x4(b);
4325        let (c0, c1) = self.split_f64x4(c);
4326        self.combine_f64x2(self.madd_f64x2(a0, b0, c0), self.madd_f64x2(a1, b1, c1))
4327    }
4328    #[inline(always)]
4329    fn msub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4330        let (a0, a1) = self.split_f64x4(a);
4331        let (b0, b1) = self.split_f64x4(b);
4332        let (c0, c1) = self.split_f64x4(c);
4333        self.combine_f64x2(self.msub_f64x2(a0, b0, c0), self.msub_f64x2(a1, b1, c1))
4334    }
4335    #[inline(always)]
4336    fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4337        let (a0, a1) = self.split_f64x4(a);
4338        self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
4339    }
4340    #[inline(always)]
4341    fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4342        let (a0, a1) = self.split_f64x4(a);
4343        self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
4344    }
4345    #[inline(always)]
4346    fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4347        let (a0, a1) = self.split_f64x4(a);
4348        self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
4349    }
4350    #[inline(always)]
4351    fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4352        let (a0, a1) = self.split_mask64x4(a);
4353        let (b0, b1) = self.split_f64x4(b);
4354        let (c0, c1) = self.split_f64x4(c);
4355        self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
4356    }
4357    #[inline(always)]
4358    fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
4359        let mut result = [0.0; 8usize];
4360        result[0..4usize].copy_from_slice(&a.val);
4361        result[4usize..8usize].copy_from_slice(&b.val);
4362        result.simd_into(self)
4363    }
4364    #[inline(always)]
4365    fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
4366        let mut b0 = [0.0; 2usize];
4367        let mut b1 = [0.0; 2usize];
4368        b0.copy_from_slice(&a.val[0..2usize]);
4369        b1.copy_from_slice(&a.val[2usize..4usize]);
4370        (b0.simd_into(self), b1.simd_into(self))
4371    }
4372    #[inline(always)]
4373    fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
4374        let (a0, a1) = self.split_f64x4(a);
4375        self.combine_f32x4(
4376            self.reinterpret_f32_f64x2(a0),
4377            self.reinterpret_f32_f64x2(a1),
4378        )
4379    }
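    // mask64x4 operations, split into two mask64x2 halves.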
4380    #[inline(always)]
4381    fn splat_mask64x4(self, a: i64) -> mask64x4<Self> {
4382        let half = self.splat_mask64x2(a);
4383        self.combine_mask64x2(half, half)
4384    }
4385    #[inline(always)]
4386    fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
4387        let (a0, a1) = self.split_mask64x4(a);
4388        self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
4389    }
4390    #[inline(always)]
4391    fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4392        let (a0, a1) = self.split_mask64x4(a);
4393        let (b0, b1) = self.split_mask64x4(b);
4394        self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
4395    }
4396    #[inline(always)]
4397    fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4398        let (a0, a1) = self.split_mask64x4(a);
4399        let (b0, b1) = self.split_mask64x4(b);
4400        self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
4401    }
4402    #[inline(always)]
4403    fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4404        let (a0, a1) = self.split_mask64x4(a);
4405        let (b0, b1) = self.split_mask64x4(b);
4406        self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
4407    }
4408    #[inline(always)]
4409    fn select_mask64x4(
4410        self,
4411        a: mask64x4<Self>,
4412        b: mask64x4<Self>,
4413        c: mask64x4<Self>,
4414    ) -> mask64x4<Self> {
4415        let (a0, a1) = self.split_mask64x4(a);
4416        let (b0, b1) = self.split_mask64x4(b);
4417        let (c0, c1) = self.split_mask64x4(c);
4418        self.combine_mask64x2(
4419            self.select_mask64x2(a0, b0, c0),
4420            self.select_mask64x2(a1, b1, c1),
4421        )
4422    }
4423    #[inline(always)]
4424    fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4425        let (a0, a1) = self.split_mask64x4(a);
4426        let (b0, b1) = self.split_mask64x4(b);
4427        self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
4428    }
4429    #[inline(always)]
4430    fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
4431        let mut result = [0; 8usize];
4432        result[0..4usize].copy_from_slice(&a.val);
4433        result[4usize..8usize].copy_from_slice(&b.val);
4434        result.simd_into(self)
4435    }
4436    #[inline(always)]
4437    fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
4438        let mut b0 = [0; 2usize];
4439        let mut b1 = [0; 2usize];
4440        b0.copy_from_slice(&a.val[0..2usize]);
4441        b1.copy_from_slice(&a.val[2usize..4usize]);
4442        (b0.simd_into(self), b1.simd_into(self))
4443    }
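    // f32x16 operations: 512-bit vectors are processed as two f32x8 halves.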
4444    #[inline(always)]
4445    fn splat_f32x16(self, a: f32) -> f32x16<Self> {
4446        let half = self.splat_f32x8(a);
4447        self.combine_f32x8(half, half)
4448    }
4449    #[inline(always)]
4450    fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4451        let (a0, a1) = self.split_f32x16(a);
4452        self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
4453    }
4454    #[inline(always)]
4455    fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4456        let (a0, a1) = self.split_f32x16(a);
4457        self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
4458    }
4459    #[inline(always)]
4460    fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4461        let (a0, a1) = self.split_f32x16(a);
4462        self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
4463    }
4464    #[inline(always)]
4465    fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4466        let (a0, a1) = self.split_f32x16(a);
4467        let (b0, b1) = self.split_f32x16(b);
4468        self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
4469    }
4470    #[inline(always)]
4471    fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4472        let (a0, a1) = self.split_f32x16(a);
4473        let (b0, b1) = self.split_f32x16(b);
4474        self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
4475    }
4476    #[inline(always)]
4477    fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4478        let (a0, a1) = self.split_f32x16(a);
4479        let (b0, b1) = self.split_f32x16(b);
4480        self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
4481    }
4482    #[inline(always)]
4483    fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4484        let (a0, a1) = self.split_f32x16(a);
4485        let (b0, b1) = self.split_f32x16(b);
4486        self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
4487    }
4488    #[inline(always)]
4489    fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4490        let (a0, a1) = self.split_f32x16(a);
4491        let (b0, b1) = self.split_f32x16(b);
4492        self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
4493    }
4494    #[inline(always)]
4495    fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4496        let (a0, a1) = self.split_f32x16(a);
4497        let (b0, b1) = self.split_f32x16(b);
4498        self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
4499    }
4500    #[inline(always)]
4501    fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4502        let (a0, a1) = self.split_f32x16(a);
4503        let (b0, b1) = self.split_f32x16(b);
4504        self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
4505    }
4506    #[inline(always)]
4507    fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4508        let (a0, a1) = self.split_f32x16(a);
4509        let (b0, b1) = self.split_f32x16(b);
4510        self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
4511    }
4512    #[inline(always)]
4513    fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4514        let (a0, a1) = self.split_f32x16(a);
4515        let (b0, b1) = self.split_f32x16(b);
4516        self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
4517    }
4518    #[inline(always)]
4519    fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4520        let (a0, a1) = self.split_f32x16(a);
4521        let (b0, b1) = self.split_f32x16(b);
4522        self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
4523    }
4524    #[inline(always)]
4525    fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4526        let (a0, _) = self.split_f32x16(a);
4527        let (b0, _) = self.split_f32x16(b);
4528        self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
4529    }
4530    #[inline(always)]
4531    fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4532        let (_, a1) = self.split_f32x16(a);
4533        let (_, b1) = self.split_f32x16(b);
4534        self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
4535    }
4536    #[inline(always)]
4537    fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4538        let (a0, a1) = self.split_f32x16(a);
4539        let (b0, b1) = self.split_f32x16(b);
4540        self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
4541    }
4542    #[inline(always)]
4543    fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4544        let (a0, a1) = self.split_f32x16(a);
4545        let (b0, b1) = self.split_f32x16(b);
4546        self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
4547    }
4548    #[inline(always)]
4549    fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4550        let (a0, a1) = self.split_f32x16(a);
4551        let (b0, b1) = self.split_f32x16(b);
4552        self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
4553    }
4554    #[inline(always)]
4555    fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4556        let (a0, a1) = self.split_f32x16(a);
4557        let (b0, b1) = self.split_f32x16(b);
4558        self.combine_f32x8(
4559            self.max_precise_f32x8(a0, b0),
4560            self.max_precise_f32x8(a1, b1),
4561        )
4562    }
4563    #[inline(always)]
4564    fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4565        let (a0, a1) = self.split_f32x16(a);
4566        let (b0, b1) = self.split_f32x16(b);
4567        self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
4568    }
4569    #[inline(always)]
4570    fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4571        let (a0, a1) = self.split_f32x16(a);
4572        let (b0, b1) = self.split_f32x16(b);
4573        self.combine_f32x8(
4574            self.min_precise_f32x8(a0, b0),
4575            self.min_precise_f32x8(a1, b1),
4576        )
4577    }
4578    #[inline(always)]
4579    fn madd_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4580        let (a0, a1) = self.split_f32x16(a);
4581        let (b0, b1) = self.split_f32x16(b);
4582        let (c0, c1) = self.split_f32x16(c);
4583        self.combine_f32x8(self.madd_f32x8(a0, b0, c0), self.madd_f32x8(a1, b1, c1))
4584    }
4585    #[inline(always)]
4586    fn msub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4587        let (a0, a1) = self.split_f32x16(a);
4588        let (b0, b1) = self.split_f32x16(b);
4589        let (c0, c1) = self.split_f32x16(c);
4590        self.combine_f32x8(self.msub_f32x8(a0, b0, c0), self.msub_f32x8(a1, b1, c1))
4591    }
4592    #[inline(always)]
4593    fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4594        let (a0, a1) = self.split_f32x16(a);
4595        self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
4596    }
4597    #[inline(always)]
4598    fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4599        let (a0, a1) = self.split_f32x16(a);
4600        self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
4601    }
4602    #[inline(always)]
4603    fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4604        let (a0, a1) = self.split_f32x16(a);
4605        self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
4606    }
4607    #[inline(always)]
4608    fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4609        let (a0, a1) = self.split_mask32x16(a);
4610        let (b0, b1) = self.split_f32x16(b);
4611        let (c0, c1) = self.split_f32x16(c);
4612        self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
4613    }
4614    #[inline(always)]
4615    fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
4616        let mut b0 = [0.0; 8usize];
4617        let mut b1 = [0.0; 8usize];
4618        b0.copy_from_slice(&a.val[0..8usize]);
4619        b1.copy_from_slice(&a.val[8usize..16usize]);
4620        (b0.simd_into(self), b1.simd_into(self))
4621    }
4622    #[inline(always)]
4623    fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
4624        let (a0, a1) = self.split_f32x16(a);
4625        self.combine_f64x4(
4626            self.reinterpret_f64_f32x8(a0),
4627            self.reinterpret_f64_f32x8(a1),
4628        )
4629    }
4630    #[inline(always)]
4631    fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4632        let (a0, a1) = self.split_f32x16(a);
4633        self.combine_i32x8(
4634            self.reinterpret_i32_f32x8(a0),
4635            self.reinterpret_i32_f32x8(a1),
4636        )
4637    }
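    // Interleaved 128-bit load/store for f32x16. The load gathers every fourth
    // f32, so the `i`-th group of four output lanes holds element `i` of each
    // consecutive group of four source elements; viewed as a 4x4 matrix this is
    // a transpose, which is its own inverse, so the store below uses the same
    // index pattern.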
4638    #[inline(always)]
4639    fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
4640        [
4641            src[0usize],
4642            src[4usize],
4643            src[8usize],
4644            src[12usize],
4645            src[1usize],
4646            src[5usize],
4647            src[9usize],
4648            src[13usize],
4649            src[2usize],
4650            src[6usize],
4651            src[10usize],
4652            src[14usize],
4653            src[3usize],
4654            src[7usize],
4655            src[11usize],
4656            src[15usize],
4657        ]
4658        .simd_into(self)
4659    }
4660    #[inline(always)]
4661    fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) {
4662        *dest = [
4663            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
4664            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
4665            a[11usize], a[15usize],
4666        ];
4667    }
4668    #[inline(always)]
4669    fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
4670        let (a0, a1) = self.split_f32x16(a);
4671        self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
4672    }
4673    #[inline(always)]
4674    fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4675        let (a0, a1) = self.split_f32x16(a);
4676        self.combine_u32x8(
4677            self.reinterpret_u32_f32x8(a0),
4678            self.reinterpret_u32_f32x8(a1),
4679        )
4680    }
4681    #[inline(always)]
4682    fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4683        let (a0, a1) = self.split_f32x16(a);
4684        self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
4685    }
4686    #[inline(always)]
4687    fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4688        let (a0, a1) = self.split_f32x16(a);
4689        self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
4690    }
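    // i8x64 operations: 512-bit integer vectors are processed as two i8x32
    // halves.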
4691    #[inline(always)]
4692    fn splat_i8x64(self, a: i8) -> i8x64<Self> {
4693        let half = self.splat_i8x32(a);
4694        self.combine_i8x32(half, half)
4695    }
4696    #[inline(always)]
4697    fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
4698        let (a0, a1) = self.split_i8x64(a);
4699        self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
4700    }
4701    #[inline(always)]
4702    fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4703        let (a0, a1) = self.split_i8x64(a);
4704        let (b0, b1) = self.split_i8x64(b);
4705        self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
4706    }
4707    #[inline(always)]
4708    fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4709        let (a0, a1) = self.split_i8x64(a);
4710        let (b0, b1) = self.split_i8x64(b);
4711        self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
4712    }
4713    #[inline(always)]
4714    fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4715        let (a0, a1) = self.split_i8x64(a);
4716        let (b0, b1) = self.split_i8x64(b);
4717        self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
4718    }
4719    #[inline(always)]
4720    fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4721        let (a0, a1) = self.split_i8x64(a);
4722        let (b0, b1) = self.split_i8x64(b);
4723        self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
4724    }
4725    #[inline(always)]
4726    fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4727        let (a0, a1) = self.split_i8x64(a);
4728        let (b0, b1) = self.split_i8x64(b);
4729        self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
4730    }
4731    #[inline(always)]
4732    fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4733        let (a0, a1) = self.split_i8x64(a);
4734        let (b0, b1) = self.split_i8x64(b);
4735        self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
4736    }
4737    #[inline(always)]
4738    fn shr_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
4739        let (a0, a1) = self.split_i8x64(a);
4740        self.combine_i8x32(self.shr_i8x32(a0, b), self.shr_i8x32(a1, b))
4741    }
4742    #[inline(always)]
4743    fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4744        let (a0, a1) = self.split_i8x64(a);
4745        let (b0, b1) = self.split_i8x64(b);
4746        self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
4747    }
4748    #[inline(always)]
4749    fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4750        let (a0, a1) = self.split_i8x64(a);
4751        let (b0, b1) = self.split_i8x64(b);
4752        self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
4753    }
4754    #[inline(always)]
4755    fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4756        let (a0, a1) = self.split_i8x64(a);
4757        let (b0, b1) = self.split_i8x64(b);
4758        self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
4759    }
4760    #[inline(always)]
4761    fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4762        let (a0, a1) = self.split_i8x64(a);
4763        let (b0, b1) = self.split_i8x64(b);
4764        self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
4765    }
4766    #[inline(always)]
4767    fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4768        let (a0, a1) = self.split_i8x64(a);
4769        let (b0, b1) = self.split_i8x64(b);
4770        self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
4771    }
4772    #[inline(always)]
4773    fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4774        let (a0, _) = self.split_i8x64(a);
4775        let (b0, _) = self.split_i8x64(b);
4776        self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
4777    }
4778    #[inline(always)]
4779    fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4780        let (_, a1) = self.split_i8x64(a);
4781        let (_, b1) = self.split_i8x64(b);
4782        self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
4783    }
4784    #[inline(always)]
4785    fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4786        let (a0, a1) = self.split_i8x64(a);
4787        let (b0, b1) = self.split_i8x64(b);
4788        self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
4789    }
4790    #[inline(always)]
4791    fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4792        let (a0, a1) = self.split_i8x64(a);
4793        let (b0, b1) = self.split_i8x64(b);
4794        self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
4795    }
4796    #[inline(always)]
4797    fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
4798        let (a0, a1) = self.split_mask8x64(a);
4799        let (b0, b1) = self.split_i8x64(b);
4800        let (c0, c1) = self.split_i8x64(c);
4801        self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
4802    }
4803    #[inline(always)]
4804    fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4805        let (a0, a1) = self.split_i8x64(a);
4806        let (b0, b1) = self.split_i8x64(b);
4807        self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
4808    }
4809    #[inline(always)]
4810    fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4811        let (a0, a1) = self.split_i8x64(a);
4812        let (b0, b1) = self.split_i8x64(b);
4813        self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
4814    }
4815    #[inline(always)]
4816    fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
4817        let mut b0 = [0; 32usize];
4818        let mut b1 = [0; 32usize];
4819        b0.copy_from_slice(&a.val[0..32usize]);
4820        b1.copy_from_slice(&a.val[32usize..64usize]);
4821        (b0.simd_into(self), b1.simd_into(self))
4822    }
4823    #[inline(always)]
4824    fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
4825        let (a0, a1) = self.split_i8x64(a);
4826        self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
4827    }
4828    #[inline(always)]
4829    fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
4830        let (a0, a1) = self.split_i8x64(a);
4831        self.combine_u32x8(
4832            self.reinterpret_u32_i8x32(a0),
4833            self.reinterpret_u32_i8x32(a1),
4834        )
4835    }
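    // u8x64 operations, handled the same way as i8x64 via two u8x32 halves.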
4836    #[inline(always)]
4837    fn splat_u8x64(self, a: u8) -> u8x64<Self> {
4838        let half = self.splat_u8x32(a);
4839        self.combine_u8x32(half, half)
4840    }
4841    #[inline(always)]
4842    fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
4843        let (a0, a1) = self.split_u8x64(a);
4844        self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
4845    }
4846    #[inline(always)]
4847    fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4848        let (a0, a1) = self.split_u8x64(a);
4849        let (b0, b1) = self.split_u8x64(b);
4850        self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
4851    }
4852    #[inline(always)]
4853    fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4854        let (a0, a1) = self.split_u8x64(a);
4855        let (b0, b1) = self.split_u8x64(b);
4856        self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
4857    }
4858    #[inline(always)]
4859    fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4860        let (a0, a1) = self.split_u8x64(a);
4861        let (b0, b1) = self.split_u8x64(b);
4862        self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
4863    }
4864    #[inline(always)]
4865    fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4866        let (a0, a1) = self.split_u8x64(a);
4867        let (b0, b1) = self.split_u8x64(b);
4868        self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
4869    }
4870    #[inline(always)]
4871    fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4872        let (a0, a1) = self.split_u8x64(a);
4873        let (b0, b1) = self.split_u8x64(b);
4874        self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
4875    }
4876    #[inline(always)]
4877    fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4878        let (a0, a1) = self.split_u8x64(a);
4879        let (b0, b1) = self.split_u8x64(b);
4880        self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
4881    }
4882    #[inline(always)]
4883    fn shr_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
4884        let (a0, a1) = self.split_u8x64(a);
4885        self.combine_u8x32(self.shr_u8x32(a0, b), self.shr_u8x32(a1, b))
4886    }
4887    #[inline(always)]
4888    fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4889        let (a0, a1) = self.split_u8x64(a);
4890        let (b0, b1) = self.split_u8x64(b);
4891        self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
4892    }
4893    #[inline(always)]
4894    fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4895        let (a0, a1) = self.split_u8x64(a);
4896        let (b0, b1) = self.split_u8x64(b);
4897        self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
4898    }
4899    #[inline(always)]
4900    fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4901        let (a0, a1) = self.split_u8x64(a);
4902        let (b0, b1) = self.split_u8x64(b);
4903        self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
4904    }
4905    #[inline(always)]
4906    fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4907        let (a0, a1) = self.split_u8x64(a);
4908        let (b0, b1) = self.split_u8x64(b);
4909        self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
4910    }
4911    #[inline(always)]
4912    fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4913        let (a0, a1) = self.split_u8x64(a);
4914        let (b0, b1) = self.split_u8x64(b);
4915        self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
4916    }
4917    #[inline(always)]
4918    fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4919        let (a0, _) = self.split_u8x64(a);
4920        let (b0, _) = self.split_u8x64(b);
4921        self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
4922    }
4923    #[inline(always)]
4924    fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4925        let (_, a1) = self.split_u8x64(a);
4926        let (_, b1) = self.split_u8x64(b);
4927        self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
4928    }
4929    #[inline(always)]
4930    fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4931        let (a0, a1) = self.split_u8x64(a);
4932        let (b0, b1) = self.split_u8x64(b);
4933        self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
4934    }
4935    #[inline(always)]
4936    fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4937        let (a0, a1) = self.split_u8x64(a);
4938        let (b0, b1) = self.split_u8x64(b);
4939        self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
4940    }
4941    #[inline(always)]
4942    fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
4943        let (a0, a1) = self.split_mask8x64(a);
4944        let (b0, b1) = self.split_u8x64(b);
4945        let (c0, c1) = self.split_u8x64(c);
4946        self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
4947    }
4948    #[inline(always)]
4949    fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4950        let (a0, a1) = self.split_u8x64(a);
4951        let (b0, b1) = self.split_u8x64(b);
4952        self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
4953    }
4954    #[inline(always)]
4955    fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4956        let (a0, a1) = self.split_u8x64(a);
4957        let (b0, b1) = self.split_u8x64(b);
4958        self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
4959    }
4960    #[inline(always)]
4961    fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
4962        let mut b0 = [0; 32usize];
4963        let mut b1 = [0; 32usize];
4964        b0.copy_from_slice(&a.val[0..32usize]);
4965        b1.copy_from_slice(&a.val[32usize..64usize]);
4966        (b0.simd_into(self), b1.simd_into(self))
4967    }
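    // Interleaved 128-bit load/store for u8x64. The load de-interleaves the
    // 64 source bytes, viewed as sixteen groups of four: byte `r` of every
    // group lands in the `r`-th block of 16 output lanes. The store below
    // applies the inverse permutation, so a load followed by a store
    // round-trips the data.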
4968    #[inline(always)]
4969    fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
4970        [
4971            src[0usize],
4972            src[4usize],
4973            src[8usize],
4974            src[12usize],
4975            src[16usize],
4976            src[20usize],
4977            src[24usize],
4978            src[28usize],
4979            src[32usize],
4980            src[36usize],
4981            src[40usize],
4982            src[44usize],
4983            src[48usize],
4984            src[52usize],
4985            src[56usize],
4986            src[60usize],
4987            src[1usize],
4988            src[5usize],
4989            src[9usize],
4990            src[13usize],
4991            src[17usize],
4992            src[21usize],
4993            src[25usize],
4994            src[29usize],
4995            src[33usize],
4996            src[37usize],
4997            src[41usize],
4998            src[45usize],
4999            src[49usize],
5000            src[53usize],
5001            src[57usize],
5002            src[61usize],
5003            src[2usize],
5004            src[6usize],
5005            src[10usize],
5006            src[14usize],
5007            src[18usize],
5008            src[22usize],
5009            src[26usize],
5010            src[30usize],
5011            src[34usize],
5012            src[38usize],
5013            src[42usize],
5014            src[46usize],
5015            src[50usize],
5016            src[54usize],
5017            src[58usize],
5018            src[62usize],
5019            src[3usize],
5020            src[7usize],
5021            src[11usize],
5022            src[15usize],
5023            src[19usize],
5024            src[23usize],
5025            src[27usize],
5026            src[31usize],
5027            src[35usize],
5028            src[39usize],
5029            src[43usize],
5030            src[47usize],
5031            src[51usize],
5032            src[55usize],
5033            src[59usize],
5034            src[63usize],
5035        ]
5036        .simd_into(self)
5037    }
5038    #[inline(always)]
5039    fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) {
5040        *dest = [
5041            a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
5042            a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
5043            a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
5044            a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
5045            a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
5046            a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
5047            a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
5048            a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
5049            a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
5050            a[63usize],
5051        ];
5052    }
5053    #[inline(always)]
5054    fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
5055        let (a0, a1) = self.split_u8x64(a);
5056        self.combine_u32x8(
5057            self.reinterpret_u32_u8x32(a0),
5058            self.reinterpret_u32_u8x32(a1),
5059        )
5060    }
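    // mask8x64 operations follow the same two-half pattern on mask8x32.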
5061    #[inline(always)]
5062    fn splat_mask8x64(self, a: i8) -> mask8x64<Self> {
5063        let half = self.splat_mask8x32(a);
5064        self.combine_mask8x32(half, half)
5065    }
5066    #[inline(always)]
5067    fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
5068        let (a0, a1) = self.split_mask8x64(a);
5069        self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
5070    }
5071    #[inline(always)]
5072    fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5073        let (a0, a1) = self.split_mask8x64(a);
5074        let (b0, b1) = self.split_mask8x64(b);
5075        self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
5076    }
5077    #[inline(always)]
5078    fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5079        let (a0, a1) = self.split_mask8x64(a);
5080        let (b0, b1) = self.split_mask8x64(b);
5081        self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
5082    }
5083    #[inline(always)]
5084    fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5085        let (a0, a1) = self.split_mask8x64(a);
5086        let (b0, b1) = self.split_mask8x64(b);
5087        self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
5088    }
5089    #[inline(always)]
5090    fn select_mask8x64(
5091        self,
5092        a: mask8x64<Self>,
5093        b: mask8x64<Self>,
5094        c: mask8x64<Self>,
5095    ) -> mask8x64<Self> {
5096        let (a0, a1) = self.split_mask8x64(a);
5097        let (b0, b1) = self.split_mask8x64(b);
5098        let (c0, c1) = self.split_mask8x64(c);
5099        self.combine_mask8x32(
5100            self.select_mask8x32(a0, b0, c0),
5101            self.select_mask8x32(a1, b1, c1),
5102        )
5103    }
5104    #[inline(always)]
5105    fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5106        let (a0, a1) = self.split_mask8x64(a);
5107        let (b0, b1) = self.split_mask8x64(b);
5108        self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
5109    }
5110    #[inline(always)]
5111    fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
5112        let mut b0 = [0; 32usize];
5113        let mut b1 = [0; 32usize];
5114        b0.copy_from_slice(&a.val[0..32usize]);
5115        b1.copy_from_slice(&a.val[32usize..64usize]);
5116        (b0.simd_into(self), b1.simd_into(self))
5117    }
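    // i16x32 operations, processed as two i16x16 halves.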
5118    #[inline(always)]
5119    fn splat_i16x32(self, a: i16) -> i16x32<Self> {
5120        let half = self.splat_i16x16(a);
5121        self.combine_i16x16(half, half)
5122    }
5123    #[inline(always)]
5124    fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
5125        let (a0, a1) = self.split_i16x32(a);
5126        self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
5127    }
5128    #[inline(always)]
5129    fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5130        let (a0, a1) = self.split_i16x32(a);
5131        let (b0, b1) = self.split_i16x32(b);
5132        self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
5133    }
5134    #[inline(always)]
5135    fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5136        let (a0, a1) = self.split_i16x32(a);
5137        let (b0, b1) = self.split_i16x32(b);
5138        self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
5139    }
5140    #[inline(always)]
5141    fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5142        let (a0, a1) = self.split_i16x32(a);
5143        let (b0, b1) = self.split_i16x32(b);
5144        self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
5145    }
5146    #[inline(always)]
5147    fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5148        let (a0, a1) = self.split_i16x32(a);
5149        let (b0, b1) = self.split_i16x32(b);
5150        self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
5151    }
5152    #[inline(always)]
5153    fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5154        let (a0, a1) = self.split_i16x32(a);
5155        let (b0, b1) = self.split_i16x32(b);
5156        self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
5157    }
5158    #[inline(always)]
5159    fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5160        let (a0, a1) = self.split_i16x32(a);
5161        let (b0, b1) = self.split_i16x32(b);
5162        self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
5163    }
5164    #[inline(always)]
5165    fn shr_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
5166        let (a0, a1) = self.split_i16x32(a);
5167        self.combine_i16x16(self.shr_i16x16(a0, b), self.shr_i16x16(a1, b))
5168    }
5169    #[inline(always)]
5170    fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5171        let (a0, a1) = self.split_i16x32(a);
5172        let (b0, b1) = self.split_i16x32(b);
5173        self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
5174    }
5175    #[inline(always)]
5176    fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5177        let (a0, a1) = self.split_i16x32(a);
5178        let (b0, b1) = self.split_i16x32(b);
5179        self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
5180    }
5181    #[inline(always)]
5182    fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5183        let (a0, a1) = self.split_i16x32(a);
5184        let (b0, b1) = self.split_i16x32(b);
5185        self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
5186    }
5187    #[inline(always)]
5188    fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5189        let (a0, a1) = self.split_i16x32(a);
5190        let (b0, b1) = self.split_i16x32(b);
5191        self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
5192    }
5193    #[inline(always)]
5194    fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5195        let (a0, a1) = self.split_i16x32(a);
5196        let (b0, b1) = self.split_i16x32(b);
5197        self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
5198    }
5199    #[inline(always)]
5200    fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5201        let (a0, _) = self.split_i16x32(a);
5202        let (b0, _) = self.split_i16x32(b);
5203        self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
5204    }
5205    #[inline(always)]
5206    fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5207        let (_, a1) = self.split_i16x32(a);
5208        let (_, b1) = self.split_i16x32(b);
5209        self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
5210    }
5211    #[inline(always)]
5212    fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5213        let (a0, a1) = self.split_i16x32(a);
5214        let (b0, b1) = self.split_i16x32(b);
5215        self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
5216    }
5217    #[inline(always)]
5218    fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5219        let (a0, a1) = self.split_i16x32(a);
5220        let (b0, b1) = self.split_i16x32(b);
5221        self.combine_i16x16(
5222            self.unzip_high_i16x16(a0, a1),
5223            self.unzip_high_i16x16(b0, b1),
5224        )
5225    }
5226    #[inline(always)]
5227    fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
5228        let (a0, a1) = self.split_mask16x32(a);
5229        let (b0, b1) = self.split_i16x32(b);
5230        let (c0, c1) = self.split_i16x32(c);
5231        self.combine_i16x16(
5232            self.select_i16x16(a0, b0, c0),
5233            self.select_i16x16(a1, b1, c1),
5234        )
5235    }
5236    #[inline(always)]
5237    fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5238        let (a0, a1) = self.split_i16x32(a);
5239        let (b0, b1) = self.split_i16x32(b);
5240        self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
5241    }
5242    #[inline(always)]
5243    fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5244        let (a0, a1) = self.split_i16x32(a);
5245        let (b0, b1) = self.split_i16x32(b);
5246        self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
5247    }
5248    #[inline(always)]
5249    fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
5250        let mut b0 = [0; 16usize];
5251        let mut b1 = [0; 16usize];
5252        b0.copy_from_slice(&a.val[0..16usize]);
5253        b1.copy_from_slice(&a.val[16usize..32usize]);
5254        (b0.simd_into(self), b1.simd_into(self))
5255    }
5256    #[inline(always)]
5257    fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
5258        let (a0, a1) = self.split_i16x32(a);
5259        self.combine_u8x32(
5260            self.reinterpret_u8_i16x16(a0),
5261            self.reinterpret_u8_i16x16(a1),
5262        )
5263    }
5264    #[inline(always)]
5265    fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
5266        let (a0, a1) = self.split_i16x32(a);
5267        self.combine_u32x8(
5268            self.reinterpret_u32_i16x16(a0),
5269            self.reinterpret_u32_i16x16(a1),
5270        )
5271    }
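    // u16x32 operations, processed as two u16x16 halves.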
    #[inline(always)]
    fn splat_u16x32(self, a: u16) -> u16x32<Self> {
        let half = self.splat_u16x16(a);
        self.combine_u16x16(half, half)
    }
    #[inline(always)]
    fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
    }
    #[inline(always)]
    fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
    }
    #[inline(always)]
    fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
    }
    #[inline(always)]
    fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
    }
    #[inline(always)]
    fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
    }
    #[inline(always)]
    fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
    }
    #[inline(always)]
    fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
    }
    #[inline(always)]
    fn shr_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u16x16(self.shr_u16x16(a0, b), self.shr_u16x16(a1, b))
    }
    #[inline(always)]
    fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
    }
    #[inline(always)]
    fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, _) = self.split_u16x32(a);
        let (b0, _) = self.split_u16x32(b);
        self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
    }
    #[inline(always)]
    fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (_, a1) = self.split_u16x32(a);
        let (_, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(
            self.unzip_high_u16x16(a0, a1),
            self.unzip_high_u16x16(b0, b1),
        )
    }
    #[inline(always)]
    fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        let (c0, c1) = self.split_u16x32(c);
        self.combine_u16x16(
            self.select_u16x16(a0, b0, c0),
            self.select_u16x16(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
    }
    #[inline(always)]
    fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
    }
    #[inline(always)]
    fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
        let mut b0 = [0; 16usize];
        let mut b1 = [0; 16usize];
        b0.copy_from_slice(&a.val[0..16usize]);
        b1.copy_from_slice(&a.val[16usize..32usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
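    // load_interleaved_128 gathers every fourth u16 from `src` (starting at offsets 0, 1,
    // 2 and 3) into four contiguous groups of eight lanes, effectively de-interleaving
    // four interleaved streams into planar order.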
    #[inline(always)]
    fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[16usize],
            src[20usize],
            src[24usize],
            src[28usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[17usize],
            src[21usize],
            src[25usize],
            src[29usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[18usize],
            src[22usize],
            src[26usize],
            src[30usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
            src[19usize],
            src[23usize],
            src[27usize],
            src[31usize],
        ]
        .simd_into(self)
    }
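    // The store applies the inverse permutation, so storing a vector produced by
    // load_interleaved_128_u16x32 reproduces the original interleaved array.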
    #[inline(always)]
    fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
        *dest = [
            a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
            a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
            a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
            a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
            a[7usize], a[15usize], a[23usize], a[31usize],
        ];
    }
    #[inline(always)]
    fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
    }
    #[inline(always)]
    fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u8x32(
            self.reinterpret_u8_u16x16(a0),
            self.reinterpret_u8_u16x16(a1),
        )
    }
    #[inline(always)]
    fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u32x8(
            self.reinterpret_u32_u16x16(a0),
            self.reinterpret_u32_u16x16(a1),
        )
    }
    #[inline(always)]
    fn splat_mask16x32(self, a: i16) -> mask16x32<Self> {
        let half = self.splat_mask16x16(a);
        self.combine_mask16x16(half, half)
    }
    #[inline(always)]
    fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
    }
    #[inline(always)]
    fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn select_mask16x32(
        self,
        a: mask16x32<Self>,
        b: mask16x32<Self>,
        c: mask16x32<Self>,
    ) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        let (c0, c1) = self.split_mask16x32(c);
        self.combine_mask16x16(
            self.select_mask16x16(a0, b0, c0),
            self.select_mask16x16(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(
            self.simd_eq_mask16x16(a0, b0),
            self.simd_eq_mask16x16(a1, b1),
        )
    }
    #[inline(always)]
    fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
        let mut b0 = [0; 16usize];
        let mut b1 = [0; 16usize];
        b0.copy_from_slice(&a.val[0..16usize]);
        b1.copy_from_slice(&a.val[16usize..32usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
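    // The 512-bit i32, u32 and mask32 operations below follow the same pattern as the
    // 16-bit ones above: split each operand into two 256-bit halves, apply the
    // corresponding 256-bit operation, and recombine.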
    #[inline(always)]
    fn splat_i32x16(self, a: i32) -> i32x16<Self> {
        let half = self.splat_i32x8(a);
        self.combine_i32x8(half, half)
    }
    #[inline(always)]
    fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
    }
    #[inline(always)]
    fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
    }
    #[inline(always)]
    fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
    }
    #[inline(always)]
    fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
    }
    #[inline(always)]
    fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
    }
    #[inline(always)]
    fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
    }
    #[inline(always)]
    fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
    }
    #[inline(always)]
    fn shr_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.shr_i32x8(a0, b), self.shr_i32x8(a1, b))
    }
    #[inline(always)]
    fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
    }
    #[inline(always)]
    fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, _) = self.split_i32x16(a);
        let (b0, _) = self.split_i32x16(b);
        self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
    }
    #[inline(always)]
    fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (_, a1) = self.split_i32x16(a);
        let (_, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
    }
    #[inline(always)]
    fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        let (c0, c1) = self.split_i32x16(c);
        self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
    }
    #[inline(always)]
    fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
    }
    #[inline(always)]
    fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
    }
    #[inline(always)]
    fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_u32x8(
            self.reinterpret_u32_i32x8(a0),
            self.reinterpret_u32_i32x8(a1),
        )
    }
    #[inline(always)]
    fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
    }
    #[inline(always)]
    fn splat_u32x16(self, a: u32) -> u32x16<Self> {
        let half = self.splat_u32x8(a);
        self.combine_u32x8(half, half)
    }
    #[inline(always)]
    fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
    }
    #[inline(always)]
    fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
    }
    #[inline(always)]
    fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
    }
    #[inline(always)]
    fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
    }
    #[inline(always)]
    fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
    }
    #[inline(always)]
    fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
    }
    #[inline(always)]
    fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
    }
    #[inline(always)]
    fn shr_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_u32x8(self.shr_u32x8(a0, b), self.shr_u32x8(a1, b))
    }
    #[inline(always)]
    fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
    }
    #[inline(always)]
    fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, _) = self.split_u32x16(a);
        let (b0, _) = self.split_u32x16(b);
        self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
    }
    #[inline(always)]
    fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (_, a1) = self.split_u32x16(a);
        let (_, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
    }
    #[inline(always)]
    fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        let (c0, c1) = self.split_u32x16(c);
        self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
    }
    #[inline(always)]
    fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
    }
    #[inline(always)]
    fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
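    // For u32 lanes the interleaved load is a 4x4 lane transpose: element 4 * j + k of
    // `src` lands in lane j + 4 * k. The store below applies the same transpose, which is
    // its own inverse, so a load followed by a store restores the original array.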
    #[inline(always)]
    fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
        *dest = [
            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
            a[11usize], a[15usize],
        ];
    }
    #[inline(always)]
    fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
    }
    #[inline(always)]
    fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
    }
    #[inline(always)]
    fn splat_mask32x16(self, a: i32) -> mask32x16<Self> {
        let half = self.splat_mask32x8(a);
        self.combine_mask32x8(half, half)
    }
    #[inline(always)]
    fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
    }
    #[inline(always)]
    fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn select_mask32x16(
        self,
        a: mask32x16<Self>,
        b: mask32x16<Self>,
        c: mask32x16<Self>,
    ) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        let (c0, c1) = self.split_mask32x16(c);
        self.combine_mask32x8(
            self.select_mask32x8(a0, b0, c0),
            self.select_mask32x8(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
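    // The 512-bit f64 and mask64 operations below use the same two-half delegation, this
    // time onto the f64x4 and mask64x4 implementations (including floor/fract/trunc and
    // the precise min/max variants).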
    #[inline(always)]
    fn splat_f64x8(self, a: f64) -> f64x8<Self> {
        let half = self.splat_f64x4(a);
        self.combine_f64x4(half, half)
    }
    #[inline(always)]
    fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
    }
    #[inline(always)]
    fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
    }
    #[inline(always)]
    fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
    }
    #[inline(always)]
    fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
    }
    #[inline(always)]
    fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
    }
    #[inline(always)]
    fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
    }
    #[inline(always)]
    fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
    }
    #[inline(always)]
    fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
    }
    #[inline(always)]
    fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, _) = self.split_f64x8(a);
        let (b0, _) = self.split_f64x8(b);
        self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
    }
    #[inline(always)]
    fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (_, a1) = self.split_f64x8(a);
        let (_, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
    }
    #[inline(always)]
    fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
    }
    #[inline(always)]
    fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(
            self.max_precise_f64x4(a0, b0),
            self.max_precise_f64x4(a1, b1),
        )
    }
    #[inline(always)]
    fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
    }
    #[inline(always)]
    fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(
            self.min_precise_f64x4(a0, b0),
            self.min_precise_f64x4(a1, b1),
        )
    }
    #[inline(always)]
    fn madd_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.madd_f64x4(a0, b0, c0), self.madd_f64x4(a1, b1, c1))
    }
    #[inline(always)]
    fn msub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.msub_f64x4(a0, b0, c0), self.msub_f64x4(a1, b1, c1))
    }
    #[inline(always)]
    fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
    }
    #[inline(always)]
    fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
    }
    #[inline(always)]
    fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
    }
    #[inline(always)]
    fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
    }
    #[inline(always)]
    fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
        let mut b0 = [0.0; 4usize];
        let mut b1 = [0.0; 4usize];
        b0.copy_from_slice(&a.val[0..4usize]);
        b1.copy_from_slice(&a.val[4usize..8usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f32x8(
            self.reinterpret_f32_f64x4(a0),
            self.reinterpret_f32_f64x4(a1),
        )
    }
    #[inline(always)]
    fn splat_mask64x8(self, a: i64) -> mask64x8<Self> {
        let half = self.splat_mask64x4(a);
        self.combine_mask64x4(half, half)
    }
    #[inline(always)]
    fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
    }
    #[inline(always)]
    fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn select_mask64x8(
        self,
        a: mask64x8<Self>,
        b: mask64x8<Self>,
        c: mask64x8<Self>,
    ) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        let (c0, c1) = self.split_mask64x8(c);
        self.combine_mask64x4(
            self.select_mask64x4(a0, b0, c0),
            self.select_mask64x4(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
        let mut b0 = [0; 4usize];
        let mut b1 = [0; 4usize];
        b0.copy_from_slice(&a.val[0..4usize]);
        b1.copy_from_slice(&a.val[4usize..8usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
}