1use crate::{Level, Simd, SimdInto, seal::Seal};
7use crate::{
8 f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
9 i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
10 mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
11 u32x4, u32x8, u32x16,
12};
13use core::ops::*;
14#[cfg(all(feature = "libm", not(feature = "std")))]
15trait FloatExt {
16 fn floor(self) -> Self;
17 fn fract(self) -> Self;
18 fn sqrt(self) -> Self;
19 fn trunc(self) -> Self;
20}
21#[cfg(all(feature = "libm", not(feature = "std")))]
22impl FloatExt for f32 {
23 #[inline(always)]
24 fn floor(self) -> f32 {
25 libm::floorf(self)
26 }
27 #[inline(always)]
28 fn sqrt(self) -> f32 {
29 libm::sqrtf(self)
30 }
31 #[inline(always)]
32 fn fract(self) -> f32 {
33 self - self.trunc()
34 }
35 #[inline(always)]
36 fn trunc(self) -> f32 {
37 libm::truncf(self)
38 }
39}
40#[cfg(all(feature = "libm", not(feature = "std")))]
41impl FloatExt for f64 {
42 #[inline(always)]
43 fn floor(self) -> f64 {
44 libm::floor(self)
45 }
46 #[inline(always)]
47 fn sqrt(self) -> f64 {
48 libm::sqrt(self)
49 }
50 #[inline(always)]
51 fn fract(self) -> f64 {
52 self - self.trunc()
53 }
54 #[inline(always)]
55 fn trunc(self) -> f64 {
56 libm::trunc(self)
57 }
58}
59#[doc = r#" The SIMD token for the "fallback" level."#]
60#[derive(Clone, Copy, Debug)]
61pub struct Fallback {
62 pub fallback: crate::core_arch::fallback::Fallback,
63}
64impl Fallback {
65 #[inline]
66 pub const fn new() -> Self {
67 Fallback {
68 fallback: crate::core_arch::fallback::Fallback::new(),
69 }
70 }
71}
72impl Seal for Fallback {}
73impl Simd for Fallback {
74 type f32s = f32x4<Self>;
75 type u8s = u8x16<Self>;
76 type i8s = i8x16<Self>;
77 type u16s = u16x8<Self>;
78 type i16s = i16x8<Self>;
79 type u32s = u32x4<Self>;
80 type i32s = i32x4<Self>;
81 type mask8s = mask8x16<Self>;
82 type mask16s = mask16x8<Self>;
83 type mask32s = mask32x4<Self>;
84 #[inline(always)]
85 fn level(self) -> Level {
86 Level::Fallback(self)
87 }
88 #[inline]
89 fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
90 f()
91 }
92 #[inline(always)]
93 fn splat_f32x4(self, val: f32) -> f32x4<Self> {
94 [val; 4usize].simd_into(self)
95 }
96 #[inline(always)]
97 fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
98 [
99 f32::abs(a[0usize]),
100 f32::abs(a[1usize]),
101 f32::abs(a[2usize]),
102 f32::abs(a[3usize]),
103 ]
104 .simd_into(self)
105 }
106 #[inline(always)]
107 fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
108 [
109 f32::neg(a[0usize]),
110 f32::neg(a[1usize]),
111 f32::neg(a[2usize]),
112 f32::neg(a[3usize]),
113 ]
114 .simd_into(self)
115 }
116 #[inline(always)]
117 fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
118 [
119 f32::sqrt(a[0usize]),
120 f32::sqrt(a[1usize]),
121 f32::sqrt(a[2usize]),
122 f32::sqrt(a[3usize]),
123 ]
124 .simd_into(self)
125 }
126 #[inline(always)]
127 fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
128 [
129 f32::add(a[0usize], &b[0usize]),
130 f32::add(a[1usize], &b[1usize]),
131 f32::add(a[2usize], &b[2usize]),
132 f32::add(a[3usize], &b[3usize]),
133 ]
134 .simd_into(self)
135 }
136 #[inline(always)]
137 fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
138 [
139 f32::sub(a[0usize], &b[0usize]),
140 f32::sub(a[1usize], &b[1usize]),
141 f32::sub(a[2usize], &b[2usize]),
142 f32::sub(a[3usize], &b[3usize]),
143 ]
144 .simd_into(self)
145 }
146 #[inline(always)]
147 fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
148 [
149 f32::mul(a[0usize], &b[0usize]),
150 f32::mul(a[1usize], &b[1usize]),
151 f32::mul(a[2usize], &b[2usize]),
152 f32::mul(a[3usize], &b[3usize]),
153 ]
154 .simd_into(self)
155 }
156 #[inline(always)]
157 fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
158 [
159 f32::div(a[0usize], &b[0usize]),
160 f32::div(a[1usize], &b[1usize]),
161 f32::div(a[2usize], &b[2usize]),
162 f32::div(a[3usize], &b[3usize]),
163 ]
164 .simd_into(self)
165 }
166 #[inline(always)]
167 fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
168 [
169 f32::copysign(a[0usize], b[0usize]),
170 f32::copysign(a[1usize], b[1usize]),
171 f32::copysign(a[2usize], b[2usize]),
172 f32::copysign(a[3usize], b[3usize]),
173 ]
174 .simd_into(self)
175 }
176 #[inline(always)]
177 fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
178 [
179 -(f32::eq(&a[0usize], &b[0usize]) as i32),
180 -(f32::eq(&a[1usize], &b[1usize]) as i32),
181 -(f32::eq(&a[2usize], &b[2usize]) as i32),
182 -(f32::eq(&a[3usize], &b[3usize]) as i32),
183 ]
184 .simd_into(self)
185 }
186 #[inline(always)]
187 fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
188 [
189 -(f32::lt(&a[0usize], &b[0usize]) as i32),
190 -(f32::lt(&a[1usize], &b[1usize]) as i32),
191 -(f32::lt(&a[2usize], &b[2usize]) as i32),
192 -(f32::lt(&a[3usize], &b[3usize]) as i32),
193 ]
194 .simd_into(self)
195 }
196 #[inline(always)]
197 fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
198 [
199 -(f32::le(&a[0usize], &b[0usize]) as i32),
200 -(f32::le(&a[1usize], &b[1usize]) as i32),
201 -(f32::le(&a[2usize], &b[2usize]) as i32),
202 -(f32::le(&a[3usize], &b[3usize]) as i32),
203 ]
204 .simd_into(self)
205 }
206 #[inline(always)]
207 fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
208 [
209 -(f32::ge(&a[0usize], &b[0usize]) as i32),
210 -(f32::ge(&a[1usize], &b[1usize]) as i32),
211 -(f32::ge(&a[2usize], &b[2usize]) as i32),
212 -(f32::ge(&a[3usize], &b[3usize]) as i32),
213 ]
214 .simd_into(self)
215 }
216 #[inline(always)]
217 fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
218 [
219 -(f32::gt(&a[0usize], &b[0usize]) as i32),
220 -(f32::gt(&a[1usize], &b[1usize]) as i32),
221 -(f32::gt(&a[2usize], &b[2usize]) as i32),
222 -(f32::gt(&a[3usize], &b[3usize]) as i32),
223 ]
224 .simd_into(self)
225 }
226 #[inline(always)]
227 fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
228 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
229 }
230 #[inline(always)]
231 fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
232 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
233 }
234 #[inline(always)]
235 fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
236 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
237 }
238 #[inline(always)]
239 fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
240 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
241 }
242 #[inline(always)]
243 fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
244 [
245 f32::max(a[0usize], b[0usize]),
246 f32::max(a[1usize], b[1usize]),
247 f32::max(a[2usize], b[2usize]),
248 f32::max(a[3usize], b[3usize]),
249 ]
250 .simd_into(self)
251 }
252 #[inline(always)]
253 fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
254 [
255 f32::max(a[0usize], b[0usize]),
256 f32::max(a[1usize], b[1usize]),
257 f32::max(a[2usize], b[2usize]),
258 f32::max(a[3usize], b[3usize]),
259 ]
260 .simd_into(self)
261 }
262 #[inline(always)]
263 fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
264 [
265 f32::min(a[0usize], b[0usize]),
266 f32::min(a[1usize], b[1usize]),
267 f32::min(a[2usize], b[2usize]),
268 f32::min(a[3usize], b[3usize]),
269 ]
270 .simd_into(self)
271 }
272 #[inline(always)]
273 fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
274 [
275 f32::min(a[0usize], b[0usize]),
276 f32::min(a[1usize], b[1usize]),
277 f32::min(a[2usize], b[2usize]),
278 f32::min(a[3usize], b[3usize]),
279 ]
280 .simd_into(self)
281 }
282 #[inline(always)]
283 fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
284 a.mul(b).add(c)
285 }
286 #[inline(always)]
287 fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
288 a.mul(b).sub(c)
289 }
290 #[inline(always)]
291 fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
292 [
293 f32::floor(a[0usize]),
294 f32::floor(a[1usize]),
295 f32::floor(a[2usize]),
296 f32::floor(a[3usize]),
297 ]
298 .simd_into(self)
299 }
300 #[inline(always)]
301 fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
302 [
303 f32::fract(a[0usize]),
304 f32::fract(a[1usize]),
305 f32::fract(a[2usize]),
306 f32::fract(a[3usize]),
307 ]
308 .simd_into(self)
309 }
310 #[inline(always)]
311 fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
312 [
313 f32::trunc(a[0usize]),
314 f32::trunc(a[1usize]),
315 f32::trunc(a[2usize]),
316 f32::trunc(a[3usize]),
317 ]
318 .simd_into(self)
319 }
320 #[inline(always)]
321 fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
322 [
323 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
324 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
325 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
326 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
327 ]
328 .simd_into(self)
329 }
330 #[inline(always)]
331 fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
332 let mut result = [0.0; 8usize];
333 result[0..4usize].copy_from_slice(&a.val);
334 result[4usize..8usize].copy_from_slice(&b.val);
335 result.simd_into(self)
336 }
337 #[inline(always)]
338 fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
339 f64x2 {
340 val: bytemuck::cast(a.val),
341 simd: a.simd,
342 }
343 }
344 #[inline(always)]
345 fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
346 i32x4 {
347 val: bytemuck::cast(a.val),
348 simd: a.simd,
349 }
350 }
351 #[inline(always)]
352 fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
353 u8x16 {
354 val: bytemuck::cast(a.val),
355 simd: a.simd,
356 }
357 }
358 #[inline(always)]
359 fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
360 u32x4 {
361 val: bytemuck::cast(a.val),
362 simd: a.simd,
363 }
364 }
365 #[inline(always)]
366 fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
367 [
368 a[0usize] as u32,
369 a[1usize] as u32,
370 a[2usize] as u32,
371 a[3usize] as u32,
372 ]
373 .simd_into(self)
374 }
375 #[inline(always)]
376 fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
377 [
378 a[0usize] as i32,
379 a[1usize] as i32,
380 a[2usize] as i32,
381 a[3usize] as i32,
382 ]
383 .simd_into(self)
384 }
385 #[inline(always)]
386 fn splat_i8x16(self, val: i8) -> i8x16<Self> {
387 [val; 16usize].simd_into(self)
388 }
389 #[inline(always)]
390 fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
391 [
392 i8::not(a[0usize]),
393 i8::not(a[1usize]),
394 i8::not(a[2usize]),
395 i8::not(a[3usize]),
396 i8::not(a[4usize]),
397 i8::not(a[5usize]),
398 i8::not(a[6usize]),
399 i8::not(a[7usize]),
400 i8::not(a[8usize]),
401 i8::not(a[9usize]),
402 i8::not(a[10usize]),
403 i8::not(a[11usize]),
404 i8::not(a[12usize]),
405 i8::not(a[13usize]),
406 i8::not(a[14usize]),
407 i8::not(a[15usize]),
408 ]
409 .simd_into(self)
410 }
411 #[inline(always)]
412 fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
413 [
414 i8::wrapping_add(a[0usize], b[0usize]),
415 i8::wrapping_add(a[1usize], b[1usize]),
416 i8::wrapping_add(a[2usize], b[2usize]),
417 i8::wrapping_add(a[3usize], b[3usize]),
418 i8::wrapping_add(a[4usize], b[4usize]),
419 i8::wrapping_add(a[5usize], b[5usize]),
420 i8::wrapping_add(a[6usize], b[6usize]),
421 i8::wrapping_add(a[7usize], b[7usize]),
422 i8::wrapping_add(a[8usize], b[8usize]),
423 i8::wrapping_add(a[9usize], b[9usize]),
424 i8::wrapping_add(a[10usize], b[10usize]),
425 i8::wrapping_add(a[11usize], b[11usize]),
426 i8::wrapping_add(a[12usize], b[12usize]),
427 i8::wrapping_add(a[13usize], b[13usize]),
428 i8::wrapping_add(a[14usize], b[14usize]),
429 i8::wrapping_add(a[15usize], b[15usize]),
430 ]
431 .simd_into(self)
432 }
433 #[inline(always)]
434 fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
435 [
436 i8::wrapping_sub(a[0usize], b[0usize]),
437 i8::wrapping_sub(a[1usize], b[1usize]),
438 i8::wrapping_sub(a[2usize], b[2usize]),
439 i8::wrapping_sub(a[3usize], b[3usize]),
440 i8::wrapping_sub(a[4usize], b[4usize]),
441 i8::wrapping_sub(a[5usize], b[5usize]),
442 i8::wrapping_sub(a[6usize], b[6usize]),
443 i8::wrapping_sub(a[7usize], b[7usize]),
444 i8::wrapping_sub(a[8usize], b[8usize]),
445 i8::wrapping_sub(a[9usize], b[9usize]),
446 i8::wrapping_sub(a[10usize], b[10usize]),
447 i8::wrapping_sub(a[11usize], b[11usize]),
448 i8::wrapping_sub(a[12usize], b[12usize]),
449 i8::wrapping_sub(a[13usize], b[13usize]),
450 i8::wrapping_sub(a[14usize], b[14usize]),
451 i8::wrapping_sub(a[15usize], b[15usize]),
452 ]
453 .simd_into(self)
454 }
455 #[inline(always)]
456 fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
457 [
458 i8::wrapping_mul(a[0usize], b[0usize]),
459 i8::wrapping_mul(a[1usize], b[1usize]),
460 i8::wrapping_mul(a[2usize], b[2usize]),
461 i8::wrapping_mul(a[3usize], b[3usize]),
462 i8::wrapping_mul(a[4usize], b[4usize]),
463 i8::wrapping_mul(a[5usize], b[5usize]),
464 i8::wrapping_mul(a[6usize], b[6usize]),
465 i8::wrapping_mul(a[7usize], b[7usize]),
466 i8::wrapping_mul(a[8usize], b[8usize]),
467 i8::wrapping_mul(a[9usize], b[9usize]),
468 i8::wrapping_mul(a[10usize], b[10usize]),
469 i8::wrapping_mul(a[11usize], b[11usize]),
470 i8::wrapping_mul(a[12usize], b[12usize]),
471 i8::wrapping_mul(a[13usize], b[13usize]),
472 i8::wrapping_mul(a[14usize], b[14usize]),
473 i8::wrapping_mul(a[15usize], b[15usize]),
474 ]
475 .simd_into(self)
476 }
477 #[inline(always)]
478 fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
479 [
480 i8::bitand(a[0usize], &b[0usize]),
481 i8::bitand(a[1usize], &b[1usize]),
482 i8::bitand(a[2usize], &b[2usize]),
483 i8::bitand(a[3usize], &b[3usize]),
484 i8::bitand(a[4usize], &b[4usize]),
485 i8::bitand(a[5usize], &b[5usize]),
486 i8::bitand(a[6usize], &b[6usize]),
487 i8::bitand(a[7usize], &b[7usize]),
488 i8::bitand(a[8usize], &b[8usize]),
489 i8::bitand(a[9usize], &b[9usize]),
490 i8::bitand(a[10usize], &b[10usize]),
491 i8::bitand(a[11usize], &b[11usize]),
492 i8::bitand(a[12usize], &b[12usize]),
493 i8::bitand(a[13usize], &b[13usize]),
494 i8::bitand(a[14usize], &b[14usize]),
495 i8::bitand(a[15usize], &b[15usize]),
496 ]
497 .simd_into(self)
498 }
499 #[inline(always)]
500 fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
501 [
502 i8::bitor(a[0usize], &b[0usize]),
503 i8::bitor(a[1usize], &b[1usize]),
504 i8::bitor(a[2usize], &b[2usize]),
505 i8::bitor(a[3usize], &b[3usize]),
506 i8::bitor(a[4usize], &b[4usize]),
507 i8::bitor(a[5usize], &b[5usize]),
508 i8::bitor(a[6usize], &b[6usize]),
509 i8::bitor(a[7usize], &b[7usize]),
510 i8::bitor(a[8usize], &b[8usize]),
511 i8::bitor(a[9usize], &b[9usize]),
512 i8::bitor(a[10usize], &b[10usize]),
513 i8::bitor(a[11usize], &b[11usize]),
514 i8::bitor(a[12usize], &b[12usize]),
515 i8::bitor(a[13usize], &b[13usize]),
516 i8::bitor(a[14usize], &b[14usize]),
517 i8::bitor(a[15usize], &b[15usize]),
518 ]
519 .simd_into(self)
520 }
521 #[inline(always)]
522 fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
523 [
524 i8::bitxor(a[0usize], &b[0usize]),
525 i8::bitxor(a[1usize], &b[1usize]),
526 i8::bitxor(a[2usize], &b[2usize]),
527 i8::bitxor(a[3usize], &b[3usize]),
528 i8::bitxor(a[4usize], &b[4usize]),
529 i8::bitxor(a[5usize], &b[5usize]),
530 i8::bitxor(a[6usize], &b[6usize]),
531 i8::bitxor(a[7usize], &b[7usize]),
532 i8::bitxor(a[8usize], &b[8usize]),
533 i8::bitxor(a[9usize], &b[9usize]),
534 i8::bitxor(a[10usize], &b[10usize]),
535 i8::bitxor(a[11usize], &b[11usize]),
536 i8::bitxor(a[12usize], &b[12usize]),
537 i8::bitxor(a[13usize], &b[13usize]),
538 i8::bitxor(a[14usize], &b[14usize]),
539 i8::bitxor(a[15usize], &b[15usize]),
540 ]
541 .simd_into(self)
542 }
543 #[inline(always)]
544 fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
545 [
546 i8::shr(a[0usize], shift as i8),
547 i8::shr(a[1usize], shift as i8),
548 i8::shr(a[2usize], shift as i8),
549 i8::shr(a[3usize], shift as i8),
550 i8::shr(a[4usize], shift as i8),
551 i8::shr(a[5usize], shift as i8),
552 i8::shr(a[6usize], shift as i8),
553 i8::shr(a[7usize], shift as i8),
554 i8::shr(a[8usize], shift as i8),
555 i8::shr(a[9usize], shift as i8),
556 i8::shr(a[10usize], shift as i8),
557 i8::shr(a[11usize], shift as i8),
558 i8::shr(a[12usize], shift as i8),
559 i8::shr(a[13usize], shift as i8),
560 i8::shr(a[14usize], shift as i8),
561 i8::shr(a[15usize], shift as i8),
562 ]
563 .simd_into(self)
564 }
565 #[inline(always)]
566 fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
567 [
568 i8::shr(a[0usize], &b[0usize]),
569 i8::shr(a[1usize], &b[1usize]),
570 i8::shr(a[2usize], &b[2usize]),
571 i8::shr(a[3usize], &b[3usize]),
572 i8::shr(a[4usize], &b[4usize]),
573 i8::shr(a[5usize], &b[5usize]),
574 i8::shr(a[6usize], &b[6usize]),
575 i8::shr(a[7usize], &b[7usize]),
576 i8::shr(a[8usize], &b[8usize]),
577 i8::shr(a[9usize], &b[9usize]),
578 i8::shr(a[10usize], &b[10usize]),
579 i8::shr(a[11usize], &b[11usize]),
580 i8::shr(a[12usize], &b[12usize]),
581 i8::shr(a[13usize], &b[13usize]),
582 i8::shr(a[14usize], &b[14usize]),
583 i8::shr(a[15usize], &b[15usize]),
584 ]
585 .simd_into(self)
586 }
587 #[inline(always)]
588 fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
589 [
590 i8::shl(a[0usize], shift as i8),
591 i8::shl(a[1usize], shift as i8),
592 i8::shl(a[2usize], shift as i8),
593 i8::shl(a[3usize], shift as i8),
594 i8::shl(a[4usize], shift as i8),
595 i8::shl(a[5usize], shift as i8),
596 i8::shl(a[6usize], shift as i8),
597 i8::shl(a[7usize], shift as i8),
598 i8::shl(a[8usize], shift as i8),
599 i8::shl(a[9usize], shift as i8),
600 i8::shl(a[10usize], shift as i8),
601 i8::shl(a[11usize], shift as i8),
602 i8::shl(a[12usize], shift as i8),
603 i8::shl(a[13usize], shift as i8),
604 i8::shl(a[14usize], shift as i8),
605 i8::shl(a[15usize], shift as i8),
606 ]
607 .simd_into(self)
608 }
609 #[inline(always)]
610 fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
611 [
612 -(i8::eq(&a[0usize], &b[0usize]) as i8),
613 -(i8::eq(&a[1usize], &b[1usize]) as i8),
614 -(i8::eq(&a[2usize], &b[2usize]) as i8),
615 -(i8::eq(&a[3usize], &b[3usize]) as i8),
616 -(i8::eq(&a[4usize], &b[4usize]) as i8),
617 -(i8::eq(&a[5usize], &b[5usize]) as i8),
618 -(i8::eq(&a[6usize], &b[6usize]) as i8),
619 -(i8::eq(&a[7usize], &b[7usize]) as i8),
620 -(i8::eq(&a[8usize], &b[8usize]) as i8),
621 -(i8::eq(&a[9usize], &b[9usize]) as i8),
622 -(i8::eq(&a[10usize], &b[10usize]) as i8),
623 -(i8::eq(&a[11usize], &b[11usize]) as i8),
624 -(i8::eq(&a[12usize], &b[12usize]) as i8),
625 -(i8::eq(&a[13usize], &b[13usize]) as i8),
626 -(i8::eq(&a[14usize], &b[14usize]) as i8),
627 -(i8::eq(&a[15usize], &b[15usize]) as i8),
628 ]
629 .simd_into(self)
630 }
631 #[inline(always)]
632 fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
633 [
634 -(i8::lt(&a[0usize], &b[0usize]) as i8),
635 -(i8::lt(&a[1usize], &b[1usize]) as i8),
636 -(i8::lt(&a[2usize], &b[2usize]) as i8),
637 -(i8::lt(&a[3usize], &b[3usize]) as i8),
638 -(i8::lt(&a[4usize], &b[4usize]) as i8),
639 -(i8::lt(&a[5usize], &b[5usize]) as i8),
640 -(i8::lt(&a[6usize], &b[6usize]) as i8),
641 -(i8::lt(&a[7usize], &b[7usize]) as i8),
642 -(i8::lt(&a[8usize], &b[8usize]) as i8),
643 -(i8::lt(&a[9usize], &b[9usize]) as i8),
644 -(i8::lt(&a[10usize], &b[10usize]) as i8),
645 -(i8::lt(&a[11usize], &b[11usize]) as i8),
646 -(i8::lt(&a[12usize], &b[12usize]) as i8),
647 -(i8::lt(&a[13usize], &b[13usize]) as i8),
648 -(i8::lt(&a[14usize], &b[14usize]) as i8),
649 -(i8::lt(&a[15usize], &b[15usize]) as i8),
650 ]
651 .simd_into(self)
652 }
653 #[inline(always)]
654 fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
655 [
656 -(i8::le(&a[0usize], &b[0usize]) as i8),
657 -(i8::le(&a[1usize], &b[1usize]) as i8),
658 -(i8::le(&a[2usize], &b[2usize]) as i8),
659 -(i8::le(&a[3usize], &b[3usize]) as i8),
660 -(i8::le(&a[4usize], &b[4usize]) as i8),
661 -(i8::le(&a[5usize], &b[5usize]) as i8),
662 -(i8::le(&a[6usize], &b[6usize]) as i8),
663 -(i8::le(&a[7usize], &b[7usize]) as i8),
664 -(i8::le(&a[8usize], &b[8usize]) as i8),
665 -(i8::le(&a[9usize], &b[9usize]) as i8),
666 -(i8::le(&a[10usize], &b[10usize]) as i8),
667 -(i8::le(&a[11usize], &b[11usize]) as i8),
668 -(i8::le(&a[12usize], &b[12usize]) as i8),
669 -(i8::le(&a[13usize], &b[13usize]) as i8),
670 -(i8::le(&a[14usize], &b[14usize]) as i8),
671 -(i8::le(&a[15usize], &b[15usize]) as i8),
672 ]
673 .simd_into(self)
674 }
675 #[inline(always)]
676 fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
677 [
678 -(i8::ge(&a[0usize], &b[0usize]) as i8),
679 -(i8::ge(&a[1usize], &b[1usize]) as i8),
680 -(i8::ge(&a[2usize], &b[2usize]) as i8),
681 -(i8::ge(&a[3usize], &b[3usize]) as i8),
682 -(i8::ge(&a[4usize], &b[4usize]) as i8),
683 -(i8::ge(&a[5usize], &b[5usize]) as i8),
684 -(i8::ge(&a[6usize], &b[6usize]) as i8),
685 -(i8::ge(&a[7usize], &b[7usize]) as i8),
686 -(i8::ge(&a[8usize], &b[8usize]) as i8),
687 -(i8::ge(&a[9usize], &b[9usize]) as i8),
688 -(i8::ge(&a[10usize], &b[10usize]) as i8),
689 -(i8::ge(&a[11usize], &b[11usize]) as i8),
690 -(i8::ge(&a[12usize], &b[12usize]) as i8),
691 -(i8::ge(&a[13usize], &b[13usize]) as i8),
692 -(i8::ge(&a[14usize], &b[14usize]) as i8),
693 -(i8::ge(&a[15usize], &b[15usize]) as i8),
694 ]
695 .simd_into(self)
696 }
697 #[inline(always)]
698 fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
699 [
700 -(i8::gt(&a[0usize], &b[0usize]) as i8),
701 -(i8::gt(&a[1usize], &b[1usize]) as i8),
702 -(i8::gt(&a[2usize], &b[2usize]) as i8),
703 -(i8::gt(&a[3usize], &b[3usize]) as i8),
704 -(i8::gt(&a[4usize], &b[4usize]) as i8),
705 -(i8::gt(&a[5usize], &b[5usize]) as i8),
706 -(i8::gt(&a[6usize], &b[6usize]) as i8),
707 -(i8::gt(&a[7usize], &b[7usize]) as i8),
708 -(i8::gt(&a[8usize], &b[8usize]) as i8),
709 -(i8::gt(&a[9usize], &b[9usize]) as i8),
710 -(i8::gt(&a[10usize], &b[10usize]) as i8),
711 -(i8::gt(&a[11usize], &b[11usize]) as i8),
712 -(i8::gt(&a[12usize], &b[12usize]) as i8),
713 -(i8::gt(&a[13usize], &b[13usize]) as i8),
714 -(i8::gt(&a[14usize], &b[14usize]) as i8),
715 -(i8::gt(&a[15usize], &b[15usize]) as i8),
716 ]
717 .simd_into(self)
718 }
719 #[inline(always)]
720 fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
721 [
722 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
723 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
724 ]
725 .simd_into(self)
726 }
727 #[inline(always)]
728 fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
729 [
730 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
731 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
732 a[15usize], b[15usize],
733 ]
734 .simd_into(self)
735 }
736 #[inline(always)]
737 fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
738 [
739 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
740 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
741 b[12usize], b[14usize],
742 ]
743 .simd_into(self)
744 }
745 #[inline(always)]
746 fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
747 [
748 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
749 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
750 b[13usize], b[15usize],
751 ]
752 .simd_into(self)
753 }
754 #[inline(always)]
755 fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
756 [
757 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
758 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
759 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
760 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
761 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
762 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
763 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
764 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
765 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
766 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
767 if a[10usize] != 0 {
768 b[10usize]
769 } else {
770 c[10usize]
771 },
772 if a[11usize] != 0 {
773 b[11usize]
774 } else {
775 c[11usize]
776 },
777 if a[12usize] != 0 {
778 b[12usize]
779 } else {
780 c[12usize]
781 },
782 if a[13usize] != 0 {
783 b[13usize]
784 } else {
785 c[13usize]
786 },
787 if a[14usize] != 0 {
788 b[14usize]
789 } else {
790 c[14usize]
791 },
792 if a[15usize] != 0 {
793 b[15usize]
794 } else {
795 c[15usize]
796 },
797 ]
798 .simd_into(self)
799 }
800 #[inline(always)]
801 fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
802 [
803 i8::min(a[0usize], b[0usize]),
804 i8::min(a[1usize], b[1usize]),
805 i8::min(a[2usize], b[2usize]),
806 i8::min(a[3usize], b[3usize]),
807 i8::min(a[4usize], b[4usize]),
808 i8::min(a[5usize], b[5usize]),
809 i8::min(a[6usize], b[6usize]),
810 i8::min(a[7usize], b[7usize]),
811 i8::min(a[8usize], b[8usize]),
812 i8::min(a[9usize], b[9usize]),
813 i8::min(a[10usize], b[10usize]),
814 i8::min(a[11usize], b[11usize]),
815 i8::min(a[12usize], b[12usize]),
816 i8::min(a[13usize], b[13usize]),
817 i8::min(a[14usize], b[14usize]),
818 i8::min(a[15usize], b[15usize]),
819 ]
820 .simd_into(self)
821 }
822 #[inline(always)]
823 fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
824 [
825 i8::max(a[0usize], b[0usize]),
826 i8::max(a[1usize], b[1usize]),
827 i8::max(a[2usize], b[2usize]),
828 i8::max(a[3usize], b[3usize]),
829 i8::max(a[4usize], b[4usize]),
830 i8::max(a[5usize], b[5usize]),
831 i8::max(a[6usize], b[6usize]),
832 i8::max(a[7usize], b[7usize]),
833 i8::max(a[8usize], b[8usize]),
834 i8::max(a[9usize], b[9usize]),
835 i8::max(a[10usize], b[10usize]),
836 i8::max(a[11usize], b[11usize]),
837 i8::max(a[12usize], b[12usize]),
838 i8::max(a[13usize], b[13usize]),
839 i8::max(a[14usize], b[14usize]),
840 i8::max(a[15usize], b[15usize]),
841 ]
842 .simd_into(self)
843 }
844 #[inline(always)]
845 fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
846 let mut result = [0; 32usize];
847 result[0..16usize].copy_from_slice(&a.val);
848 result[16usize..32usize].copy_from_slice(&b.val);
849 result.simd_into(self)
850 }
851 #[inline(always)]
852 fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
853 [
854 i8::neg(a[0usize]),
855 i8::neg(a[1usize]),
856 i8::neg(a[2usize]),
857 i8::neg(a[3usize]),
858 i8::neg(a[4usize]),
859 i8::neg(a[5usize]),
860 i8::neg(a[6usize]),
861 i8::neg(a[7usize]),
862 i8::neg(a[8usize]),
863 i8::neg(a[9usize]),
864 i8::neg(a[10usize]),
865 i8::neg(a[11usize]),
866 i8::neg(a[12usize]),
867 i8::neg(a[13usize]),
868 i8::neg(a[14usize]),
869 i8::neg(a[15usize]),
870 ]
871 .simd_into(self)
872 }
873 #[inline(always)]
874 fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
875 u8x16 {
876 val: bytemuck::cast(a.val),
877 simd: a.simd,
878 }
879 }
880 #[inline(always)]
881 fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
882 u32x4 {
883 val: bytemuck::cast(a.val),
884 simd: a.simd,
885 }
886 }
887 #[inline(always)]
888 fn splat_u8x16(self, val: u8) -> u8x16<Self> {
889 [val; 16usize].simd_into(self)
890 }
891 #[inline(always)]
892 fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
893 [
894 u8::not(a[0usize]),
895 u8::not(a[1usize]),
896 u8::not(a[2usize]),
897 u8::not(a[3usize]),
898 u8::not(a[4usize]),
899 u8::not(a[5usize]),
900 u8::not(a[6usize]),
901 u8::not(a[7usize]),
902 u8::not(a[8usize]),
903 u8::not(a[9usize]),
904 u8::not(a[10usize]),
905 u8::not(a[11usize]),
906 u8::not(a[12usize]),
907 u8::not(a[13usize]),
908 u8::not(a[14usize]),
909 u8::not(a[15usize]),
910 ]
911 .simd_into(self)
912 }
913 #[inline(always)]
914 fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
915 [
916 u8::wrapping_add(a[0usize], b[0usize]),
917 u8::wrapping_add(a[1usize], b[1usize]),
918 u8::wrapping_add(a[2usize], b[2usize]),
919 u8::wrapping_add(a[3usize], b[3usize]),
920 u8::wrapping_add(a[4usize], b[4usize]),
921 u8::wrapping_add(a[5usize], b[5usize]),
922 u8::wrapping_add(a[6usize], b[6usize]),
923 u8::wrapping_add(a[7usize], b[7usize]),
924 u8::wrapping_add(a[8usize], b[8usize]),
925 u8::wrapping_add(a[9usize], b[9usize]),
926 u8::wrapping_add(a[10usize], b[10usize]),
927 u8::wrapping_add(a[11usize], b[11usize]),
928 u8::wrapping_add(a[12usize], b[12usize]),
929 u8::wrapping_add(a[13usize], b[13usize]),
930 u8::wrapping_add(a[14usize], b[14usize]),
931 u8::wrapping_add(a[15usize], b[15usize]),
932 ]
933 .simd_into(self)
934 }
935 #[inline(always)]
936 fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
937 [
938 u8::wrapping_sub(a[0usize], b[0usize]),
939 u8::wrapping_sub(a[1usize], b[1usize]),
940 u8::wrapping_sub(a[2usize], b[2usize]),
941 u8::wrapping_sub(a[3usize], b[3usize]),
942 u8::wrapping_sub(a[4usize], b[4usize]),
943 u8::wrapping_sub(a[5usize], b[5usize]),
944 u8::wrapping_sub(a[6usize], b[6usize]),
945 u8::wrapping_sub(a[7usize], b[7usize]),
946 u8::wrapping_sub(a[8usize], b[8usize]),
947 u8::wrapping_sub(a[9usize], b[9usize]),
948 u8::wrapping_sub(a[10usize], b[10usize]),
949 u8::wrapping_sub(a[11usize], b[11usize]),
950 u8::wrapping_sub(a[12usize], b[12usize]),
951 u8::wrapping_sub(a[13usize], b[13usize]),
952 u8::wrapping_sub(a[14usize], b[14usize]),
953 u8::wrapping_sub(a[15usize], b[15usize]),
954 ]
955 .simd_into(self)
956 }
957 #[inline(always)]
958 fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
959 [
960 u8::wrapping_mul(a[0usize], b[0usize]),
961 u8::wrapping_mul(a[1usize], b[1usize]),
962 u8::wrapping_mul(a[2usize], b[2usize]),
963 u8::wrapping_mul(a[3usize], b[3usize]),
964 u8::wrapping_mul(a[4usize], b[4usize]),
965 u8::wrapping_mul(a[5usize], b[5usize]),
966 u8::wrapping_mul(a[6usize], b[6usize]),
967 u8::wrapping_mul(a[7usize], b[7usize]),
968 u8::wrapping_mul(a[8usize], b[8usize]),
969 u8::wrapping_mul(a[9usize], b[9usize]),
970 u8::wrapping_mul(a[10usize], b[10usize]),
971 u8::wrapping_mul(a[11usize], b[11usize]),
972 u8::wrapping_mul(a[12usize], b[12usize]),
973 u8::wrapping_mul(a[13usize], b[13usize]),
974 u8::wrapping_mul(a[14usize], b[14usize]),
975 u8::wrapping_mul(a[15usize], b[15usize]),
976 ]
977 .simd_into(self)
978 }
979 #[inline(always)]
980 fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
981 [
982 u8::bitand(a[0usize], &b[0usize]),
983 u8::bitand(a[1usize], &b[1usize]),
984 u8::bitand(a[2usize], &b[2usize]),
985 u8::bitand(a[3usize], &b[3usize]),
986 u8::bitand(a[4usize], &b[4usize]),
987 u8::bitand(a[5usize], &b[5usize]),
988 u8::bitand(a[6usize], &b[6usize]),
989 u8::bitand(a[7usize], &b[7usize]),
990 u8::bitand(a[8usize], &b[8usize]),
991 u8::bitand(a[9usize], &b[9usize]),
992 u8::bitand(a[10usize], &b[10usize]),
993 u8::bitand(a[11usize], &b[11usize]),
994 u8::bitand(a[12usize], &b[12usize]),
995 u8::bitand(a[13usize], &b[13usize]),
996 u8::bitand(a[14usize], &b[14usize]),
997 u8::bitand(a[15usize], &b[15usize]),
998 ]
999 .simd_into(self)
1000 }
1001 #[inline(always)]
1002 fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1003 [
1004 u8::bitor(a[0usize], &b[0usize]),
1005 u8::bitor(a[1usize], &b[1usize]),
1006 u8::bitor(a[2usize], &b[2usize]),
1007 u8::bitor(a[3usize], &b[3usize]),
1008 u8::bitor(a[4usize], &b[4usize]),
1009 u8::bitor(a[5usize], &b[5usize]),
1010 u8::bitor(a[6usize], &b[6usize]),
1011 u8::bitor(a[7usize], &b[7usize]),
1012 u8::bitor(a[8usize], &b[8usize]),
1013 u8::bitor(a[9usize], &b[9usize]),
1014 u8::bitor(a[10usize], &b[10usize]),
1015 u8::bitor(a[11usize], &b[11usize]),
1016 u8::bitor(a[12usize], &b[12usize]),
1017 u8::bitor(a[13usize], &b[13usize]),
1018 u8::bitor(a[14usize], &b[14usize]),
1019 u8::bitor(a[15usize], &b[15usize]),
1020 ]
1021 .simd_into(self)
1022 }
1023 #[inline(always)]
1024 fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1025 [
1026 u8::bitxor(a[0usize], &b[0usize]),
1027 u8::bitxor(a[1usize], &b[1usize]),
1028 u8::bitxor(a[2usize], &b[2usize]),
1029 u8::bitxor(a[3usize], &b[3usize]),
1030 u8::bitxor(a[4usize], &b[4usize]),
1031 u8::bitxor(a[5usize], &b[5usize]),
1032 u8::bitxor(a[6usize], &b[6usize]),
1033 u8::bitxor(a[7usize], &b[7usize]),
1034 u8::bitxor(a[8usize], &b[8usize]),
1035 u8::bitxor(a[9usize], &b[9usize]),
1036 u8::bitxor(a[10usize], &b[10usize]),
1037 u8::bitxor(a[11usize], &b[11usize]),
1038 u8::bitxor(a[12usize], &b[12usize]),
1039 u8::bitxor(a[13usize], &b[13usize]),
1040 u8::bitxor(a[14usize], &b[14usize]),
1041 u8::bitxor(a[15usize], &b[15usize]),
1042 ]
1043 .simd_into(self)
1044 }
1045 #[inline(always)]
1046 fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1047 [
1048 u8::shr(a[0usize], shift as u8),
1049 u8::shr(a[1usize], shift as u8),
1050 u8::shr(a[2usize], shift as u8),
1051 u8::shr(a[3usize], shift as u8),
1052 u8::shr(a[4usize], shift as u8),
1053 u8::shr(a[5usize], shift as u8),
1054 u8::shr(a[6usize], shift as u8),
1055 u8::shr(a[7usize], shift as u8),
1056 u8::shr(a[8usize], shift as u8),
1057 u8::shr(a[9usize], shift as u8),
1058 u8::shr(a[10usize], shift as u8),
1059 u8::shr(a[11usize], shift as u8),
1060 u8::shr(a[12usize], shift as u8),
1061 u8::shr(a[13usize], shift as u8),
1062 u8::shr(a[14usize], shift as u8),
1063 u8::shr(a[15usize], shift as u8),
1064 ]
1065 .simd_into(self)
1066 }
1067 #[inline(always)]
1068 fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1069 [
1070 u8::shr(a[0usize], &b[0usize]),
1071 u8::shr(a[1usize], &b[1usize]),
1072 u8::shr(a[2usize], &b[2usize]),
1073 u8::shr(a[3usize], &b[3usize]),
1074 u8::shr(a[4usize], &b[4usize]),
1075 u8::shr(a[5usize], &b[5usize]),
1076 u8::shr(a[6usize], &b[6usize]),
1077 u8::shr(a[7usize], &b[7usize]),
1078 u8::shr(a[8usize], &b[8usize]),
1079 u8::shr(a[9usize], &b[9usize]),
1080 u8::shr(a[10usize], &b[10usize]),
1081 u8::shr(a[11usize], &b[11usize]),
1082 u8::shr(a[12usize], &b[12usize]),
1083 u8::shr(a[13usize], &b[13usize]),
1084 u8::shr(a[14usize], &b[14usize]),
1085 u8::shr(a[15usize], &b[15usize]),
1086 ]
1087 .simd_into(self)
1088 }
1089 #[inline(always)]
1090 fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
1091 [
1092 u8::shl(a[0usize], shift as u8),
1093 u8::shl(a[1usize], shift as u8),
1094 u8::shl(a[2usize], shift as u8),
1095 u8::shl(a[3usize], shift as u8),
1096 u8::shl(a[4usize], shift as u8),
1097 u8::shl(a[5usize], shift as u8),
1098 u8::shl(a[6usize], shift as u8),
1099 u8::shl(a[7usize], shift as u8),
1100 u8::shl(a[8usize], shift as u8),
1101 u8::shl(a[9usize], shift as u8),
1102 u8::shl(a[10usize], shift as u8),
1103 u8::shl(a[11usize], shift as u8),
1104 u8::shl(a[12usize], shift as u8),
1105 u8::shl(a[13usize], shift as u8),
1106 u8::shl(a[14usize], shift as u8),
1107 u8::shl(a[15usize], shift as u8),
1108 ]
1109 .simd_into(self)
1110 }
1111 #[inline(always)]
1112 fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1113 [
1114 -(u8::eq(&a[0usize], &b[0usize]) as i8),
1115 -(u8::eq(&a[1usize], &b[1usize]) as i8),
1116 -(u8::eq(&a[2usize], &b[2usize]) as i8),
1117 -(u8::eq(&a[3usize], &b[3usize]) as i8),
1118 -(u8::eq(&a[4usize], &b[4usize]) as i8),
1119 -(u8::eq(&a[5usize], &b[5usize]) as i8),
1120 -(u8::eq(&a[6usize], &b[6usize]) as i8),
1121 -(u8::eq(&a[7usize], &b[7usize]) as i8),
1122 -(u8::eq(&a[8usize], &b[8usize]) as i8),
1123 -(u8::eq(&a[9usize], &b[9usize]) as i8),
1124 -(u8::eq(&a[10usize], &b[10usize]) as i8),
1125 -(u8::eq(&a[11usize], &b[11usize]) as i8),
1126 -(u8::eq(&a[12usize], &b[12usize]) as i8),
1127 -(u8::eq(&a[13usize], &b[13usize]) as i8),
1128 -(u8::eq(&a[14usize], &b[14usize]) as i8),
1129 -(u8::eq(&a[15usize], &b[15usize]) as i8),
1130 ]
1131 .simd_into(self)
1132 }
1133 #[inline(always)]
1134 fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1135 [
1136 -(u8::lt(&a[0usize], &b[0usize]) as i8),
1137 -(u8::lt(&a[1usize], &b[1usize]) as i8),
1138 -(u8::lt(&a[2usize], &b[2usize]) as i8),
1139 -(u8::lt(&a[3usize], &b[3usize]) as i8),
1140 -(u8::lt(&a[4usize], &b[4usize]) as i8),
1141 -(u8::lt(&a[5usize], &b[5usize]) as i8),
1142 -(u8::lt(&a[6usize], &b[6usize]) as i8),
1143 -(u8::lt(&a[7usize], &b[7usize]) as i8),
1144 -(u8::lt(&a[8usize], &b[8usize]) as i8),
1145 -(u8::lt(&a[9usize], &b[9usize]) as i8),
1146 -(u8::lt(&a[10usize], &b[10usize]) as i8),
1147 -(u8::lt(&a[11usize], &b[11usize]) as i8),
1148 -(u8::lt(&a[12usize], &b[12usize]) as i8),
1149 -(u8::lt(&a[13usize], &b[13usize]) as i8),
1150 -(u8::lt(&a[14usize], &b[14usize]) as i8),
1151 -(u8::lt(&a[15usize], &b[15usize]) as i8),
1152 ]
1153 .simd_into(self)
1154 }
1155 #[inline(always)]
1156 fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1157 [
1158 -(u8::le(&a[0usize], &b[0usize]) as i8),
1159 -(u8::le(&a[1usize], &b[1usize]) as i8),
1160 -(u8::le(&a[2usize], &b[2usize]) as i8),
1161 -(u8::le(&a[3usize], &b[3usize]) as i8),
1162 -(u8::le(&a[4usize], &b[4usize]) as i8),
1163 -(u8::le(&a[5usize], &b[5usize]) as i8),
1164 -(u8::le(&a[6usize], &b[6usize]) as i8),
1165 -(u8::le(&a[7usize], &b[7usize]) as i8),
1166 -(u8::le(&a[8usize], &b[8usize]) as i8),
1167 -(u8::le(&a[9usize], &b[9usize]) as i8),
1168 -(u8::le(&a[10usize], &b[10usize]) as i8),
1169 -(u8::le(&a[11usize], &b[11usize]) as i8),
1170 -(u8::le(&a[12usize], &b[12usize]) as i8),
1171 -(u8::le(&a[13usize], &b[13usize]) as i8),
1172 -(u8::le(&a[14usize], &b[14usize]) as i8),
1173 -(u8::le(&a[15usize], &b[15usize]) as i8),
1174 ]
1175 .simd_into(self)
1176 }
1177 #[inline(always)]
1178 fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1179 [
1180 -(u8::ge(&a[0usize], &b[0usize]) as i8),
1181 -(u8::ge(&a[1usize], &b[1usize]) as i8),
1182 -(u8::ge(&a[2usize], &b[2usize]) as i8),
1183 -(u8::ge(&a[3usize], &b[3usize]) as i8),
1184 -(u8::ge(&a[4usize], &b[4usize]) as i8),
1185 -(u8::ge(&a[5usize], &b[5usize]) as i8),
1186 -(u8::ge(&a[6usize], &b[6usize]) as i8),
1187 -(u8::ge(&a[7usize], &b[7usize]) as i8),
1188 -(u8::ge(&a[8usize], &b[8usize]) as i8),
1189 -(u8::ge(&a[9usize], &b[9usize]) as i8),
1190 -(u8::ge(&a[10usize], &b[10usize]) as i8),
1191 -(u8::ge(&a[11usize], &b[11usize]) as i8),
1192 -(u8::ge(&a[12usize], &b[12usize]) as i8),
1193 -(u8::ge(&a[13usize], &b[13usize]) as i8),
1194 -(u8::ge(&a[14usize], &b[14usize]) as i8),
1195 -(u8::ge(&a[15usize], &b[15usize]) as i8),
1196 ]
1197 .simd_into(self)
1198 }
1199 #[inline(always)]
1200 fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1201 [
1202 -(u8::gt(&a[0usize], &b[0usize]) as i8),
1203 -(u8::gt(&a[1usize], &b[1usize]) as i8),
1204 -(u8::gt(&a[2usize], &b[2usize]) as i8),
1205 -(u8::gt(&a[3usize], &b[3usize]) as i8),
1206 -(u8::gt(&a[4usize], &b[4usize]) as i8),
1207 -(u8::gt(&a[5usize], &b[5usize]) as i8),
1208 -(u8::gt(&a[6usize], &b[6usize]) as i8),
1209 -(u8::gt(&a[7usize], &b[7usize]) as i8),
1210 -(u8::gt(&a[8usize], &b[8usize]) as i8),
1211 -(u8::gt(&a[9usize], &b[9usize]) as i8),
1212 -(u8::gt(&a[10usize], &b[10usize]) as i8),
1213 -(u8::gt(&a[11usize], &b[11usize]) as i8),
1214 -(u8::gt(&a[12usize], &b[12usize]) as i8),
1215 -(u8::gt(&a[13usize], &b[13usize]) as i8),
1216 -(u8::gt(&a[14usize], &b[14usize]) as i8),
1217 -(u8::gt(&a[15usize], &b[15usize]) as i8),
1218 ]
1219 .simd_into(self)
1220 }
1221 #[inline(always)]
1222 fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1223 [
1224 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1225 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1226 ]
1227 .simd_into(self)
1228 }
1229 #[inline(always)]
1230 fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1231 [
1232 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
1233 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
1234 a[15usize], b[15usize],
1235 ]
1236 .simd_into(self)
1237 }
1238 #[inline(always)]
1239 fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1240 [
1241 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
1242 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
1243 b[12usize], b[14usize],
1244 ]
1245 .simd_into(self)
1246 }
1247 #[inline(always)]
1248 fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1249 [
1250 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
1251 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
1252 b[13usize], b[15usize],
1253 ]
1254 .simd_into(self)
1255 }
1256 #[inline(always)]
1257 fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
1258 [
1259 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1260 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1261 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1262 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1263 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1264 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1265 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1266 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1267 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1268 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1269 if a[10usize] != 0 {
1270 b[10usize]
1271 } else {
1272 c[10usize]
1273 },
1274 if a[11usize] != 0 {
1275 b[11usize]
1276 } else {
1277 c[11usize]
1278 },
1279 if a[12usize] != 0 {
1280 b[12usize]
1281 } else {
1282 c[12usize]
1283 },
1284 if a[13usize] != 0 {
1285 b[13usize]
1286 } else {
1287 c[13usize]
1288 },
1289 if a[14usize] != 0 {
1290 b[14usize]
1291 } else {
1292 c[14usize]
1293 },
1294 if a[15usize] != 0 {
1295 b[15usize]
1296 } else {
1297 c[15usize]
1298 },
1299 ]
1300 .simd_into(self)
1301 }
1302 #[inline(always)]
1303 fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1304 [
1305 u8::min(a[0usize], b[0usize]),
1306 u8::min(a[1usize], b[1usize]),
1307 u8::min(a[2usize], b[2usize]),
1308 u8::min(a[3usize], b[3usize]),
1309 u8::min(a[4usize], b[4usize]),
1310 u8::min(a[5usize], b[5usize]),
1311 u8::min(a[6usize], b[6usize]),
1312 u8::min(a[7usize], b[7usize]),
1313 u8::min(a[8usize], b[8usize]),
1314 u8::min(a[9usize], b[9usize]),
1315 u8::min(a[10usize], b[10usize]),
1316 u8::min(a[11usize], b[11usize]),
1317 u8::min(a[12usize], b[12usize]),
1318 u8::min(a[13usize], b[13usize]),
1319 u8::min(a[14usize], b[14usize]),
1320 u8::min(a[15usize], b[15usize]),
1321 ]
1322 .simd_into(self)
1323 }
1324 #[inline(always)]
1325 fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1326 [
1327 u8::max(a[0usize], b[0usize]),
1328 u8::max(a[1usize], b[1usize]),
1329 u8::max(a[2usize], b[2usize]),
1330 u8::max(a[3usize], b[3usize]),
1331 u8::max(a[4usize], b[4usize]),
1332 u8::max(a[5usize], b[5usize]),
1333 u8::max(a[6usize], b[6usize]),
1334 u8::max(a[7usize], b[7usize]),
1335 u8::max(a[8usize], b[8usize]),
1336 u8::max(a[9usize], b[9usize]),
1337 u8::max(a[10usize], b[10usize]),
1338 u8::max(a[11usize], b[11usize]),
1339 u8::max(a[12usize], b[12usize]),
1340 u8::max(a[13usize], b[13usize]),
1341 u8::max(a[14usize], b[14usize]),
1342 u8::max(a[15usize], b[15usize]),
1343 ]
1344 .simd_into(self)
1345 }
1346 #[inline(always)]
1347 fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
1348 let mut result = [0; 32usize];
1349 result[0..16usize].copy_from_slice(&a.val);
1350 result[16usize..32usize].copy_from_slice(&b.val);
1351 result.simd_into(self)
1352 }
1353 #[inline(always)]
1354 fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1355 [
1356 a[0usize] as u16,
1357 a[1usize] as u16,
1358 a[2usize] as u16,
1359 a[3usize] as u16,
1360 a[4usize] as u16,
1361 a[5usize] as u16,
1362 a[6usize] as u16,
1363 a[7usize] as u16,
1364 a[8usize] as u16,
1365 a[9usize] as u16,
1366 a[10usize] as u16,
1367 a[11usize] as u16,
1368 a[12usize] as u16,
1369 a[13usize] as u16,
1370 a[14usize] as u16,
1371 a[15usize] as u16,
1372 ]
1373 .simd_into(self)
1374 }
1375 #[inline(always)]
1376 fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1377 u32x4 {
1378 val: bytemuck::cast(a.val),
1379 simd: a.simd,
1380 }
1381 }
1382 #[inline(always)]
1383 fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1384 [val; 16usize].simd_into(self)
1385 }
1386 #[inline(always)]
1387 fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1388 [
1389 i8::not(a[0usize]),
1390 i8::not(a[1usize]),
1391 i8::not(a[2usize]),
1392 i8::not(a[3usize]),
1393 i8::not(a[4usize]),
1394 i8::not(a[5usize]),
1395 i8::not(a[6usize]),
1396 i8::not(a[7usize]),
1397 i8::not(a[8usize]),
1398 i8::not(a[9usize]),
1399 i8::not(a[10usize]),
1400 i8::not(a[11usize]),
1401 i8::not(a[12usize]),
1402 i8::not(a[13usize]),
1403 i8::not(a[14usize]),
1404 i8::not(a[15usize]),
1405 ]
1406 .simd_into(self)
1407 }
1408 #[inline(always)]
1409 fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1410 [
1411 i8::bitand(a[0usize], &b[0usize]),
1412 i8::bitand(a[1usize], &b[1usize]),
1413 i8::bitand(a[2usize], &b[2usize]),
1414 i8::bitand(a[3usize], &b[3usize]),
1415 i8::bitand(a[4usize], &b[4usize]),
1416 i8::bitand(a[5usize], &b[5usize]),
1417 i8::bitand(a[6usize], &b[6usize]),
1418 i8::bitand(a[7usize], &b[7usize]),
1419 i8::bitand(a[8usize], &b[8usize]),
1420 i8::bitand(a[9usize], &b[9usize]),
1421 i8::bitand(a[10usize], &b[10usize]),
1422 i8::bitand(a[11usize], &b[11usize]),
1423 i8::bitand(a[12usize], &b[12usize]),
1424 i8::bitand(a[13usize], &b[13usize]),
1425 i8::bitand(a[14usize], &b[14usize]),
1426 i8::bitand(a[15usize], &b[15usize]),
1427 ]
1428 .simd_into(self)
1429 }
1430 #[inline(always)]
1431 fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1432 [
1433 i8::bitor(a[0usize], &b[0usize]),
1434 i8::bitor(a[1usize], &b[1usize]),
1435 i8::bitor(a[2usize], &b[2usize]),
1436 i8::bitor(a[3usize], &b[3usize]),
1437 i8::bitor(a[4usize], &b[4usize]),
1438 i8::bitor(a[5usize], &b[5usize]),
1439 i8::bitor(a[6usize], &b[6usize]),
1440 i8::bitor(a[7usize], &b[7usize]),
1441 i8::bitor(a[8usize], &b[8usize]),
1442 i8::bitor(a[9usize], &b[9usize]),
1443 i8::bitor(a[10usize], &b[10usize]),
1444 i8::bitor(a[11usize], &b[11usize]),
1445 i8::bitor(a[12usize], &b[12usize]),
1446 i8::bitor(a[13usize], &b[13usize]),
1447 i8::bitor(a[14usize], &b[14usize]),
1448 i8::bitor(a[15usize], &b[15usize]),
1449 ]
1450 .simd_into(self)
1451 }
1452 #[inline(always)]
1453 fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1454 [
1455 i8::bitxor(a[0usize], &b[0usize]),
1456 i8::bitxor(a[1usize], &b[1usize]),
1457 i8::bitxor(a[2usize], &b[2usize]),
1458 i8::bitxor(a[3usize], &b[3usize]),
1459 i8::bitxor(a[4usize], &b[4usize]),
1460 i8::bitxor(a[5usize], &b[5usize]),
1461 i8::bitxor(a[6usize], &b[6usize]),
1462 i8::bitxor(a[7usize], &b[7usize]),
1463 i8::bitxor(a[8usize], &b[8usize]),
1464 i8::bitxor(a[9usize], &b[9usize]),
1465 i8::bitxor(a[10usize], &b[10usize]),
1466 i8::bitxor(a[11usize], &b[11usize]),
1467 i8::bitxor(a[12usize], &b[12usize]),
1468 i8::bitxor(a[13usize], &b[13usize]),
1469 i8::bitxor(a[14usize], &b[14usize]),
1470 i8::bitxor(a[15usize], &b[15usize]),
1471 ]
1472 .simd_into(self)
1473 }
1474 #[inline(always)]
1475 fn select_mask8x16(
1476 self,
1477 a: mask8x16<Self>,
1478 b: mask8x16<Self>,
1479 c: mask8x16<Self>,
1480 ) -> mask8x16<Self> {
1481 [
1482 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1483 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1484 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1485 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1486 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1487 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1488 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1489 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1490 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1491 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1492 if a[10usize] != 0 {
1493 b[10usize]
1494 } else {
1495 c[10usize]
1496 },
1497 if a[11usize] != 0 {
1498 b[11usize]
1499 } else {
1500 c[11usize]
1501 },
1502 if a[12usize] != 0 {
1503 b[12usize]
1504 } else {
1505 c[12usize]
1506 },
1507 if a[13usize] != 0 {
1508 b[13usize]
1509 } else {
1510 c[13usize]
1511 },
1512 if a[14usize] != 0 {
1513 b[14usize]
1514 } else {
1515 c[14usize]
1516 },
1517 if a[15usize] != 0 {
1518 b[15usize]
1519 } else {
1520 c[15usize]
1521 },
1522 ]
1523 .simd_into(self)
1524 }
1525 #[inline(always)]
1526 fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1527 [
1528 -(i8::eq(&a[0usize], &b[0usize]) as i8),
1529 -(i8::eq(&a[1usize], &b[1usize]) as i8),
1530 -(i8::eq(&a[2usize], &b[2usize]) as i8),
1531 -(i8::eq(&a[3usize], &b[3usize]) as i8),
1532 -(i8::eq(&a[4usize], &b[4usize]) as i8),
1533 -(i8::eq(&a[5usize], &b[5usize]) as i8),
1534 -(i8::eq(&a[6usize], &b[6usize]) as i8),
1535 -(i8::eq(&a[7usize], &b[7usize]) as i8),
1536 -(i8::eq(&a[8usize], &b[8usize]) as i8),
1537 -(i8::eq(&a[9usize], &b[9usize]) as i8),
1538 -(i8::eq(&a[10usize], &b[10usize]) as i8),
1539 -(i8::eq(&a[11usize], &b[11usize]) as i8),
1540 -(i8::eq(&a[12usize], &b[12usize]) as i8),
1541 -(i8::eq(&a[13usize], &b[13usize]) as i8),
1542 -(i8::eq(&a[14usize], &b[14usize]) as i8),
1543 -(i8::eq(&a[15usize], &b[15usize]) as i8),
1544 ]
1545 .simd_into(self)
1546 }
1547 #[inline(always)]
1548 fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
1549 let mut result = [0; 32usize];
1550 result[0..16usize].copy_from_slice(&a.val);
1551 result[16usize..32usize].copy_from_slice(&b.val);
1552 result.simd_into(self)
1553 }
1554 #[inline(always)]
1555 fn splat_i16x8(self, val: i16) -> i16x8<Self> {
1556 [val; 8usize].simd_into(self)
1557 }
1558 #[inline(always)]
1559 fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1560 [
1561 i16::not(a[0usize]),
1562 i16::not(a[1usize]),
1563 i16::not(a[2usize]),
1564 i16::not(a[3usize]),
1565 i16::not(a[4usize]),
1566 i16::not(a[5usize]),
1567 i16::not(a[6usize]),
1568 i16::not(a[7usize]),
1569 ]
1570 .simd_into(self)
1571 }
1572 #[inline(always)]
1573 fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1574 [
1575 i16::wrapping_add(a[0usize], b[0usize]),
1576 i16::wrapping_add(a[1usize], b[1usize]),
1577 i16::wrapping_add(a[2usize], b[2usize]),
1578 i16::wrapping_add(a[3usize], b[3usize]),
1579 i16::wrapping_add(a[4usize], b[4usize]),
1580 i16::wrapping_add(a[5usize], b[5usize]),
1581 i16::wrapping_add(a[6usize], b[6usize]),
1582 i16::wrapping_add(a[7usize], b[7usize]),
1583 ]
1584 .simd_into(self)
1585 }
1586 #[inline(always)]
1587 fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1588 [
1589 i16::wrapping_sub(a[0usize], b[0usize]),
1590 i16::wrapping_sub(a[1usize], b[1usize]),
1591 i16::wrapping_sub(a[2usize], b[2usize]),
1592 i16::wrapping_sub(a[3usize], b[3usize]),
1593 i16::wrapping_sub(a[4usize], b[4usize]),
1594 i16::wrapping_sub(a[5usize], b[5usize]),
1595 i16::wrapping_sub(a[6usize], b[6usize]),
1596 i16::wrapping_sub(a[7usize], b[7usize]),
1597 ]
1598 .simd_into(self)
1599 }
1600 #[inline(always)]
1601 fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1602 [
1603 i16::wrapping_mul(a[0usize], b[0usize]),
1604 i16::wrapping_mul(a[1usize], b[1usize]),
1605 i16::wrapping_mul(a[2usize], b[2usize]),
1606 i16::wrapping_mul(a[3usize], b[3usize]),
1607 i16::wrapping_mul(a[4usize], b[4usize]),
1608 i16::wrapping_mul(a[5usize], b[5usize]),
1609 i16::wrapping_mul(a[6usize], b[6usize]),
1610 i16::wrapping_mul(a[7usize], b[7usize]),
1611 ]
1612 .simd_into(self)
1613 }
1614 #[inline(always)]
1615 fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1616 [
1617 i16::bitand(a[0usize], &b[0usize]),
1618 i16::bitand(a[1usize], &b[1usize]),
1619 i16::bitand(a[2usize], &b[2usize]),
1620 i16::bitand(a[3usize], &b[3usize]),
1621 i16::bitand(a[4usize], &b[4usize]),
1622 i16::bitand(a[5usize], &b[5usize]),
1623 i16::bitand(a[6usize], &b[6usize]),
1624 i16::bitand(a[7usize], &b[7usize]),
1625 ]
1626 .simd_into(self)
1627 }
1628 #[inline(always)]
1629 fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1630 [
1631 i16::bitor(a[0usize], &b[0usize]),
1632 i16::bitor(a[1usize], &b[1usize]),
1633 i16::bitor(a[2usize], &b[2usize]),
1634 i16::bitor(a[3usize], &b[3usize]),
1635 i16::bitor(a[4usize], &b[4usize]),
1636 i16::bitor(a[5usize], &b[5usize]),
1637 i16::bitor(a[6usize], &b[6usize]),
1638 i16::bitor(a[7usize], &b[7usize]),
1639 ]
1640 .simd_into(self)
1641 }
1642 #[inline(always)]
1643 fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1644 [
1645 i16::bitxor(a[0usize], &b[0usize]),
1646 i16::bitxor(a[1usize], &b[1usize]),
1647 i16::bitxor(a[2usize], &b[2usize]),
1648 i16::bitxor(a[3usize], &b[3usize]),
1649 i16::bitxor(a[4usize], &b[4usize]),
1650 i16::bitxor(a[5usize], &b[5usize]),
1651 i16::bitxor(a[6usize], &b[6usize]),
1652 i16::bitxor(a[7usize], &b[7usize]),
1653 ]
1654 .simd_into(self)
1655 }
1656 #[inline(always)]
1657 fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1658 [
1659 i16::shr(a[0usize], shift as i16),
1660 i16::shr(a[1usize], shift as i16),
1661 i16::shr(a[2usize], shift as i16),
1662 i16::shr(a[3usize], shift as i16),
1663 i16::shr(a[4usize], shift as i16),
1664 i16::shr(a[5usize], shift as i16),
1665 i16::shr(a[6usize], shift as i16),
1666 i16::shr(a[7usize], shift as i16),
1667 ]
1668 .simd_into(self)
1669 }
1670 #[inline(always)]
1671 fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1672 [
1673 i16::shr(a[0usize], &b[0usize]),
1674 i16::shr(a[1usize], &b[1usize]),
1675 i16::shr(a[2usize], &b[2usize]),
1676 i16::shr(a[3usize], &b[3usize]),
1677 i16::shr(a[4usize], &b[4usize]),
1678 i16::shr(a[5usize], &b[5usize]),
1679 i16::shr(a[6usize], &b[6usize]),
1680 i16::shr(a[7usize], &b[7usize]),
1681 ]
1682 .simd_into(self)
1683 }
1684 #[inline(always)]
1685 fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1686 [
1687 i16::shl(a[0usize], shift as i16),
1688 i16::shl(a[1usize], shift as i16),
1689 i16::shl(a[2usize], shift as i16),
1690 i16::shl(a[3usize], shift as i16),
1691 i16::shl(a[4usize], shift as i16),
1692 i16::shl(a[5usize], shift as i16),
1693 i16::shl(a[6usize], shift as i16),
1694 i16::shl(a[7usize], shift as i16),
1695 ]
1696 .simd_into(self)
1697 }
1698 #[inline(always)]
1699 fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1700 [
1701 -(i16::eq(&a[0usize], &b[0usize]) as i16),
1702 -(i16::eq(&a[1usize], &b[1usize]) as i16),
1703 -(i16::eq(&a[2usize], &b[2usize]) as i16),
1704 -(i16::eq(&a[3usize], &b[3usize]) as i16),
1705 -(i16::eq(&a[4usize], &b[4usize]) as i16),
1706 -(i16::eq(&a[5usize], &b[5usize]) as i16),
1707 -(i16::eq(&a[6usize], &b[6usize]) as i16),
1708 -(i16::eq(&a[7usize], &b[7usize]) as i16),
1709 ]
1710 .simd_into(self)
1711 }
1712 #[inline(always)]
1713 fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1714 [
1715 -(i16::lt(&a[0usize], &b[0usize]) as i16),
1716 -(i16::lt(&a[1usize], &b[1usize]) as i16),
1717 -(i16::lt(&a[2usize], &b[2usize]) as i16),
1718 -(i16::lt(&a[3usize], &b[3usize]) as i16),
1719 -(i16::lt(&a[4usize], &b[4usize]) as i16),
1720 -(i16::lt(&a[5usize], &b[5usize]) as i16),
1721 -(i16::lt(&a[6usize], &b[6usize]) as i16),
1722 -(i16::lt(&a[7usize], &b[7usize]) as i16),
1723 ]
1724 .simd_into(self)
1725 }
1726 #[inline(always)]
1727 fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1728 [
1729 -(i16::le(&a[0usize], &b[0usize]) as i16),
1730 -(i16::le(&a[1usize], &b[1usize]) as i16),
1731 -(i16::le(&a[2usize], &b[2usize]) as i16),
1732 -(i16::le(&a[3usize], &b[3usize]) as i16),
1733 -(i16::le(&a[4usize], &b[4usize]) as i16),
1734 -(i16::le(&a[5usize], &b[5usize]) as i16),
1735 -(i16::le(&a[6usize], &b[6usize]) as i16),
1736 -(i16::le(&a[7usize], &b[7usize]) as i16),
1737 ]
1738 .simd_into(self)
1739 }
1740 #[inline(always)]
1741 fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1742 [
1743 -(i16::ge(&a[0usize], &b[0usize]) as i16),
1744 -(i16::ge(&a[1usize], &b[1usize]) as i16),
1745 -(i16::ge(&a[2usize], &b[2usize]) as i16),
1746 -(i16::ge(&a[3usize], &b[3usize]) as i16),
1747 -(i16::ge(&a[4usize], &b[4usize]) as i16),
1748 -(i16::ge(&a[5usize], &b[5usize]) as i16),
1749 -(i16::ge(&a[6usize], &b[6usize]) as i16),
1750 -(i16::ge(&a[7usize], &b[7usize]) as i16),
1751 ]
1752 .simd_into(self)
1753 }
1754 #[inline(always)]
1755 fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1756 [
1757 -(i16::gt(&a[0usize], &b[0usize]) as i16),
1758 -(i16::gt(&a[1usize], &b[1usize]) as i16),
1759 -(i16::gt(&a[2usize], &b[2usize]) as i16),
1760 -(i16::gt(&a[3usize], &b[3usize]) as i16),
1761 -(i16::gt(&a[4usize], &b[4usize]) as i16),
1762 -(i16::gt(&a[5usize], &b[5usize]) as i16),
1763 -(i16::gt(&a[6usize], &b[6usize]) as i16),
1764 -(i16::gt(&a[7usize], &b[7usize]) as i16),
1765 ]
1766 .simd_into(self)
1767 }
1768 #[inline(always)]
1769 fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1770 [
1771 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1772 ]
1773 .simd_into(self)
1774 }
1775 #[inline(always)]
1776 fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1777 [
1778 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1779 ]
1780 .simd_into(self)
1781 }
1782 #[inline(always)]
1783 fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1784 [
1785 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1786 ]
1787 .simd_into(self)
1788 }
1789 #[inline(always)]
1790 fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1791 [
1792 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1793 ]
1794 .simd_into(self)
1795 }
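    // `select` takes each lane from `b` where the mask lane is non-zero and from `c`
    // otherwise.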
1796 #[inline(always)]
1797 fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
1798 [
1799 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1800 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1801 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1802 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1803 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1804 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1805 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1806 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1807 ]
1808 .simd_into(self)
1809 }
1810 #[inline(always)]
1811 fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1812 [
1813 i16::min(a[0usize], b[0usize]),
1814 i16::min(a[1usize], b[1usize]),
1815 i16::min(a[2usize], b[2usize]),
1816 i16::min(a[3usize], b[3usize]),
1817 i16::min(a[4usize], b[4usize]),
1818 i16::min(a[5usize], b[5usize]),
1819 i16::min(a[6usize], b[6usize]),
1820 i16::min(a[7usize], b[7usize]),
1821 ]
1822 .simd_into(self)
1823 }
1824 #[inline(always)]
1825 fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1826 [
1827 i16::max(a[0usize], b[0usize]),
1828 i16::max(a[1usize], b[1usize]),
1829 i16::max(a[2usize], b[2usize]),
1830 i16::max(a[3usize], b[3usize]),
1831 i16::max(a[4usize], b[4usize]),
1832 i16::max(a[5usize], b[5usize]),
1833 i16::max(a[6usize], b[6usize]),
1834 i16::max(a[7usize], b[7usize]),
1835 ]
1836 .simd_into(self)
1837 }
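    // `combine` concatenates the two 8-lane halves into one 16-lane vector.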
1838 #[inline(always)]
1839 fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
1840 let mut result = [0; 16usize];
1841 result[0..8usize].copy_from_slice(&a.val);
1842 result[8usize..16usize].copy_from_slice(&b.val);
1843 result.simd_into(self)
1844 }
1845 #[inline(always)]
1846 fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1847 [
1848 i16::neg(a[0usize]),
1849 i16::neg(a[1usize]),
1850 i16::neg(a[2usize]),
1851 i16::neg(a[3usize]),
1852 i16::neg(a[4usize]),
1853 i16::neg(a[5usize]),
1854 i16::neg(a[6usize]),
1855 i16::neg(a[7usize]),
1856 ]
1857 .simd_into(self)
1858 }
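    // The `reinterpret_*` methods are pure bit reinterpretations via `bytemuck::cast`;
    // lane values are not converted.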
1859 #[inline(always)]
1860 fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
1861 u8x16 {
1862 val: bytemuck::cast(a.val),
1863 simd: a.simd,
1864 }
1865 }
1866 #[inline(always)]
1867 fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
1868 u32x4 {
1869 val: bytemuck::cast(a.val),
1870 simd: a.simd,
1871 }
1872 }
1873 #[inline(always)]
1874 fn splat_u16x8(self, val: u16) -> u16x8<Self> {
1875 [val; 8usize].simd_into(self)
1876 }
1877 #[inline(always)]
1878 fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
1879 [
1880 u16::not(a[0usize]),
1881 u16::not(a[1usize]),
1882 u16::not(a[2usize]),
1883 u16::not(a[3usize]),
1884 u16::not(a[4usize]),
1885 u16::not(a[5usize]),
1886 u16::not(a[6usize]),
1887 u16::not(a[7usize]),
1888 ]
1889 .simd_into(self)
1890 }
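    // Integer add/sub/mul use the `wrapping_*` forms, so lane overflow wraps around
    // rather than panicking in debug builds.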
1891 #[inline(always)]
1892 fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1893 [
1894 u16::wrapping_add(a[0usize], b[0usize]),
1895 u16::wrapping_add(a[1usize], b[1usize]),
1896 u16::wrapping_add(a[2usize], b[2usize]),
1897 u16::wrapping_add(a[3usize], b[3usize]),
1898 u16::wrapping_add(a[4usize], b[4usize]),
1899 u16::wrapping_add(a[5usize], b[5usize]),
1900 u16::wrapping_add(a[6usize], b[6usize]),
1901 u16::wrapping_add(a[7usize], b[7usize]),
1902 ]
1903 .simd_into(self)
1904 }
1905 #[inline(always)]
1906 fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1907 [
1908 u16::wrapping_sub(a[0usize], b[0usize]),
1909 u16::wrapping_sub(a[1usize], b[1usize]),
1910 u16::wrapping_sub(a[2usize], b[2usize]),
1911 u16::wrapping_sub(a[3usize], b[3usize]),
1912 u16::wrapping_sub(a[4usize], b[4usize]),
1913 u16::wrapping_sub(a[5usize], b[5usize]),
1914 u16::wrapping_sub(a[6usize], b[6usize]),
1915 u16::wrapping_sub(a[7usize], b[7usize]),
1916 ]
1917 .simd_into(self)
1918 }
1919 #[inline(always)]
1920 fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1921 [
1922 u16::wrapping_mul(a[0usize], b[0usize]),
1923 u16::wrapping_mul(a[1usize], b[1usize]),
1924 u16::wrapping_mul(a[2usize], b[2usize]),
1925 u16::wrapping_mul(a[3usize], b[3usize]),
1926 u16::wrapping_mul(a[4usize], b[4usize]),
1927 u16::wrapping_mul(a[5usize], b[5usize]),
1928 u16::wrapping_mul(a[6usize], b[6usize]),
1929 u16::wrapping_mul(a[7usize], b[7usize]),
1930 ]
1931 .simd_into(self)
1932 }
1933 #[inline(always)]
1934 fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1935 [
1936 u16::bitand(a[0usize], &b[0usize]),
1937 u16::bitand(a[1usize], &b[1usize]),
1938 u16::bitand(a[2usize], &b[2usize]),
1939 u16::bitand(a[3usize], &b[3usize]),
1940 u16::bitand(a[4usize], &b[4usize]),
1941 u16::bitand(a[5usize], &b[5usize]),
1942 u16::bitand(a[6usize], &b[6usize]),
1943 u16::bitand(a[7usize], &b[7usize]),
1944 ]
1945 .simd_into(self)
1946 }
1947 #[inline(always)]
1948 fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1949 [
1950 u16::bitor(a[0usize], &b[0usize]),
1951 u16::bitor(a[1usize], &b[1usize]),
1952 u16::bitor(a[2usize], &b[2usize]),
1953 u16::bitor(a[3usize], &b[3usize]),
1954 u16::bitor(a[4usize], &b[4usize]),
1955 u16::bitor(a[5usize], &b[5usize]),
1956 u16::bitor(a[6usize], &b[6usize]),
1957 u16::bitor(a[7usize], &b[7usize]),
1958 ]
1959 .simd_into(self)
1960 }
1961 #[inline(always)]
1962 fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1963 [
1964 u16::bitxor(a[0usize], &b[0usize]),
1965 u16::bitxor(a[1usize], &b[1usize]),
1966 u16::bitxor(a[2usize], &b[2usize]),
1967 u16::bitxor(a[3usize], &b[3usize]),
1968 u16::bitxor(a[4usize], &b[4usize]),
1969 u16::bitxor(a[5usize], &b[5usize]),
1970 u16::bitxor(a[6usize], &b[6usize]),
1971 u16::bitxor(a[7usize], &b[7usize]),
1972 ]
1973 .simd_into(self)
1974 }
1975 #[inline(always)]
1976 fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
1977 [
1978 u16::shr(a[0usize], shift as u16),
1979 u16::shr(a[1usize], shift as u16),
1980 u16::shr(a[2usize], shift as u16),
1981 u16::shr(a[3usize], shift as u16),
1982 u16::shr(a[4usize], shift as u16),
1983 u16::shr(a[5usize], shift as u16),
1984 u16::shr(a[6usize], shift as u16),
1985 u16::shr(a[7usize], shift as u16),
1986 ]
1987 .simd_into(self)
1988 }
1989 #[inline(always)]
1990 fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1991 [
1992 u16::shr(a[0usize], &b[0usize]),
1993 u16::shr(a[1usize], &b[1usize]),
1994 u16::shr(a[2usize], &b[2usize]),
1995 u16::shr(a[3usize], &b[3usize]),
1996 u16::shr(a[4usize], &b[4usize]),
1997 u16::shr(a[5usize], &b[5usize]),
1998 u16::shr(a[6usize], &b[6usize]),
1999 u16::shr(a[7usize], &b[7usize]),
2000 ]
2001 .simd_into(self)
2002 }
2003 #[inline(always)]
2004 fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
2005 [
2006 u16::shl(a[0usize], shift as u16),
2007 u16::shl(a[1usize], shift as u16),
2008 u16::shl(a[2usize], shift as u16),
2009 u16::shl(a[3usize], shift as u16),
2010 u16::shl(a[4usize], shift as u16),
2011 u16::shl(a[5usize], shift as u16),
2012 u16::shl(a[6usize], shift as u16),
2013 u16::shl(a[7usize], shift as u16),
2014 ]
2015 .simd_into(self)
2016 }
2017 #[inline(always)]
2018 fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2019 [
2020 -(u16::eq(&a[0usize], &b[0usize]) as i16),
2021 -(u16::eq(&a[1usize], &b[1usize]) as i16),
2022 -(u16::eq(&a[2usize], &b[2usize]) as i16),
2023 -(u16::eq(&a[3usize], &b[3usize]) as i16),
2024 -(u16::eq(&a[4usize], &b[4usize]) as i16),
2025 -(u16::eq(&a[5usize], &b[5usize]) as i16),
2026 -(u16::eq(&a[6usize], &b[6usize]) as i16),
2027 -(u16::eq(&a[7usize], &b[7usize]) as i16),
2028 ]
2029 .simd_into(self)
2030 }
2031 #[inline(always)]
2032 fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2033 [
2034 -(u16::lt(&a[0usize], &b[0usize]) as i16),
2035 -(u16::lt(&a[1usize], &b[1usize]) as i16),
2036 -(u16::lt(&a[2usize], &b[2usize]) as i16),
2037 -(u16::lt(&a[3usize], &b[3usize]) as i16),
2038 -(u16::lt(&a[4usize], &b[4usize]) as i16),
2039 -(u16::lt(&a[5usize], &b[5usize]) as i16),
2040 -(u16::lt(&a[6usize], &b[6usize]) as i16),
2041 -(u16::lt(&a[7usize], &b[7usize]) as i16),
2042 ]
2043 .simd_into(self)
2044 }
2045 #[inline(always)]
2046 fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2047 [
2048 -(u16::le(&a[0usize], &b[0usize]) as i16),
2049 -(u16::le(&a[1usize], &b[1usize]) as i16),
2050 -(u16::le(&a[2usize], &b[2usize]) as i16),
2051 -(u16::le(&a[3usize], &b[3usize]) as i16),
2052 -(u16::le(&a[4usize], &b[4usize]) as i16),
2053 -(u16::le(&a[5usize], &b[5usize]) as i16),
2054 -(u16::le(&a[6usize], &b[6usize]) as i16),
2055 -(u16::le(&a[7usize], &b[7usize]) as i16),
2056 ]
2057 .simd_into(self)
2058 }
2059 #[inline(always)]
2060 fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2061 [
2062 -(u16::ge(&a[0usize], &b[0usize]) as i16),
2063 -(u16::ge(&a[1usize], &b[1usize]) as i16),
2064 -(u16::ge(&a[2usize], &b[2usize]) as i16),
2065 -(u16::ge(&a[3usize], &b[3usize]) as i16),
2066 -(u16::ge(&a[4usize], &b[4usize]) as i16),
2067 -(u16::ge(&a[5usize], &b[5usize]) as i16),
2068 -(u16::ge(&a[6usize], &b[6usize]) as i16),
2069 -(u16::ge(&a[7usize], &b[7usize]) as i16),
2070 ]
2071 .simd_into(self)
2072 }
2073 #[inline(always)]
2074 fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
2075 [
2076 -(u16::gt(&a[0usize], &b[0usize]) as i16),
2077 -(u16::gt(&a[1usize], &b[1usize]) as i16),
2078 -(u16::gt(&a[2usize], &b[2usize]) as i16),
2079 -(u16::gt(&a[3usize], &b[3usize]) as i16),
2080 -(u16::gt(&a[4usize], &b[4usize]) as i16),
2081 -(u16::gt(&a[5usize], &b[5usize]) as i16),
2082 -(u16::gt(&a[6usize], &b[6usize]) as i16),
2083 -(u16::gt(&a[7usize], &b[7usize]) as i16),
2084 ]
2085 .simd_into(self)
2086 }
2087 #[inline(always)]
2088 fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2089 [
2090 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
2091 ]
2092 .simd_into(self)
2093 }
2094 #[inline(always)]
2095 fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2096 [
2097 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
2098 ]
2099 .simd_into(self)
2100 }
2101 #[inline(always)]
2102 fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2103 [
2104 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
2105 ]
2106 .simd_into(self)
2107 }
2108 #[inline(always)]
2109 fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2110 [
2111 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
2112 ]
2113 .simd_into(self)
2114 }
2115 #[inline(always)]
2116 fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
2117 [
2118 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2119 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2120 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2121 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2122 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2123 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2124 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2125 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2126 ]
2127 .simd_into(self)
2128 }
2129 #[inline(always)]
2130 fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2131 [
2132 u16::min(a[0usize], b[0usize]),
2133 u16::min(a[1usize], b[1usize]),
2134 u16::min(a[2usize], b[2usize]),
2135 u16::min(a[3usize], b[3usize]),
2136 u16::min(a[4usize], b[4usize]),
2137 u16::min(a[5usize], b[5usize]),
2138 u16::min(a[6usize], b[6usize]),
2139 u16::min(a[7usize], b[7usize]),
2140 ]
2141 .simd_into(self)
2142 }
2143 #[inline(always)]
2144 fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
2145 [
2146 u16::max(a[0usize], b[0usize]),
2147 u16::max(a[1usize], b[1usize]),
2148 u16::max(a[2usize], b[2usize]),
2149 u16::max(a[3usize], b[3usize]),
2150 u16::max(a[4usize], b[4usize]),
2151 u16::max(a[5usize], b[5usize]),
2152 u16::max(a[6usize], b[6usize]),
2153 u16::max(a[7usize], b[7usize]),
2154 ]
2155 .simd_into(self)
2156 }
2157 #[inline(always)]
2158 fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
2159 let mut result = [0; 16usize];
2160 result[0..8usize].copy_from_slice(&a.val);
2161 result[8usize..16usize].copy_from_slice(&b.val);
2162 result.simd_into(self)
2163 }
2164 #[inline(always)]
2165 fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
2166 u8x16 {
2167 val: bytemuck::cast(a.val),
2168 simd: a.simd,
2169 }
2170 }
2171 #[inline(always)]
2172 fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
2173 u32x4 {
2174 val: bytemuck::cast(a.val),
2175 simd: a.simd,
2176 }
2177 }
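    // Masks are stored as vectors of the signed lane type (0 = false, -1 = true), so
    // the bitwise operations below act on them directly.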
2178 #[inline(always)]
2179 fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2180 [val; 8usize].simd_into(self)
2181 }
2182 #[inline(always)]
2183 fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2184 [
2185 i16::not(a[0usize]),
2186 i16::not(a[1usize]),
2187 i16::not(a[2usize]),
2188 i16::not(a[3usize]),
2189 i16::not(a[4usize]),
2190 i16::not(a[5usize]),
2191 i16::not(a[6usize]),
2192 i16::not(a[7usize]),
2193 ]
2194 .simd_into(self)
2195 }
2196 #[inline(always)]
2197 fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2198 [
2199 i16::bitand(a[0usize], &b[0usize]),
2200 i16::bitand(a[1usize], &b[1usize]),
2201 i16::bitand(a[2usize], &b[2usize]),
2202 i16::bitand(a[3usize], &b[3usize]),
2203 i16::bitand(a[4usize], &b[4usize]),
2204 i16::bitand(a[5usize], &b[5usize]),
2205 i16::bitand(a[6usize], &b[6usize]),
2206 i16::bitand(a[7usize], &b[7usize]),
2207 ]
2208 .simd_into(self)
2209 }
2210 #[inline(always)]
2211 fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2212 [
2213 i16::bitor(a[0usize], &b[0usize]),
2214 i16::bitor(a[1usize], &b[1usize]),
2215 i16::bitor(a[2usize], &b[2usize]),
2216 i16::bitor(a[3usize], &b[3usize]),
2217 i16::bitor(a[4usize], &b[4usize]),
2218 i16::bitor(a[5usize], &b[5usize]),
2219 i16::bitor(a[6usize], &b[6usize]),
2220 i16::bitor(a[7usize], &b[7usize]),
2221 ]
2222 .simd_into(self)
2223 }
2224 #[inline(always)]
2225 fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2226 [
2227 i16::bitxor(a[0usize], &b[0usize]),
2228 i16::bitxor(a[1usize], &b[1usize]),
2229 i16::bitxor(a[2usize], &b[2usize]),
2230 i16::bitxor(a[3usize], &b[3usize]),
2231 i16::bitxor(a[4usize], &b[4usize]),
2232 i16::bitxor(a[5usize], &b[5usize]),
2233 i16::bitxor(a[6usize], &b[6usize]),
2234 i16::bitxor(a[7usize], &b[7usize]),
2235 ]
2236 .simd_into(self)
2237 }
2238 #[inline(always)]
2239 fn select_mask16x8(
2240 self,
2241 a: mask16x8<Self>,
2242 b: mask16x8<Self>,
2243 c: mask16x8<Self>,
2244 ) -> mask16x8<Self> {
2245 [
2246 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2247 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2248 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2249 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2250 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2251 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2252 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2253 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2254 ]
2255 .simd_into(self)
2256 }
2257 #[inline(always)]
2258 fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2259 [
2260 -(i16::eq(&a[0usize], &b[0usize]) as i16),
2261 -(i16::eq(&a[1usize], &b[1usize]) as i16),
2262 -(i16::eq(&a[2usize], &b[2usize]) as i16),
2263 -(i16::eq(&a[3usize], &b[3usize]) as i16),
2264 -(i16::eq(&a[4usize], &b[4usize]) as i16),
2265 -(i16::eq(&a[5usize], &b[5usize]) as i16),
2266 -(i16::eq(&a[6usize], &b[6usize]) as i16),
2267 -(i16::eq(&a[7usize], &b[7usize]) as i16),
2268 ]
2269 .simd_into(self)
2270 }
2271 #[inline(always)]
2272 fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
2273 let mut result = [0; 16usize];
2274 result[0..8usize].copy_from_slice(&a.val);
2275 result[8usize..16usize].copy_from_slice(&b.val);
2276 result.simd_into(self)
2277 }
2278 #[inline(always)]
2279 fn splat_i32x4(self, val: i32) -> i32x4<Self> {
2280 [val; 4usize].simd_into(self)
2281 }
2282 #[inline(always)]
2283 fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2284 [
2285 i32::not(a[0usize]),
2286 i32::not(a[1usize]),
2287 i32::not(a[2usize]),
2288 i32::not(a[3usize]),
2289 ]
2290 .simd_into(self)
2291 }
2292 #[inline(always)]
2293 fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2294 [
2295 i32::wrapping_add(a[0usize], b[0usize]),
2296 i32::wrapping_add(a[1usize], b[1usize]),
2297 i32::wrapping_add(a[2usize], b[2usize]),
2298 i32::wrapping_add(a[3usize], b[3usize]),
2299 ]
2300 .simd_into(self)
2301 }
2302 #[inline(always)]
2303 fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2304 [
2305 i32::wrapping_sub(a[0usize], b[0usize]),
2306 i32::wrapping_sub(a[1usize], b[1usize]),
2307 i32::wrapping_sub(a[2usize], b[2usize]),
2308 i32::wrapping_sub(a[3usize], b[3usize]),
2309 ]
2310 .simd_into(self)
2311 }
2312 #[inline(always)]
2313 fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2314 [
2315 i32::wrapping_mul(a[0usize], b[0usize]),
2316 i32::wrapping_mul(a[1usize], b[1usize]),
2317 i32::wrapping_mul(a[2usize], b[2usize]),
2318 i32::wrapping_mul(a[3usize], b[3usize]),
2319 ]
2320 .simd_into(self)
2321 }
2322 #[inline(always)]
2323 fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2324 [
2325 i32::bitand(a[0usize], &b[0usize]),
2326 i32::bitand(a[1usize], &b[1usize]),
2327 i32::bitand(a[2usize], &b[2usize]),
2328 i32::bitand(a[3usize], &b[3usize]),
2329 ]
2330 .simd_into(self)
2331 }
2332 #[inline(always)]
2333 fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2334 [
2335 i32::bitor(a[0usize], &b[0usize]),
2336 i32::bitor(a[1usize], &b[1usize]),
2337 i32::bitor(a[2usize], &b[2usize]),
2338 i32::bitor(a[3usize], &b[3usize]),
2339 ]
2340 .simd_into(self)
2341 }
2342 #[inline(always)]
2343 fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2344 [
2345 i32::bitxor(a[0usize], &b[0usize]),
2346 i32::bitxor(a[1usize], &b[1usize]),
2347 i32::bitxor(a[2usize], &b[2usize]),
2348 i32::bitxor(a[3usize], &b[3usize]),
2349 ]
2350 .simd_into(self)
2351 }
2352 #[inline(always)]
2353 fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2354 [
2355 i32::shr(a[0usize], shift as i32),
2356 i32::shr(a[1usize], shift as i32),
2357 i32::shr(a[2usize], shift as i32),
2358 i32::shr(a[3usize], shift as i32),
2359 ]
2360 .simd_into(self)
2361 }
2362 #[inline(always)]
2363 fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2364 [
2365 i32::shr(a[0usize], &b[0usize]),
2366 i32::shr(a[1usize], &b[1usize]),
2367 i32::shr(a[2usize], &b[2usize]),
2368 i32::shr(a[3usize], &b[3usize]),
2369 ]
2370 .simd_into(self)
2371 }
2372 #[inline(always)]
2373 fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2374 [
2375 i32::shl(a[0usize], shift as i32),
2376 i32::shl(a[1usize], shift as i32),
2377 i32::shl(a[2usize], shift as i32),
2378 i32::shl(a[3usize], shift as i32),
2379 ]
2380 .simd_into(self)
2381 }
2382 #[inline(always)]
2383 fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2384 [
2385 -(i32::eq(&a[0usize], &b[0usize]) as i32),
2386 -(i32::eq(&a[1usize], &b[1usize]) as i32),
2387 -(i32::eq(&a[2usize], &b[2usize]) as i32),
2388 -(i32::eq(&a[3usize], &b[3usize]) as i32),
2389 ]
2390 .simd_into(self)
2391 }
2392 #[inline(always)]
2393 fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2394 [
2395 -(i32::lt(&a[0usize], &b[0usize]) as i32),
2396 -(i32::lt(&a[1usize], &b[1usize]) as i32),
2397 -(i32::lt(&a[2usize], &b[2usize]) as i32),
2398 -(i32::lt(&a[3usize], &b[3usize]) as i32),
2399 ]
2400 .simd_into(self)
2401 }
2402 #[inline(always)]
2403 fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2404 [
2405 -(i32::le(&a[0usize], &b[0usize]) as i32),
2406 -(i32::le(&a[1usize], &b[1usize]) as i32),
2407 -(i32::le(&a[2usize], &b[2usize]) as i32),
2408 -(i32::le(&a[3usize], &b[3usize]) as i32),
2409 ]
2410 .simd_into(self)
2411 }
2412 #[inline(always)]
2413 fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2414 [
2415 -(i32::ge(&a[0usize], &b[0usize]) as i32),
2416 -(i32::ge(&a[1usize], &b[1usize]) as i32),
2417 -(i32::ge(&a[2usize], &b[2usize]) as i32),
2418 -(i32::ge(&a[3usize], &b[3usize]) as i32),
2419 ]
2420 .simd_into(self)
2421 }
2422 #[inline(always)]
2423 fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2424 [
2425 -(i32::gt(&a[0usize], &b[0usize]) as i32),
2426 -(i32::gt(&a[1usize], &b[1usize]) as i32),
2427 -(i32::gt(&a[2usize], &b[2usize]) as i32),
2428 -(i32::gt(&a[3usize], &b[3usize]) as i32),
2429 ]
2430 .simd_into(self)
2431 }
2432 #[inline(always)]
2433 fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2434 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2435 }
2436 #[inline(always)]
2437 fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2438 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2439 }
2440 #[inline(always)]
2441 fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2442 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2443 }
2444 #[inline(always)]
2445 fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2446 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2447 }
2448 #[inline(always)]
2449 fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
2450 [
2451 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2452 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2453 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2454 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2455 ]
2456 .simd_into(self)
2457 }
2458 #[inline(always)]
2459 fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2460 [
2461 i32::min(a[0usize], b[0usize]),
2462 i32::min(a[1usize], b[1usize]),
2463 i32::min(a[2usize], b[2usize]),
2464 i32::min(a[3usize], b[3usize]),
2465 ]
2466 .simd_into(self)
2467 }
2468 #[inline(always)]
2469 fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2470 [
2471 i32::max(a[0usize], b[0usize]),
2472 i32::max(a[1usize], b[1usize]),
2473 i32::max(a[2usize], b[2usize]),
2474 i32::max(a[3usize], b[3usize]),
2475 ]
2476 .simd_into(self)
2477 }
2478 #[inline(always)]
2479 fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
2480 let mut result = [0; 8usize];
2481 result[0..4usize].copy_from_slice(&a.val);
2482 result[4usize..8usize].copy_from_slice(&b.val);
2483 result.simd_into(self)
2484 }
2485 #[inline(always)]
2486 fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2487 [
2488 i32::neg(a[0usize]),
2489 i32::neg(a[1usize]),
2490 i32::neg(a[2usize]),
2491 i32::neg(a[3usize]),
2492 ]
2493 .simd_into(self)
2494 }
2495 #[inline(always)]
2496 fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
2497 u8x16 {
2498 val: bytemuck::cast(a.val),
2499 simd: a.simd,
2500 }
2501 }
2502 #[inline(always)]
2503 fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
2504 u32x4 {
2505 val: bytemuck::cast(a.val),
2506 simd: a.simd,
2507 }
2508 }
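    // Lane-wise `as f32` conversion; values that are not exactly representable are
    // rounded to the nearest f32.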
2509 #[inline(always)]
2510 fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
2511 [
2512 a[0usize] as f32,
2513 a[1usize] as f32,
2514 a[2usize] as f32,
2515 a[3usize] as f32,
2516 ]
2517 .simd_into(self)
2518 }
2519 #[inline(always)]
2520 fn splat_u32x4(self, val: u32) -> u32x4<Self> {
2521 [val; 4usize].simd_into(self)
2522 }
2523 #[inline(always)]
2524 fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
2525 [
2526 u32::not(a[0usize]),
2527 u32::not(a[1usize]),
2528 u32::not(a[2usize]),
2529 u32::not(a[3usize]),
2530 ]
2531 .simd_into(self)
2532 }
2533 #[inline(always)]
2534 fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2535 [
2536 u32::wrapping_add(a[0usize], b[0usize]),
2537 u32::wrapping_add(a[1usize], b[1usize]),
2538 u32::wrapping_add(a[2usize], b[2usize]),
2539 u32::wrapping_add(a[3usize], b[3usize]),
2540 ]
2541 .simd_into(self)
2542 }
2543 #[inline(always)]
2544 fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2545 [
2546 u32::wrapping_sub(a[0usize], b[0usize]),
2547 u32::wrapping_sub(a[1usize], b[1usize]),
2548 u32::wrapping_sub(a[2usize], b[2usize]),
2549 u32::wrapping_sub(a[3usize], b[3usize]),
2550 ]
2551 .simd_into(self)
2552 }
2553 #[inline(always)]
2554 fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2555 [
2556 u32::wrapping_mul(a[0usize], b[0usize]),
2557 u32::wrapping_mul(a[1usize], b[1usize]),
2558 u32::wrapping_mul(a[2usize], b[2usize]),
2559 u32::wrapping_mul(a[3usize], b[3usize]),
2560 ]
2561 .simd_into(self)
2562 }
2563 #[inline(always)]
2564 fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2565 [
2566 u32::bitand(a[0usize], &b[0usize]),
2567 u32::bitand(a[1usize], &b[1usize]),
2568 u32::bitand(a[2usize], &b[2usize]),
2569 u32::bitand(a[3usize], &b[3usize]),
2570 ]
2571 .simd_into(self)
2572 }
2573 #[inline(always)]
2574 fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2575 [
2576 u32::bitor(a[0usize], &b[0usize]),
2577 u32::bitor(a[1usize], &b[1usize]),
2578 u32::bitor(a[2usize], &b[2usize]),
2579 u32::bitor(a[3usize], &b[3usize]),
2580 ]
2581 .simd_into(self)
2582 }
2583 #[inline(always)]
2584 fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2585 [
2586 u32::bitxor(a[0usize], &b[0usize]),
2587 u32::bitxor(a[1usize], &b[1usize]),
2588 u32::bitxor(a[2usize], &b[2usize]),
2589 u32::bitxor(a[3usize], &b[3usize]),
2590 ]
2591 .simd_into(self)
2592 }
2593 #[inline(always)]
2594 fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2595 [
2596 u32::shr(a[0usize], shift as u32),
2597 u32::shr(a[1usize], shift as u32),
2598 u32::shr(a[2usize], shift as u32),
2599 u32::shr(a[3usize], shift as u32),
2600 ]
2601 .simd_into(self)
2602 }
2603 #[inline(always)]
2604 fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2605 [
2606 u32::shr(a[0usize], &b[0usize]),
2607 u32::shr(a[1usize], &b[1usize]),
2608 u32::shr(a[2usize], &b[2usize]),
2609 u32::shr(a[3usize], &b[3usize]),
2610 ]
2611 .simd_into(self)
2612 }
2613 #[inline(always)]
2614 fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2615 [
2616 u32::shl(a[0usize], shift as u32),
2617 u32::shl(a[1usize], shift as u32),
2618 u32::shl(a[2usize], shift as u32),
2619 u32::shl(a[3usize], shift as u32),
2620 ]
2621 .simd_into(self)
2622 }
2623 #[inline(always)]
2624 fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2625 [
2626 -(u32::eq(&a[0usize], &b[0usize]) as i32),
2627 -(u32::eq(&a[1usize], &b[1usize]) as i32),
2628 -(u32::eq(&a[2usize], &b[2usize]) as i32),
2629 -(u32::eq(&a[3usize], &b[3usize]) as i32),
2630 ]
2631 .simd_into(self)
2632 }
2633 #[inline(always)]
2634 fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2635 [
2636 -(u32::lt(&a[0usize], &b[0usize]) as i32),
2637 -(u32::lt(&a[1usize], &b[1usize]) as i32),
2638 -(u32::lt(&a[2usize], &b[2usize]) as i32),
2639 -(u32::lt(&a[3usize], &b[3usize]) as i32),
2640 ]
2641 .simd_into(self)
2642 }
2643 #[inline(always)]
2644 fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2645 [
2646 -(u32::le(&a[0usize], &b[0usize]) as i32),
2647 -(u32::le(&a[1usize], &b[1usize]) as i32),
2648 -(u32::le(&a[2usize], &b[2usize]) as i32),
2649 -(u32::le(&a[3usize], &b[3usize]) as i32),
2650 ]
2651 .simd_into(self)
2652 }
2653 #[inline(always)]
2654 fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2655 [
2656 -(u32::ge(&a[0usize], &b[0usize]) as i32),
2657 -(u32::ge(&a[1usize], &b[1usize]) as i32),
2658 -(u32::ge(&a[2usize], &b[2usize]) as i32),
2659 -(u32::ge(&a[3usize], &b[3usize]) as i32),
2660 ]
2661 .simd_into(self)
2662 }
2663 #[inline(always)]
2664 fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2665 [
2666 -(u32::gt(&a[0usize], &b[0usize]) as i32),
2667 -(u32::gt(&a[1usize], &b[1usize]) as i32),
2668 -(u32::gt(&a[2usize], &b[2usize]) as i32),
2669 -(u32::gt(&a[3usize], &b[3usize]) as i32),
2670 ]
2671 .simd_into(self)
2672 }
2673 #[inline(always)]
2674 fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2675 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2676 }
2677 #[inline(always)]
2678 fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2679 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2680 }
2681 #[inline(always)]
2682 fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2683 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2684 }
2685 #[inline(always)]
2686 fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2687 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2688 }
2689 #[inline(always)]
2690 fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
2691 [
2692 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2693 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2694 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2695 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2696 ]
2697 .simd_into(self)
2698 }
2699 #[inline(always)]
2700 fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2701 [
2702 u32::min(a[0usize], b[0usize]),
2703 u32::min(a[1usize], b[1usize]),
2704 u32::min(a[2usize], b[2usize]),
2705 u32::min(a[3usize], b[3usize]),
2706 ]
2707 .simd_into(self)
2708 }
2709 #[inline(always)]
2710 fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2711 [
2712 u32::max(a[0usize], b[0usize]),
2713 u32::max(a[1usize], b[1usize]),
2714 u32::max(a[2usize], b[2usize]),
2715 u32::max(a[3usize], b[3usize]),
2716 ]
2717 .simd_into(self)
2718 }
2719 #[inline(always)]
2720 fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
2721 let mut result = [0; 8usize];
2722 result[0..4usize].copy_from_slice(&a.val);
2723 result[4usize..8usize].copy_from_slice(&b.val);
2724 result.simd_into(self)
2725 }
2726 #[inline(always)]
2727 fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
2728 u8x16 {
2729 val: bytemuck::cast(a.val),
2730 simd: a.simd,
2731 }
2732 }
2733 #[inline(always)]
2734 fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
2735 [
2736 a[0usize] as f32,
2737 a[1usize] as f32,
2738 a[2usize] as f32,
2739 a[3usize] as f32,
2740 ]
2741 .simd_into(self)
2742 }
2743 #[inline(always)]
2744 fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
2745 [val; 4usize].simd_into(self)
2746 }
2747 #[inline(always)]
2748 fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
2749 [
2750 i32::not(a[0usize]),
2751 i32::not(a[1usize]),
2752 i32::not(a[2usize]),
2753 i32::not(a[3usize]),
2754 ]
2755 .simd_into(self)
2756 }
2757 #[inline(always)]
2758 fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2759 [
2760 i32::bitand(a[0usize], &b[0usize]),
2761 i32::bitand(a[1usize], &b[1usize]),
2762 i32::bitand(a[2usize], &b[2usize]),
2763 i32::bitand(a[3usize], &b[3usize]),
2764 ]
2765 .simd_into(self)
2766 }
2767 #[inline(always)]
2768 fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2769 [
2770 i32::bitor(a[0usize], &b[0usize]),
2771 i32::bitor(a[1usize], &b[1usize]),
2772 i32::bitor(a[2usize], &b[2usize]),
2773 i32::bitor(a[3usize], &b[3usize]),
2774 ]
2775 .simd_into(self)
2776 }
2777 #[inline(always)]
2778 fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2779 [
2780 i32::bitxor(a[0usize], &b[0usize]),
2781 i32::bitxor(a[1usize], &b[1usize]),
2782 i32::bitxor(a[2usize], &b[2usize]),
2783 i32::bitxor(a[3usize], &b[3usize]),
2784 ]
2785 .simd_into(self)
2786 }
2787 #[inline(always)]
2788 fn select_mask32x4(
2789 self,
2790 a: mask32x4<Self>,
2791 b: mask32x4<Self>,
2792 c: mask32x4<Self>,
2793 ) -> mask32x4<Self> {
2794 [
2795 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2796 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2797 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2798 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2799 ]
2800 .simd_into(self)
2801 }
2802 #[inline(always)]
2803 fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2804 [
2805 -(i32::eq(&a[0usize], &b[0usize]) as i32),
2806 -(i32::eq(&a[1usize], &b[1usize]) as i32),
2807 -(i32::eq(&a[2usize], &b[2usize]) as i32),
2808 -(i32::eq(&a[3usize], &b[3usize]) as i32),
2809 ]
2810 .simd_into(self)
2811 }
2812 #[inline(always)]
2813 fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
2814 let mut result = [0; 8usize];
2815 result[0..4usize].copy_from_slice(&a.val);
2816 result[4usize..8usize].copy_from_slice(&b.val);
2817 result.simd_into(self)
2818 }
2819 #[inline(always)]
2820 fn splat_f64x2(self, val: f64) -> f64x2<Self> {
2821 [val; 2usize].simd_into(self)
2822 }
2823 #[inline(always)]
2824 fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2825 [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
2826 }
2827 #[inline(always)]
2828 fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2829 [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
2830 }
2831 #[inline(always)]
2832 fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2833 [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
2834 }
2835 #[inline(always)]
2836 fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2837 [
2838 f64::add(a[0usize], &b[0usize]),
2839 f64::add(a[1usize], &b[1usize]),
2840 ]
2841 .simd_into(self)
2842 }
2843 #[inline(always)]
2844 fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2845 [
2846 f64::sub(a[0usize], &b[0usize]),
2847 f64::sub(a[1usize], &b[1usize]),
2848 ]
2849 .simd_into(self)
2850 }
2851 #[inline(always)]
2852 fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2853 [
2854 f64::mul(a[0usize], &b[0usize]),
2855 f64::mul(a[1usize], &b[1usize]),
2856 ]
2857 .simd_into(self)
2858 }
2859 #[inline(always)]
2860 fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2861 [
2862 f64::div(a[0usize], &b[0usize]),
2863 f64::div(a[1usize], &b[1usize]),
2864 ]
2865 .simd_into(self)
2866 }
2867 #[inline(always)]
2868 fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2869 [
2870 f64::copysign(a[0usize], b[0usize]),
2871 f64::copysign(a[1usize], b[1usize]),
2872 ]
2873 .simd_into(self)
2874 }
2875 #[inline(always)]
2876 fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2877 [
2878 -(f64::eq(&a[0usize], &b[0usize]) as i64),
2879 -(f64::eq(&a[1usize], &b[1usize]) as i64),
2880 ]
2881 .simd_into(self)
2882 }
2883 #[inline(always)]
2884 fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2885 [
2886 -(f64::lt(&a[0usize], &b[0usize]) as i64),
2887 -(f64::lt(&a[1usize], &b[1usize]) as i64),
2888 ]
2889 .simd_into(self)
2890 }
2891 #[inline(always)]
2892 fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2893 [
2894 -(f64::le(&a[0usize], &b[0usize]) as i64),
2895 -(f64::le(&a[1usize], &b[1usize]) as i64),
2896 ]
2897 .simd_into(self)
2898 }
2899 #[inline(always)]
2900 fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2901 [
2902 -(f64::ge(&a[0usize], &b[0usize]) as i64),
2903 -(f64::ge(&a[1usize], &b[1usize]) as i64),
2904 ]
2905 .simd_into(self)
2906 }
2907 #[inline(always)]
2908 fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2909 [
2910 -(f64::gt(&a[0usize], &b[0usize]) as i64),
2911 -(f64::gt(&a[1usize], &b[1usize]) as i64),
2912 ]
2913 .simd_into(self)
2914 }
2915 #[inline(always)]
2916 fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2917 [a[0usize], b[0usize]].simd_into(self)
2918 }
2919 #[inline(always)]
2920 fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2921 [a[1usize], b[1usize]].simd_into(self)
2922 }
2923 #[inline(always)]
2924 fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2925 [a[0usize], b[0usize]].simd_into(self)
2926 }
2927 #[inline(always)]
2928 fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2929 [a[1usize], b[1usize]].simd_into(self)
2930 }
2931 #[inline(always)]
2932 fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2933 [
2934 f64::max(a[0usize], b[0usize]),
2935 f64::max(a[1usize], b[1usize]),
2936 ]
2937 .simd_into(self)
2938 }
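    // In the scalar fallback the `*_precise` variants delegate to the same
    // `f64::max`/`f64::min` as the plain versions.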
2939 #[inline(always)]
2940 fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2941 [
2942 f64::max(a[0usize], b[0usize]),
2943 f64::max(a[1usize], b[1usize]),
2944 ]
2945 .simd_into(self)
2946 }
2947 #[inline(always)]
2948 fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2949 [
2950 f64::min(a[0usize], b[0usize]),
2951 f64::min(a[1usize], b[1usize]),
2952 ]
2953 .simd_into(self)
2954 }
2955 #[inline(always)]
2956 fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2957 [
2958 f64::min(a[0usize], b[0usize]),
2959 f64::min(a[1usize], b[1usize]),
2960 ]
2961 .simd_into(self)
2962 }
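    // madd/msub are a separate multiply followed by an add/sub, so the intermediate
    // product is rounded; this is not a fused multiply-add.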
2963 #[inline(always)]
2964 fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2965 a.mul(b).add(c)
2966 }
2967 #[inline(always)]
2968 fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2969 a.mul(b).sub(c)
2970 }
2971 #[inline(always)]
2972 fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2973 [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
2974 }
2975 #[inline(always)]
2976 fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2977 [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
2978 }
2979 #[inline(always)]
2980 fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2981 [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
2982 }
2983 #[inline(always)]
2984 fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2985 [
2986 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2987 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2988 ]
2989 .simd_into(self)
2990 }
2991 #[inline(always)]
2992 fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
2993 let mut result = [0.0; 4usize];
2994 result[0..2usize].copy_from_slice(&a.val);
2995 result[2usize..4usize].copy_from_slice(&b.val);
2996 result.simd_into(self)
2997 }
2998 #[inline(always)]
2999 fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
3000 f32x4 {
3001 val: bytemuck::cast(a.val),
3002 simd: a.simd,
3003 }
3004 }
3005 #[inline(always)]
3006 fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
3007 [val; 2usize].simd_into(self)
3008 }
3009 #[inline(always)]
3010 fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
3011 [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
3012 }
3013 #[inline(always)]
3014 fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3015 [
3016 i64::bitand(a[0usize], &b[0usize]),
3017 i64::bitand(a[1usize], &b[1usize]),
3018 ]
3019 .simd_into(self)
3020 }
3021 #[inline(always)]
3022 fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3023 [
3024 i64::bitor(a[0usize], &b[0usize]),
3025 i64::bitor(a[1usize], &b[1usize]),
3026 ]
3027 .simd_into(self)
3028 }
3029 #[inline(always)]
3030 fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3031 [
3032 i64::bitxor(a[0usize], &b[0usize]),
3033 i64::bitxor(a[1usize], &b[1usize]),
3034 ]
3035 .simd_into(self)
3036 }
3037 #[inline(always)]
3038 fn select_mask64x2(
3039 self,
3040 a: mask64x2<Self>,
3041 b: mask64x2<Self>,
3042 c: mask64x2<Self>,
3043 ) -> mask64x2<Self> {
3044 [
3045 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
3046 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
3047 ]
3048 .simd_into(self)
3049 }
3050 #[inline(always)]
3051 fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
3052 [
3053 -(i64::eq(&a[0usize], &b[0usize]) as i64),
3054 -(i64::eq(&a[1usize], &b[1usize]) as i64),
3055 ]
3056 .simd_into(self)
3057 }
3058 #[inline(always)]
3059 fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
3060 let mut result = [0; 4usize];
3061 result[0..2usize].copy_from_slice(&a.val);
3062 result[2usize..4usize].copy_from_slice(&b.val);
3063 result.simd_into(self)
3064 }
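    // 256-bit vectors have no dedicated fallback representation: each f32x8 operation
    // splits its operands into two f32x4 halves, applies the 128-bit implementation,
    // and recombines the results. The wider types below follow the same scheme.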
3065 #[inline(always)]
3066 fn splat_f32x8(self, a: f32) -> f32x8<Self> {
3067 let half = self.splat_f32x4(a);
3068 self.combine_f32x4(half, half)
3069 }
3070 #[inline(always)]
3071 fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3072 let (a0, a1) = self.split_f32x8(a);
3073 self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
3074 }
3075 #[inline(always)]
3076 fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3077 let (a0, a1) = self.split_f32x8(a);
3078 self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
3079 }
3080 #[inline(always)]
3081 fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3082 let (a0, a1) = self.split_f32x8(a);
3083 self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
3084 }
3085 #[inline(always)]
3086 fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3087 let (a0, a1) = self.split_f32x8(a);
3088 let (b0, b1) = self.split_f32x8(b);
3089 self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
3090 }
3091 #[inline(always)]
3092 fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3093 let (a0, a1) = self.split_f32x8(a);
3094 let (b0, b1) = self.split_f32x8(b);
3095 self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
3096 }
3097 #[inline(always)]
3098 fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3099 let (a0, a1) = self.split_f32x8(a);
3100 let (b0, b1) = self.split_f32x8(b);
3101 self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
3102 }
3103 #[inline(always)]
3104 fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3105 let (a0, a1) = self.split_f32x8(a);
3106 let (b0, b1) = self.split_f32x8(b);
3107 self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
3108 }
3109 #[inline(always)]
3110 fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3111 let (a0, a1) = self.split_f32x8(a);
3112 let (b0, b1) = self.split_f32x8(b);
3113 self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
3114 }
3115 #[inline(always)]
3116 fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3117 let (a0, a1) = self.split_f32x8(a);
3118 let (b0, b1) = self.split_f32x8(b);
3119 self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
3120 }
3121 #[inline(always)]
3122 fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3123 let (a0, a1) = self.split_f32x8(a);
3124 let (b0, b1) = self.split_f32x8(b);
3125 self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
3126 }
3127 #[inline(always)]
3128 fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3129 let (a0, a1) = self.split_f32x8(a);
3130 let (b0, b1) = self.split_f32x8(b);
3131 self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
3132 }
3133 #[inline(always)]
3134 fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3135 let (a0, a1) = self.split_f32x8(a);
3136 let (b0, b1) = self.split_f32x8(b);
3137 self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
3138 }
3139 #[inline(always)]
3140 fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
3141 let (a0, a1) = self.split_f32x8(a);
3142 let (b0, b1) = self.split_f32x8(b);
3143 self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
3144 }
3145 #[inline(always)]
3146 fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3147 let (a0, _) = self.split_f32x8(a);
3148 let (b0, _) = self.split_f32x8(b);
3149 self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
3150 }
3151 #[inline(always)]
3152 fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3153 let (_, a1) = self.split_f32x8(a);
3154 let (_, b1) = self.split_f32x8(b);
3155 self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
3156 }
3157 #[inline(always)]
3158 fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3159 let (a0, a1) = self.split_f32x8(a);
3160 let (b0, b1) = self.split_f32x8(b);
3161 self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
3162 }
3163 #[inline(always)]
3164 fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3165 let (a0, a1) = self.split_f32x8(a);
3166 let (b0, b1) = self.split_f32x8(b);
3167 self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
3168 }
3169 #[inline(always)]
3170 fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3171 let (a0, a1) = self.split_f32x8(a);
3172 let (b0, b1) = self.split_f32x8(b);
3173 self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
3174 }
3175 #[inline(always)]
3176 fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3177 let (a0, a1) = self.split_f32x8(a);
3178 let (b0, b1) = self.split_f32x8(b);
3179 self.combine_f32x4(
3180 self.max_precise_f32x4(a0, b0),
3181 self.max_precise_f32x4(a1, b1),
3182 )
3183 }
3184 #[inline(always)]
3185 fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3186 let (a0, a1) = self.split_f32x8(a);
3187 let (b0, b1) = self.split_f32x8(b);
3188 self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
3189 }
3190 #[inline(always)]
3191 fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
3192 let (a0, a1) = self.split_f32x8(a);
3193 let (b0, b1) = self.split_f32x8(b);
3194 self.combine_f32x4(
3195 self.min_precise_f32x4(a0, b0),
3196 self.min_precise_f32x4(a1, b1),
3197 )
3198 }
3199 #[inline(always)]
3200 fn madd_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3201 let (a0, a1) = self.split_f32x8(a);
3202 let (b0, b1) = self.split_f32x8(b);
3203 let (c0, c1) = self.split_f32x8(c);
3204 self.combine_f32x4(self.madd_f32x4(a0, b0, c0), self.madd_f32x4(a1, b1, c1))
3205 }
3206 #[inline(always)]
3207 fn msub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3208 let (a0, a1) = self.split_f32x8(a);
3209 let (b0, b1) = self.split_f32x8(b);
3210 let (c0, c1) = self.split_f32x8(c);
3211 self.combine_f32x4(self.msub_f32x4(a0, b0, c0), self.msub_f32x4(a1, b1, c1))
3212 }
3213 #[inline(always)]
3214 fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3215 let (a0, a1) = self.split_f32x8(a);
3216 self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
3217 }
3218 #[inline(always)]
3219 fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3220 let (a0, a1) = self.split_f32x8(a);
3221 self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
3222 }
3223 #[inline(always)]
3224 fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
3225 let (a0, a1) = self.split_f32x8(a);
3226 self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
3227 }
3228 #[inline(always)]
3229 fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3230 let (a0, a1) = self.split_mask32x8(a);
3231 let (b0, b1) = self.split_f32x8(b);
3232 let (c0, c1) = self.split_f32x8(c);
3233 self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
3234 }
3235 #[inline(always)]
3236 fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
3237 let mut result = [0.0; 16usize];
3238 result[0..8usize].copy_from_slice(&a.val);
3239 result[8usize..16usize].copy_from_slice(&b.val);
3240 result.simd_into(self)
3241 }
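    // `split` copies the low and high four lanes into two independent f32x4 values;
    // `combine` above is its inverse.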
3242 #[inline(always)]
3243 fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
3244 let mut b0 = [0.0; 4usize];
3245 let mut b1 = [0.0; 4usize];
3246 b0.copy_from_slice(&a.val[0..4usize]);
3247 b1.copy_from_slice(&a.val[4usize..8usize]);
3248 (b0.simd_into(self), b1.simd_into(self))
3249 }
3250 #[inline(always)]
3251 fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
3252 let (a0, a1) = self.split_f32x8(a);
3253 self.combine_f64x2(
3254 self.reinterpret_f64_f32x4(a0),
3255 self.reinterpret_f64_f32x4(a1),
3256 )
3257 }
3258 #[inline(always)]
3259 fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3260 let (a0, a1) = self.split_f32x8(a);
3261 self.combine_i32x4(
3262 self.reinterpret_i32_f32x4(a0),
3263 self.reinterpret_i32_f32x4(a1),
3264 )
3265 }
3266 #[inline(always)]
3267 fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
3268 let (a0, a1) = self.split_f32x8(a);
3269 self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
3270 }
3271 #[inline(always)]
3272 fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3273 let (a0, a1) = self.split_f32x8(a);
3274 self.combine_u32x4(
3275 self.reinterpret_u32_f32x4(a0),
3276 self.reinterpret_u32_f32x4(a1),
3277 )
3278 }
3279 #[inline(always)]
3280 fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3281 let (a0, a1) = self.split_f32x8(a);
3282 self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
3283 }
3284 #[inline(always)]
3285 fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3286 let (a0, a1) = self.split_f32x8(a);
3287 self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
3288 }
3289 #[inline(always)]
3290 fn splat_i8x32(self, a: i8) -> i8x32<Self> {
3291 let half = self.splat_i8x16(a);
3292 self.combine_i8x16(half, half)
3293 }
3294 #[inline(always)]
3295 fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3296 let (a0, a1) = self.split_i8x32(a);
3297 self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
3298 }
3299 #[inline(always)]
3300 fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3301 let (a0, a1) = self.split_i8x32(a);
3302 let (b0, b1) = self.split_i8x32(b);
3303 self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
3304 }
3305 #[inline(always)]
3306 fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3307 let (a0, a1) = self.split_i8x32(a);
3308 let (b0, b1) = self.split_i8x32(b);
3309 self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
3310 }
3311 #[inline(always)]
3312 fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3313 let (a0, a1) = self.split_i8x32(a);
3314 let (b0, b1) = self.split_i8x32(b);
3315 self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
3316 }
3317 #[inline(always)]
3318 fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3319 let (a0, a1) = self.split_i8x32(a);
3320 let (b0, b1) = self.split_i8x32(b);
3321 self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
3322 }
3323 #[inline(always)]
3324 fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3325 let (a0, a1) = self.split_i8x32(a);
3326 let (b0, b1) = self.split_i8x32(b);
3327 self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
3328 }
3329 #[inline(always)]
3330 fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3331 let (a0, a1) = self.split_i8x32(a);
3332 let (b0, b1) = self.split_i8x32(b);
3333 self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
3334 }
3335 #[inline(always)]
3336 fn shr_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3337 let (a0, a1) = self.split_i8x32(a);
3338 self.combine_i8x16(self.shr_i8x16(a0, b), self.shr_i8x16(a1, b))
3339 }
3340 #[inline(always)]
3341 fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3342 let (a0, a1) = self.split_i8x32(a);
3343 let (b0, b1) = self.split_i8x32(b);
3344 self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
3345 }
3346 #[inline(always)]
3347 fn shl_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3348 let (a0, a1) = self.split_i8x32(a);
3349 self.combine_i8x16(self.shl_i8x16(a0, b), self.shl_i8x16(a1, b))
3350 }
3351 #[inline(always)]
3352 fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3353 let (a0, a1) = self.split_i8x32(a);
3354 let (b0, b1) = self.split_i8x32(b);
3355 self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
3356 }
3357 #[inline(always)]
3358 fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3359 let (a0, a1) = self.split_i8x32(a);
3360 let (b0, b1) = self.split_i8x32(b);
3361 self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
3362 }
3363 #[inline(always)]
3364 fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3365 let (a0, a1) = self.split_i8x32(a);
3366 let (b0, b1) = self.split_i8x32(b);
3367 self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
3368 }
3369 #[inline(always)]
3370 fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3371 let (a0, a1) = self.split_i8x32(a);
3372 let (b0, b1) = self.split_i8x32(b);
3373 self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
3374 }
3375 #[inline(always)]
3376 fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3377 let (a0, a1) = self.split_i8x32(a);
3378 let (b0, b1) = self.split_i8x32(b);
3379 self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
3380 }
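    // `zip_low` interleaves the low halves of `a` and `b` (a0, b0, a1, b1, ...);
    // `zip_high` does the same with the high halves.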
3381 #[inline(always)]
3382 fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3383 let (a0, _) = self.split_i8x32(a);
3384 let (b0, _) = self.split_i8x32(b);
3385 self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
3386 }
3387 #[inline(always)]
3388 fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3389 let (_, a1) = self.split_i8x32(a);
3390 let (_, b1) = self.split_i8x32(b);
3391 self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
3392 }
3393 #[inline(always)]
3394 fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3395 let (a0, a1) = self.split_i8x32(a);
3396 let (b0, b1) = self.split_i8x32(b);
3397 self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
3398 }
3399 #[inline(always)]
3400 fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3401 let (a0, a1) = self.split_i8x32(a);
3402 let (b0, b1) = self.split_i8x32(b);
3403 self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
3404 }
3405 #[inline(always)]
3406 fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
3407 let (a0, a1) = self.split_mask8x32(a);
3408 let (b0, b1) = self.split_i8x32(b);
3409 let (c0, c1) = self.split_i8x32(c);
3410 self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
3411 }
3412 #[inline(always)]
3413 fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3414 let (a0, a1) = self.split_i8x32(a);
3415 let (b0, b1) = self.split_i8x32(b);
3416 self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
3417 }
3418 #[inline(always)]
3419 fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3420 let (a0, a1) = self.split_i8x32(a);
3421 let (b0, b1) = self.split_i8x32(b);
3422 self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
3423 }
3424 #[inline(always)]
3425 fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
3426 let mut result = [0; 64usize];
3427 result[0..32usize].copy_from_slice(&a.val);
3428 result[32usize..64usize].copy_from_slice(&b.val);
3429 result.simd_into(self)
3430 }
3431 #[inline(always)]
3432 fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
3433 let mut b0 = [0; 16usize];
3434 let mut b1 = [0; 16usize];
3435 b0.copy_from_slice(&a.val[0..16usize]);
3436 b1.copy_from_slice(&a.val[16usize..32usize]);
3437 (b0.simd_into(self), b1.simd_into(self))
3438 }
3439 #[inline(always)]
3440 fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3441 let (a0, a1) = self.split_i8x32(a);
3442 self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
3443 }
3444 #[inline(always)]
3445 fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
3446 let (a0, a1) = self.split_i8x32(a);
3447 self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
3448 }
3449 #[inline(always)]
3450 fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
3451 let (a0, a1) = self.split_i8x32(a);
3452 self.combine_u32x4(
3453 self.reinterpret_u32_i8x16(a0),
3454 self.reinterpret_u32_i8x16(a1),
3455 )
3456 }
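// u8x32 operations: the fallback level has no native 256-bit vectors, so each op splits the
// operands into two u8x16 halves, applies the 128-bit implementation, and recombines with combine_u8x16.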
3457 #[inline(always)]
3458 fn splat_u8x32(self, a: u8) -> u8x32<Self> {
3459 let half = self.splat_u8x16(a);
3460 self.combine_u8x16(half, half)
3461 }
3462 #[inline(always)]
3463 fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
3464 let (a0, a1) = self.split_u8x32(a);
3465 self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
3466 }
3467 #[inline(always)]
3468 fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3469 let (a0, a1) = self.split_u8x32(a);
3470 let (b0, b1) = self.split_u8x32(b);
3471 self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
3472 }
3473 #[inline(always)]
3474 fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3475 let (a0, a1) = self.split_u8x32(a);
3476 let (b0, b1) = self.split_u8x32(b);
3477 self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
3478 }
3479 #[inline(always)]
3480 fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3481 let (a0, a1) = self.split_u8x32(a);
3482 let (b0, b1) = self.split_u8x32(b);
3483 self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
3484 }
3485 #[inline(always)]
3486 fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3487 let (a0, a1) = self.split_u8x32(a);
3488 let (b0, b1) = self.split_u8x32(b);
3489 self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
3490 }
3491 #[inline(always)]
3492 fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3493 let (a0, a1) = self.split_u8x32(a);
3494 let (b0, b1) = self.split_u8x32(b);
3495 self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
3496 }
3497 #[inline(always)]
3498 fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3499 let (a0, a1) = self.split_u8x32(a);
3500 let (b0, b1) = self.split_u8x32(b);
3501 self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
3502 }
3503 #[inline(always)]
3504 fn shr_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3505 let (a0, a1) = self.split_u8x32(a);
3506 self.combine_u8x16(self.shr_u8x16(a0, b), self.shr_u8x16(a1, b))
3507 }
3508 #[inline(always)]
3509 fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3510 let (a0, a1) = self.split_u8x32(a);
3511 let (b0, b1) = self.split_u8x32(b);
3512 self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
3513 }
3514 #[inline(always)]
3515 fn shl_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3516 let (a0, a1) = self.split_u8x32(a);
3517 self.combine_u8x16(self.shl_u8x16(a0, b), self.shl_u8x16(a1, b))
3518 }
3519 #[inline(always)]
3520 fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3521 let (a0, a1) = self.split_u8x32(a);
3522 let (b0, b1) = self.split_u8x32(b);
3523 self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
3524 }
3525 #[inline(always)]
3526 fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3527 let (a0, a1) = self.split_u8x32(a);
3528 let (b0, b1) = self.split_u8x32(b);
3529 self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
3530 }
3531 #[inline(always)]
3532 fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3533 let (a0, a1) = self.split_u8x32(a);
3534 let (b0, b1) = self.split_u8x32(b);
3535 self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
3536 }
3537 #[inline(always)]
3538 fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3539 let (a0, a1) = self.split_u8x32(a);
3540 let (b0, b1) = self.split_u8x32(b);
3541 self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
3542 }
3543 #[inline(always)]
3544 fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3545 let (a0, a1) = self.split_u8x32(a);
3546 let (b0, b1) = self.split_u8x32(b);
3547 self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
3548 }
3549 #[inline(always)]
3550 fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3551 let (a0, _) = self.split_u8x32(a);
3552 let (b0, _) = self.split_u8x32(b);
3553 self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
3554 }
3555 #[inline(always)]
3556 fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3557 let (_, a1) = self.split_u8x32(a);
3558 let (_, b1) = self.split_u8x32(b);
3559 self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
3560 }
3561 #[inline(always)]
3562 fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3563 let (a0, a1) = self.split_u8x32(a);
3564 let (b0, b1) = self.split_u8x32(b);
3565 self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
3566 }
3567 #[inline(always)]
3568 fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3569 let (a0, a1) = self.split_u8x32(a);
3570 let (b0, b1) = self.split_u8x32(b);
3571 self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
3572 }
3573 #[inline(always)]
3574 fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
3575 let (a0, a1) = self.split_mask8x32(a);
3576 let (b0, b1) = self.split_u8x32(b);
3577 let (c0, c1) = self.split_u8x32(c);
3578 self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
3579 }
3580 #[inline(always)]
3581 fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3582 let (a0, a1) = self.split_u8x32(a);
3583 let (b0, b1) = self.split_u8x32(b);
3584 self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
3585 }
3586 #[inline(always)]
3587 fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3588 let (a0, a1) = self.split_u8x32(a);
3589 let (b0, b1) = self.split_u8x32(b);
3590 self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
3591 }
3592 #[inline(always)]
3593 fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
3594 let mut result = [0; 64usize];
3595 result[0..32usize].copy_from_slice(&a.val);
3596 result[32usize..64usize].copy_from_slice(&b.val);
3597 result.simd_into(self)
3598 }
3599 #[inline(always)]
3600 fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
3601 let mut b0 = [0; 16usize];
3602 let mut b1 = [0; 16usize];
3603 b0.copy_from_slice(&a.val[0..16usize]);
3604 b1.copy_from_slice(&a.val[16usize..32usize]);
3605 (b0.simd_into(self), b1.simd_into(self))
3606 }
3607 #[inline(always)]
3608 fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
3609 let (a0, a1) = self.split_u8x32(a);
3610 self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
3611 }
3612 #[inline(always)]
3613 fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
3614 let (a0, a1) = self.split_u8x32(a);
3615 self.combine_u32x4(
3616 self.reinterpret_u32_u8x16(a0),
3617 self.reinterpret_u32_u8x16(a1),
3618 )
3619 }
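// mask8x32 operations: same split/combine scheme, delegating to the mask8x16 implementations.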
3620 #[inline(always)]
3621 fn splat_mask8x32(self, a: i8) -> mask8x32<Self> {
3622 let half = self.splat_mask8x16(a);
3623 self.combine_mask8x16(half, half)
3624 }
3625 #[inline(always)]
3626 fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
3627 let (a0, a1) = self.split_mask8x32(a);
3628 self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
3629 }
3630 #[inline(always)]
3631 fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3632 let (a0, a1) = self.split_mask8x32(a);
3633 let (b0, b1) = self.split_mask8x32(b);
3634 self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
3635 }
3636 #[inline(always)]
3637 fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3638 let (a0, a1) = self.split_mask8x32(a);
3639 let (b0, b1) = self.split_mask8x32(b);
3640 self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
3641 }
3642 #[inline(always)]
3643 fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3644 let (a0, a1) = self.split_mask8x32(a);
3645 let (b0, b1) = self.split_mask8x32(b);
3646 self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
3647 }
3648 #[inline(always)]
3649 fn select_mask8x32(
3650 self,
3651 a: mask8x32<Self>,
3652 b: mask8x32<Self>,
3653 c: mask8x32<Self>,
3654 ) -> mask8x32<Self> {
3655 let (a0, a1) = self.split_mask8x32(a);
3656 let (b0, b1) = self.split_mask8x32(b);
3657 let (c0, c1) = self.split_mask8x32(c);
3658 self.combine_mask8x16(
3659 self.select_mask8x16(a0, b0, c0),
3660 self.select_mask8x16(a1, b1, c1),
3661 )
3662 }
3663 #[inline(always)]
3664 fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3665 let (a0, a1) = self.split_mask8x32(a);
3666 let (b0, b1) = self.split_mask8x32(b);
3667 self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
3668 }
3669 #[inline(always)]
3670 fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
3671 let mut result = [0; 64usize];
3672 result[0..32usize].copy_from_slice(&a.val);
3673 result[32usize..64usize].copy_from_slice(&b.val);
3674 result.simd_into(self)
3675 }
3676 #[inline(always)]
3677 fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
3678 let mut b0 = [0; 16usize];
3679 let mut b1 = [0; 16usize];
3680 b0.copy_from_slice(&a.val[0..16usize]);
3681 b1.copy_from_slice(&a.val[16usize..32usize]);
3682 (b0.simd_into(self), b1.simd_into(self))
3683 }
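// i16x16 operations: each op splits into two i16x8 halves, delegates, and recombines with combine_i16x8.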
3684 #[inline(always)]
3685 fn splat_i16x16(self, a: i16) -> i16x16<Self> {
3686 let half = self.splat_i16x8(a);
3687 self.combine_i16x8(half, half)
3688 }
3689 #[inline(always)]
3690 fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3691 let (a0, a1) = self.split_i16x16(a);
3692 self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
3693 }
3694 #[inline(always)]
3695 fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3696 let (a0, a1) = self.split_i16x16(a);
3697 let (b0, b1) = self.split_i16x16(b);
3698 self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
3699 }
3700 #[inline(always)]
3701 fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3702 let (a0, a1) = self.split_i16x16(a);
3703 let (b0, b1) = self.split_i16x16(b);
3704 self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
3705 }
3706 #[inline(always)]
3707 fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3708 let (a0, a1) = self.split_i16x16(a);
3709 let (b0, b1) = self.split_i16x16(b);
3710 self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
3711 }
3712 #[inline(always)]
3713 fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3714 let (a0, a1) = self.split_i16x16(a);
3715 let (b0, b1) = self.split_i16x16(b);
3716 self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
3717 }
3718 #[inline(always)]
3719 fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3720 let (a0, a1) = self.split_i16x16(a);
3721 let (b0, b1) = self.split_i16x16(b);
3722 self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
3723 }
3724 #[inline(always)]
3725 fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3726 let (a0, a1) = self.split_i16x16(a);
3727 let (b0, b1) = self.split_i16x16(b);
3728 self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
3729 }
3730 #[inline(always)]
3731 fn shr_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3732 let (a0, a1) = self.split_i16x16(a);
3733 self.combine_i16x8(self.shr_i16x8(a0, b), self.shr_i16x8(a1, b))
3734 }
3735 #[inline(always)]
3736 fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3737 let (a0, a1) = self.split_i16x16(a);
3738 let (b0, b1) = self.split_i16x16(b);
3739 self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
3740 }
3741 #[inline(always)]
3742 fn shl_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3743 let (a0, a1) = self.split_i16x16(a);
3744 self.combine_i16x8(self.shl_i16x8(a0, b), self.shl_i16x8(a1, b))
3745 }
3746 #[inline(always)]
3747 fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3748 let (a0, a1) = self.split_i16x16(a);
3749 let (b0, b1) = self.split_i16x16(b);
3750 self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
3751 }
3752 #[inline(always)]
3753 fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3754 let (a0, a1) = self.split_i16x16(a);
3755 let (b0, b1) = self.split_i16x16(b);
3756 self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
3757 }
3758 #[inline(always)]
3759 fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3760 let (a0, a1) = self.split_i16x16(a);
3761 let (b0, b1) = self.split_i16x16(b);
3762 self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
3763 }
3764 #[inline(always)]
3765 fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3766 let (a0, a1) = self.split_i16x16(a);
3767 let (b0, b1) = self.split_i16x16(b);
3768 self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
3769 }
3770 #[inline(always)]
3771 fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3772 let (a0, a1) = self.split_i16x16(a);
3773 let (b0, b1) = self.split_i16x16(b);
3774 self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
3775 }
3776 #[inline(always)]
3777 fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3778 let (a0, _) = self.split_i16x16(a);
3779 let (b0, _) = self.split_i16x16(b);
3780 self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
3781 }
3782 #[inline(always)]
3783 fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3784 let (_, a1) = self.split_i16x16(a);
3785 let (_, b1) = self.split_i16x16(b);
3786 self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
3787 }
3788 #[inline(always)]
3789 fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3790 let (a0, a1) = self.split_i16x16(a);
3791 let (b0, b1) = self.split_i16x16(b);
3792 self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
3793 }
3794 #[inline(always)]
3795 fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3796 let (a0, a1) = self.split_i16x16(a);
3797 let (b0, b1) = self.split_i16x16(b);
3798 self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
3799 }
3800 #[inline(always)]
3801 fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
3802 let (a0, a1) = self.split_mask16x16(a);
3803 let (b0, b1) = self.split_i16x16(b);
3804 let (c0, c1) = self.split_i16x16(c);
3805 self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
3806 }
3807 #[inline(always)]
3808 fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3809 let (a0, a1) = self.split_i16x16(a);
3810 let (b0, b1) = self.split_i16x16(b);
3811 self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
3812 }
3813 #[inline(always)]
3814 fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3815 let (a0, a1) = self.split_i16x16(a);
3816 let (b0, b1) = self.split_i16x16(b);
3817 self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
3818 }
3819 #[inline(always)]
3820 fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
3821 let mut result = [0; 32usize];
3822 result[0..16usize].copy_from_slice(&a.val);
3823 result[16usize..32usize].copy_from_slice(&b.val);
3824 result.simd_into(self)
3825 }
3826 #[inline(always)]
3827 fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
3828 let mut b0 = [0; 8usize];
3829 let mut b1 = [0; 8usize];
3830 b0.copy_from_slice(&a.val[0..8usize]);
3831 b1.copy_from_slice(&a.val[8usize..16usize]);
3832 (b0.simd_into(self), b1.simd_into(self))
3833 }
3834 #[inline(always)]
3835 fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3836 let (a0, a1) = self.split_i16x16(a);
3837 self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
3838 }
3839 #[inline(always)]
3840 fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
3841 let (a0, a1) = self.split_i16x16(a);
3842 self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
3843 }
3844 #[inline(always)]
3845 fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
3846 let (a0, a1) = self.split_i16x16(a);
3847 self.combine_u32x4(
3848 self.reinterpret_u32_i16x8(a0),
3849 self.reinterpret_u32_i16x8(a1),
3850 )
3851 }
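// u16x16 operations: split into two u16x8 halves, delegate, recombine (narrow_u16x16 is the one
// exception below, written element-wise because there is no half-width u8x8 result type to combine).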
3852 #[inline(always)]
3853 fn splat_u16x16(self, a: u16) -> u16x16<Self> {
3854 let half = self.splat_u16x8(a);
3855 self.combine_u16x8(half, half)
3856 }
3857 #[inline(always)]
3858 fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
3859 let (a0, a1) = self.split_u16x16(a);
3860 self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
3861 }
3862 #[inline(always)]
3863 fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3864 let (a0, a1) = self.split_u16x16(a);
3865 let (b0, b1) = self.split_u16x16(b);
3866 self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
3867 }
3868 #[inline(always)]
3869 fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3870 let (a0, a1) = self.split_u16x16(a);
3871 let (b0, b1) = self.split_u16x16(b);
3872 self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
3873 }
3874 #[inline(always)]
3875 fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3876 let (a0, a1) = self.split_u16x16(a);
3877 let (b0, b1) = self.split_u16x16(b);
3878 self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
3879 }
3880 #[inline(always)]
3881 fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3882 let (a0, a1) = self.split_u16x16(a);
3883 let (b0, b1) = self.split_u16x16(b);
3884 self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
3885 }
3886 #[inline(always)]
3887 fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3888 let (a0, a1) = self.split_u16x16(a);
3889 let (b0, b1) = self.split_u16x16(b);
3890 self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
3891 }
3892 #[inline(always)]
3893 fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3894 let (a0, a1) = self.split_u16x16(a);
3895 let (b0, b1) = self.split_u16x16(b);
3896 self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
3897 }
3898 #[inline(always)]
3899 fn shr_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3900 let (a0, a1) = self.split_u16x16(a);
3901 self.combine_u16x8(self.shr_u16x8(a0, b), self.shr_u16x8(a1, b))
3902 }
3903 #[inline(always)]
3904 fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3905 let (a0, a1) = self.split_u16x16(a);
3906 let (b0, b1) = self.split_u16x16(b);
3907 self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
3908 }
3909 #[inline(always)]
3910 fn shl_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3911 let (a0, a1) = self.split_u16x16(a);
3912 self.combine_u16x8(self.shl_u16x8(a0, b), self.shl_u16x8(a1, b))
3913 }
3914 #[inline(always)]
3915 fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3916 let (a0, a1) = self.split_u16x16(a);
3917 let (b0, b1) = self.split_u16x16(b);
3918 self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
3919 }
3920 #[inline(always)]
3921 fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3922 let (a0, a1) = self.split_u16x16(a);
3923 let (b0, b1) = self.split_u16x16(b);
3924 self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
3925 }
3926 #[inline(always)]
3927 fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3928 let (a0, a1) = self.split_u16x16(a);
3929 let (b0, b1) = self.split_u16x16(b);
3930 self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
3931 }
3932 #[inline(always)]
3933 fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3934 let (a0, a1) = self.split_u16x16(a);
3935 let (b0, b1) = self.split_u16x16(b);
3936 self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
3937 }
3938 #[inline(always)]
3939 fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3940 let (a0, a1) = self.split_u16x16(a);
3941 let (b0, b1) = self.split_u16x16(b);
3942 self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
3943 }
3944 #[inline(always)]
3945 fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3946 let (a0, _) = self.split_u16x16(a);
3947 let (b0, _) = self.split_u16x16(b);
3948 self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
3949 }
3950 #[inline(always)]
3951 fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3952 let (_, a1) = self.split_u16x16(a);
3953 let (_, b1) = self.split_u16x16(b);
3954 self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
3955 }
3956 #[inline(always)]
3957 fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3958 let (a0, a1) = self.split_u16x16(a);
3959 let (b0, b1) = self.split_u16x16(b);
3960 self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
3961 }
3962 #[inline(always)]
3963 fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3964 let (a0, a1) = self.split_u16x16(a);
3965 let (b0, b1) = self.split_u16x16(b);
3966 self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
3967 }
3968 #[inline(always)]
3969 fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
3970 let (a0, a1) = self.split_mask16x16(a);
3971 let (b0, b1) = self.split_u16x16(b);
3972 let (c0, c1) = self.split_u16x16(c);
3973 self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
3974 }
3975 #[inline(always)]
3976 fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3977 let (a0, a1) = self.split_u16x16(a);
3978 let (b0, b1) = self.split_u16x16(b);
3979 self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
3980 }
3981 #[inline(always)]
3982 fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3983 let (a0, a1) = self.split_u16x16(a);
3984 let (b0, b1) = self.split_u16x16(b);
3985 self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
3986 }
3987 #[inline(always)]
3988 fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
3989 let mut result = [0; 32usize];
3990 result[0..16usize].copy_from_slice(&a.val);
3991 result[16usize..32usize].copy_from_slice(&b.val);
3992 result.simd_into(self)
3993 }
3994 #[inline(always)]
3995 fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
3996 let mut b0 = [0; 8usize];
3997 let mut b1 = [0; 8usize];
3998 b0.copy_from_slice(&a.val[0..8usize]);
3999 b1.copy_from_slice(&a.val[8usize..16usize]);
4000 (b0.simd_into(self), b1.simd_into(self))
4001 }
4002 #[inline(always)]
4003 fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
4004 [
4005 a[0usize] as u8,
4006 a[1usize] as u8,
4007 a[2usize] as u8,
4008 a[3usize] as u8,
4009 a[4usize] as u8,
4010 a[5usize] as u8,
4011 a[6usize] as u8,
4012 a[7usize] as u8,
4013 a[8usize] as u8,
4014 a[9usize] as u8,
4015 a[10usize] as u8,
4016 a[11usize] as u8,
4017 a[12usize] as u8,
4018 a[13usize] as u8,
4019 a[14usize] as u8,
4020 a[15usize] as u8,
4021 ]
4022 .simd_into(self)
4023 }
4024 #[inline(always)]
4025 fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
4026 let (a0, a1) = self.split_u16x16(a);
4027 self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
4028 }
4029 #[inline(always)]
4030 fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
4031 let (a0, a1) = self.split_u16x16(a);
4032 self.combine_u32x4(
4033 self.reinterpret_u32_u16x8(a0),
4034 self.reinterpret_u32_u16x8(a1),
4035 )
4036 }
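// mask16x16 operations: split into two mask16x8 halves, delegate, and recombine.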
4037 #[inline(always)]
4038 fn splat_mask16x16(self, a: i16) -> mask16x16<Self> {
4039 let half = self.splat_mask16x8(a);
4040 self.combine_mask16x8(half, half)
4041 }
4042 #[inline(always)]
4043 fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
4044 let (a0, a1) = self.split_mask16x16(a);
4045 self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
4046 }
4047 #[inline(always)]
4048 fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4049 let (a0, a1) = self.split_mask16x16(a);
4050 let (b0, b1) = self.split_mask16x16(b);
4051 self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
4052 }
4053 #[inline(always)]
4054 fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4055 let (a0, a1) = self.split_mask16x16(a);
4056 let (b0, b1) = self.split_mask16x16(b);
4057 self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
4058 }
4059 #[inline(always)]
4060 fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4061 let (a0, a1) = self.split_mask16x16(a);
4062 let (b0, b1) = self.split_mask16x16(b);
4063 self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
4064 }
4065 #[inline(always)]
4066 fn select_mask16x16(
4067 self,
4068 a: mask16x16<Self>,
4069 b: mask16x16<Self>,
4070 c: mask16x16<Self>,
4071 ) -> mask16x16<Self> {
4072 let (a0, a1) = self.split_mask16x16(a);
4073 let (b0, b1) = self.split_mask16x16(b);
4074 let (c0, c1) = self.split_mask16x16(c);
4075 self.combine_mask16x8(
4076 self.select_mask16x8(a0, b0, c0),
4077 self.select_mask16x8(a1, b1, c1),
4078 )
4079 }
4080 #[inline(always)]
4081 fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
4082 let (a0, a1) = self.split_mask16x16(a);
4083 let (b0, b1) = self.split_mask16x16(b);
4084 self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
4085 }
4086 #[inline(always)]
4087 fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
4088 let mut result = [0; 32usize];
4089 result[0..16usize].copy_from_slice(&a.val);
4090 result[16usize..32usize].copy_from_slice(&b.val);
4091 result.simd_into(self)
4092 }
4093 #[inline(always)]
4094 fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
4095 let mut b0 = [0; 8usize];
4096 let mut b1 = [0; 8usize];
4097 b0.copy_from_slice(&a.val[0..8usize]);
4098 b1.copy_from_slice(&a.val[8usize..16usize]);
4099 (b0.simd_into(self), b1.simd_into(self))
4100 }
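// i32x8 operations: split into two i32x4 halves, delegate to the 128-bit ops, and recombine with combine_i32x4.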
4101 #[inline(always)]
4102 fn splat_i32x8(self, a: i32) -> i32x8<Self> {
4103 let half = self.splat_i32x4(a);
4104 self.combine_i32x4(half, half)
4105 }
4106 #[inline(always)]
4107 fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
4108 let (a0, a1) = self.split_i32x8(a);
4109 self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
4110 }
4111 #[inline(always)]
4112 fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4113 let (a0, a1) = self.split_i32x8(a);
4114 let (b0, b1) = self.split_i32x8(b);
4115 self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
4116 }
4117 #[inline(always)]
4118 fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4119 let (a0, a1) = self.split_i32x8(a);
4120 let (b0, b1) = self.split_i32x8(b);
4121 self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
4122 }
4123 #[inline(always)]
4124 fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4125 let (a0, a1) = self.split_i32x8(a);
4126 let (b0, b1) = self.split_i32x8(b);
4127 self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
4128 }
4129 #[inline(always)]
4130 fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4131 let (a0, a1) = self.split_i32x8(a);
4132 let (b0, b1) = self.split_i32x8(b);
4133 self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
4134 }
4135 #[inline(always)]
4136 fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4137 let (a0, a1) = self.split_i32x8(a);
4138 let (b0, b1) = self.split_i32x8(b);
4139 self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
4140 }
4141 #[inline(always)]
4142 fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4143 let (a0, a1) = self.split_i32x8(a);
4144 let (b0, b1) = self.split_i32x8(b);
4145 self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
4146 }
4147 #[inline(always)]
4148 fn shr_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
4149 let (a0, a1) = self.split_i32x8(a);
4150 self.combine_i32x4(self.shr_i32x4(a0, b), self.shr_i32x4(a1, b))
4151 }
4152 #[inline(always)]
4153 fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4154 let (a0, a1) = self.split_i32x8(a);
4155 let (b0, b1) = self.split_i32x8(b);
4156 self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
4157 }
4158 #[inline(always)]
4159 fn shl_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
4160 let (a0, a1) = self.split_i32x8(a);
4161 self.combine_i32x4(self.shl_i32x4(a0, b), self.shl_i32x4(a1, b))
4162 }
4163 #[inline(always)]
4164 fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4165 let (a0, a1) = self.split_i32x8(a);
4166 let (b0, b1) = self.split_i32x8(b);
4167 self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
4168 }
4169 #[inline(always)]
4170 fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4171 let (a0, a1) = self.split_i32x8(a);
4172 let (b0, b1) = self.split_i32x8(b);
4173 self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
4174 }
4175 #[inline(always)]
4176 fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4177 let (a0, a1) = self.split_i32x8(a);
4178 let (b0, b1) = self.split_i32x8(b);
4179 self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
4180 }
4181 #[inline(always)]
4182 fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4183 let (a0, a1) = self.split_i32x8(a);
4184 let (b0, b1) = self.split_i32x8(b);
4185 self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
4186 }
4187 #[inline(always)]
4188 fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
4189 let (a0, a1) = self.split_i32x8(a);
4190 let (b0, b1) = self.split_i32x8(b);
4191 self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
4192 }
4193 #[inline(always)]
4194 fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4195 let (a0, _) = self.split_i32x8(a);
4196 let (b0, _) = self.split_i32x8(b);
4197 self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
4198 }
4199 #[inline(always)]
4200 fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4201 let (_, a1) = self.split_i32x8(a);
4202 let (_, b1) = self.split_i32x8(b);
4203 self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
4204 }
4205 #[inline(always)]
4206 fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4207 let (a0, a1) = self.split_i32x8(a);
4208 let (b0, b1) = self.split_i32x8(b);
4209 self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
4210 }
4211 #[inline(always)]
4212 fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4213 let (a0, a1) = self.split_i32x8(a);
4214 let (b0, b1) = self.split_i32x8(b);
4215 self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
4216 }
4217 #[inline(always)]
4218 fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
4219 let (a0, a1) = self.split_mask32x8(a);
4220 let (b0, b1) = self.split_i32x8(b);
4221 let (c0, c1) = self.split_i32x8(c);
4222 self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
4223 }
4224 #[inline(always)]
4225 fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4226 let (a0, a1) = self.split_i32x8(a);
4227 let (b0, b1) = self.split_i32x8(b);
4228 self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
4229 }
4230 #[inline(always)]
4231 fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
4232 let (a0, a1) = self.split_i32x8(a);
4233 let (b0, b1) = self.split_i32x8(b);
4234 self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
4235 }
4236 #[inline(always)]
4237 fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
4238 let mut result = [0; 16usize];
4239 result[0..8usize].copy_from_slice(&a.val);
4240 result[8usize..16usize].copy_from_slice(&b.val);
4241 result.simd_into(self)
4242 }
4243 #[inline(always)]
4244 fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
4245 let mut b0 = [0; 4usize];
4246 let mut b1 = [0; 4usize];
4247 b0.copy_from_slice(&a.val[0..4usize]);
4248 b1.copy_from_slice(&a.val[4usize..8usize]);
4249 (b0.simd_into(self), b1.simd_into(self))
4250 }
4251 #[inline(always)]
4252 fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
4253 let (a0, a1) = self.split_i32x8(a);
4254 self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
4255 }
4256 #[inline(always)]
4257 fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
4258 let (a0, a1) = self.split_i32x8(a);
4259 self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
4260 }
4261 #[inline(always)]
4262 fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
4263 let (a0, a1) = self.split_i32x8(a);
4264 self.combine_u32x4(
4265 self.reinterpret_u32_i32x4(a0),
4266 self.reinterpret_u32_i32x4(a1),
4267 )
4268 }
4269 #[inline(always)]
4270 fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
4271 let (a0, a1) = self.split_i32x8(a);
4272 self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
4273 }
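// u32x8 operations: split into two u32x4 halves, delegate, and recombine with combine_u32x4.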
4274 #[inline(always)]
4275 fn splat_u32x8(self, a: u32) -> u32x8<Self> {
4276 let half = self.splat_u32x4(a);
4277 self.combine_u32x4(half, half)
4278 }
4279 #[inline(always)]
4280 fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
4281 let (a0, a1) = self.split_u32x8(a);
4282 self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
4283 }
4284 #[inline(always)]
4285 fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4286 let (a0, a1) = self.split_u32x8(a);
4287 let (b0, b1) = self.split_u32x8(b);
4288 self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
4289 }
4290 #[inline(always)]
4291 fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4292 let (a0, a1) = self.split_u32x8(a);
4293 let (b0, b1) = self.split_u32x8(b);
4294 self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
4295 }
4296 #[inline(always)]
4297 fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4298 let (a0, a1) = self.split_u32x8(a);
4299 let (b0, b1) = self.split_u32x8(b);
4300 self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
4301 }
4302 #[inline(always)]
4303 fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4304 let (a0, a1) = self.split_u32x8(a);
4305 let (b0, b1) = self.split_u32x8(b);
4306 self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
4307 }
4308 #[inline(always)]
4309 fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4310 let (a0, a1) = self.split_u32x8(a);
4311 let (b0, b1) = self.split_u32x8(b);
4312 self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
4313 }
4314 #[inline(always)]
4315 fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4316 let (a0, a1) = self.split_u32x8(a);
4317 let (b0, b1) = self.split_u32x8(b);
4318 self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
4319 }
4320 #[inline(always)]
4321 fn shr_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4322 let (a0, a1) = self.split_u32x8(a);
4323 self.combine_u32x4(self.shr_u32x4(a0, b), self.shr_u32x4(a1, b))
4324 }
4325 #[inline(always)]
4326 fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4327 let (a0, a1) = self.split_u32x8(a);
4328 let (b0, b1) = self.split_u32x8(b);
4329 self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
4330 }
4331 #[inline(always)]
4332 fn shl_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4333 let (a0, a1) = self.split_u32x8(a);
4334 self.combine_u32x4(self.shl_u32x4(a0, b), self.shl_u32x4(a1, b))
4335 }
4336 #[inline(always)]
4337 fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4338 let (a0, a1) = self.split_u32x8(a);
4339 let (b0, b1) = self.split_u32x8(b);
4340 self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
4341 }
4342 #[inline(always)]
4343 fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4344 let (a0, a1) = self.split_u32x8(a);
4345 let (b0, b1) = self.split_u32x8(b);
4346 self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
4347 }
4348 #[inline(always)]
4349 fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4350 let (a0, a1) = self.split_u32x8(a);
4351 let (b0, b1) = self.split_u32x8(b);
4352 self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
4353 }
4354 #[inline(always)]
4355 fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4356 let (a0, a1) = self.split_u32x8(a);
4357 let (b0, b1) = self.split_u32x8(b);
4358 self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
4359 }
4360 #[inline(always)]
4361 fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4362 let (a0, a1) = self.split_u32x8(a);
4363 let (b0, b1) = self.split_u32x8(b);
4364 self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
4365 }
4366 #[inline(always)]
4367 fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4368 let (a0, _) = self.split_u32x8(a);
4369 let (b0, _) = self.split_u32x8(b);
4370 self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
4371 }
4372 #[inline(always)]
4373 fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4374 let (_, a1) = self.split_u32x8(a);
4375 let (_, b1) = self.split_u32x8(b);
4376 self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
4377 }
4378 #[inline(always)]
4379 fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4380 let (a0, a1) = self.split_u32x8(a);
4381 let (b0, b1) = self.split_u32x8(b);
4382 self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
4383 }
4384 #[inline(always)]
4385 fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4386 let (a0, a1) = self.split_u32x8(a);
4387 let (b0, b1) = self.split_u32x8(b);
4388 self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
4389 }
4390 #[inline(always)]
4391 fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
4392 let (a0, a1) = self.split_mask32x8(a);
4393 let (b0, b1) = self.split_u32x8(b);
4394 let (c0, c1) = self.split_u32x8(c);
4395 self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
4396 }
4397 #[inline(always)]
4398 fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4399 let (a0, a1) = self.split_u32x8(a);
4400 let (b0, b1) = self.split_u32x8(b);
4401 self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
4402 }
4403 #[inline(always)]
4404 fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4405 let (a0, a1) = self.split_u32x8(a);
4406 let (b0, b1) = self.split_u32x8(b);
4407 self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
4408 }
4409 #[inline(always)]
4410 fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
4411 let mut result = [0; 16usize];
4412 result[0..8usize].copy_from_slice(&a.val);
4413 result[8usize..16usize].copy_from_slice(&b.val);
4414 result.simd_into(self)
4415 }
4416 #[inline(always)]
4417 fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
4418 let mut b0 = [0; 4usize];
4419 let mut b1 = [0; 4usize];
4420 b0.copy_from_slice(&a.val[0..4usize]);
4421 b1.copy_from_slice(&a.val[4usize..8usize]);
4422 (b0.simd_into(self), b1.simd_into(self))
4423 }
4424 #[inline(always)]
4425 fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
4426 let (a0, a1) = self.split_u32x8(a);
4427 self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
4428 }
4429 #[inline(always)]
4430 fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
4431 let (a0, a1) = self.split_u32x8(a);
4432 self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
4433 }
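// mask32x8 operations: split into two mask32x4 halves, delegate, and recombine.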
4434 #[inline(always)]
4435 fn splat_mask32x8(self, a: i32) -> mask32x8<Self> {
4436 let half = self.splat_mask32x4(a);
4437 self.combine_mask32x4(half, half)
4438 }
4439 #[inline(always)]
4440 fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
4441 let (a0, a1) = self.split_mask32x8(a);
4442 self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
4443 }
4444 #[inline(always)]
4445 fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4446 let (a0, a1) = self.split_mask32x8(a);
4447 let (b0, b1) = self.split_mask32x8(b);
4448 self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
4449 }
4450 #[inline(always)]
4451 fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4452 let (a0, a1) = self.split_mask32x8(a);
4453 let (b0, b1) = self.split_mask32x8(b);
4454 self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
4455 }
4456 #[inline(always)]
4457 fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4458 let (a0, a1) = self.split_mask32x8(a);
4459 let (b0, b1) = self.split_mask32x8(b);
4460 self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
4461 }
4462 #[inline(always)]
4463 fn select_mask32x8(
4464 self,
4465 a: mask32x8<Self>,
4466 b: mask32x8<Self>,
4467 c: mask32x8<Self>,
4468 ) -> mask32x8<Self> {
4469 let (a0, a1) = self.split_mask32x8(a);
4470 let (b0, b1) = self.split_mask32x8(b);
4471 let (c0, c1) = self.split_mask32x8(c);
4472 self.combine_mask32x4(
4473 self.select_mask32x4(a0, b0, c0),
4474 self.select_mask32x4(a1, b1, c1),
4475 )
4476 }
4477 #[inline(always)]
4478 fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4479 let (a0, a1) = self.split_mask32x8(a);
4480 let (b0, b1) = self.split_mask32x8(b);
4481 self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
4482 }
4483 #[inline(always)]
4484 fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
4485 let mut result = [0; 16usize];
4486 result[0..8usize].copy_from_slice(&a.val);
4487 result[8usize..16usize].copy_from_slice(&b.val);
4488 result.simd_into(self)
4489 }
4490 #[inline(always)]
4491 fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
4492 let mut b0 = [0; 4usize];
4493 let mut b1 = [0; 4usize];
4494 b0.copy_from_slice(&a.val[0..4usize]);
4495 b1.copy_from_slice(&a.val[4usize..8usize]);
4496 (b0.simd_into(self), b1.simd_into(self))
4497 }
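// f64x4 operations: split into two f64x2 halves, delegate (including madd/msub and the precise
// min/max variants), and recombine with combine_f64x2.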
4498 #[inline(always)]
4499 fn splat_f64x4(self, a: f64) -> f64x4<Self> {
4500 let half = self.splat_f64x2(a);
4501 self.combine_f64x2(half, half)
4502 }
4503 #[inline(always)]
4504 fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4505 let (a0, a1) = self.split_f64x4(a);
4506 self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
4507 }
4508 #[inline(always)]
4509 fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4510 let (a0, a1) = self.split_f64x4(a);
4511 self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
4512 }
4513 #[inline(always)]
4514 fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4515 let (a0, a1) = self.split_f64x4(a);
4516 self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
4517 }
4518 #[inline(always)]
4519 fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4520 let (a0, a1) = self.split_f64x4(a);
4521 let (b0, b1) = self.split_f64x4(b);
4522 self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
4523 }
4524 #[inline(always)]
4525 fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4526 let (a0, a1) = self.split_f64x4(a);
4527 let (b0, b1) = self.split_f64x4(b);
4528 self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
4529 }
4530 #[inline(always)]
4531 fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4532 let (a0, a1) = self.split_f64x4(a);
4533 let (b0, b1) = self.split_f64x4(b);
4534 self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
4535 }
4536 #[inline(always)]
4537 fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4538 let (a0, a1) = self.split_f64x4(a);
4539 let (b0, b1) = self.split_f64x4(b);
4540 self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
4541 }
4542 #[inline(always)]
4543 fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4544 let (a0, a1) = self.split_f64x4(a);
4545 let (b0, b1) = self.split_f64x4(b);
4546 self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
4547 }
4548 #[inline(always)]
4549 fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4550 let (a0, a1) = self.split_f64x4(a);
4551 let (b0, b1) = self.split_f64x4(b);
4552 self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
4553 }
4554 #[inline(always)]
4555 fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4556 let (a0, a1) = self.split_f64x4(a);
4557 let (b0, b1) = self.split_f64x4(b);
4558 self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
4559 }
4560 #[inline(always)]
4561 fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4562 let (a0, a1) = self.split_f64x4(a);
4563 let (b0, b1) = self.split_f64x4(b);
4564 self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
4565 }
4566 #[inline(always)]
4567 fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4568 let (a0, a1) = self.split_f64x4(a);
4569 let (b0, b1) = self.split_f64x4(b);
4570 self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
4571 }
4572 #[inline(always)]
4573 fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4574 let (a0, a1) = self.split_f64x4(a);
4575 let (b0, b1) = self.split_f64x4(b);
4576 self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
4577 }
4578 #[inline(always)]
4579 fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4580 let (a0, _) = self.split_f64x4(a);
4581 let (b0, _) = self.split_f64x4(b);
4582 self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
4583 }
4584 #[inline(always)]
4585 fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4586 let (_, a1) = self.split_f64x4(a);
4587 let (_, b1) = self.split_f64x4(b);
4588 self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
4589 }
4590 #[inline(always)]
4591 fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4592 let (a0, a1) = self.split_f64x4(a);
4593 let (b0, b1) = self.split_f64x4(b);
4594 self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
4595 }
4596 #[inline(always)]
4597 fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4598 let (a0, a1) = self.split_f64x4(a);
4599 let (b0, b1) = self.split_f64x4(b);
4600 self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
4601 }
4602 #[inline(always)]
4603 fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4604 let (a0, a1) = self.split_f64x4(a);
4605 let (b0, b1) = self.split_f64x4(b);
4606 self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
4607 }
4608 #[inline(always)]
4609 fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4610 let (a0, a1) = self.split_f64x4(a);
4611 let (b0, b1) = self.split_f64x4(b);
4612 self.combine_f64x2(
4613 self.max_precise_f64x2(a0, b0),
4614 self.max_precise_f64x2(a1, b1),
4615 )
4616 }
4617 #[inline(always)]
4618 fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4619 let (a0, a1) = self.split_f64x4(a);
4620 let (b0, b1) = self.split_f64x4(b);
4621 self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
4622 }
4623 #[inline(always)]
4624 fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4625 let (a0, a1) = self.split_f64x4(a);
4626 let (b0, b1) = self.split_f64x4(b);
4627 self.combine_f64x2(
4628 self.min_precise_f64x2(a0, b0),
4629 self.min_precise_f64x2(a1, b1),
4630 )
4631 }
4632 #[inline(always)]
4633 fn madd_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4634 let (a0, a1) = self.split_f64x4(a);
4635 let (b0, b1) = self.split_f64x4(b);
4636 let (c0, c1) = self.split_f64x4(c);
4637 self.combine_f64x2(self.madd_f64x2(a0, b0, c0), self.madd_f64x2(a1, b1, c1))
4638 }
4639 #[inline(always)]
4640 fn msub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4641 let (a0, a1) = self.split_f64x4(a);
4642 let (b0, b1) = self.split_f64x4(b);
4643 let (c0, c1) = self.split_f64x4(c);
4644 self.combine_f64x2(self.msub_f64x2(a0, b0, c0), self.msub_f64x2(a1, b1, c1))
4645 }
4646 #[inline(always)]
4647 fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4648 let (a0, a1) = self.split_f64x4(a);
4649 self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
4650 }
4651 #[inline(always)]
4652 fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4653 let (a0, a1) = self.split_f64x4(a);
4654 self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
4655 }
4656 #[inline(always)]
4657 fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4658 let (a0, a1) = self.split_f64x4(a);
4659 self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
4660 }
4661 #[inline(always)]
4662 fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4663 let (a0, a1) = self.split_mask64x4(a);
4664 let (b0, b1) = self.split_f64x4(b);
4665 let (c0, c1) = self.split_f64x4(c);
4666 self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
4667 }
4668 #[inline(always)]
4669 fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
4670 let mut result = [0.0; 8usize];
4671 result[0..4usize].copy_from_slice(&a.val);
4672 result[4usize..8usize].copy_from_slice(&b.val);
4673 result.simd_into(self)
4674 }
4675 #[inline(always)]
4676 fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
4677 let mut b0 = [0.0; 2usize];
4678 let mut b1 = [0.0; 2usize];
4679 b0.copy_from_slice(&a.val[0..2usize]);
4680 b1.copy_from_slice(&a.val[2usize..4usize]);
4681 (b0.simd_into(self), b1.simd_into(self))
4682 }
4683 #[inline(always)]
4684 fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
4685 let (a0, a1) = self.split_f64x4(a);
4686 self.combine_f32x4(
4687 self.reinterpret_f32_f64x2(a0),
4688 self.reinterpret_f32_f64x2(a1),
4689 )
4690 }
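// mask64x4 operations: split into two mask64x2 halves, delegate, and recombine.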
4691 #[inline(always)]
4692 fn splat_mask64x4(self, a: i64) -> mask64x4<Self> {
4693 let half = self.splat_mask64x2(a);
4694 self.combine_mask64x2(half, half)
4695 }
4696 #[inline(always)]
4697 fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
4698 let (a0, a1) = self.split_mask64x4(a);
4699 self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
4700 }
4701 #[inline(always)]
4702 fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4703 let (a0, a1) = self.split_mask64x4(a);
4704 let (b0, b1) = self.split_mask64x4(b);
4705 self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
4706 }
4707 #[inline(always)]
4708 fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4709 let (a0, a1) = self.split_mask64x4(a);
4710 let (b0, b1) = self.split_mask64x4(b);
4711 self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
4712 }
4713 #[inline(always)]
4714 fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4715 let (a0, a1) = self.split_mask64x4(a);
4716 let (b0, b1) = self.split_mask64x4(b);
4717 self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
4718 }
4719 #[inline(always)]
4720 fn select_mask64x4(
4721 self,
4722 a: mask64x4<Self>,
4723 b: mask64x4<Self>,
4724 c: mask64x4<Self>,
4725 ) -> mask64x4<Self> {
4726 let (a0, a1) = self.split_mask64x4(a);
4727 let (b0, b1) = self.split_mask64x4(b);
4728 let (c0, c1) = self.split_mask64x4(c);
4729 self.combine_mask64x2(
4730 self.select_mask64x2(a0, b0, c0),
4731 self.select_mask64x2(a1, b1, c1),
4732 )
4733 }
4734 #[inline(always)]
4735 fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4736 let (a0, a1) = self.split_mask64x4(a);
4737 let (b0, b1) = self.split_mask64x4(b);
4738 self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
4739 }
4740 #[inline(always)]
4741 fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
4742 let mut result = [0; 8usize];
4743 result[0..4usize].copy_from_slice(&a.val);
4744 result[4usize..8usize].copy_from_slice(&b.val);
4745 result.simd_into(self)
4746 }
4747 #[inline(always)]
4748 fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
4749 let mut b0 = [0; 2usize];
4750 let mut b1 = [0; 2usize];
4751 b0.copy_from_slice(&a.val[0..2usize]);
4752 b1.copy_from_slice(&a.val[2usize..4usize]);
4753 (b0.simd_into(self), b1.simd_into(self))
4754 }
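// f32x16 operations: split into two f32x8 halves and delegate; each f32x8 op in turn splits down
// to the native f32x4 implementation, so a 512-bit op is four 128-bit ops on this level.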
    #[inline(always)]
    fn splat_f32x16(self, a: f32) -> f32x16<Self> {
        let half = self.splat_f32x8(a);
        self.combine_f32x8(half, half)
    }
    #[inline(always)]
    fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
    }
    #[inline(always)]
    fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
    }
    #[inline(always)]
    fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
    }
    #[inline(always)]
    fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
    }
    #[inline(always)]
    fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
    }
    #[inline(always)]
    fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
    }
    #[inline(always)]
    fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
    }
    #[inline(always)]
    fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
    }
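    // zip_low interleaves the low halves of `a` and `b` across the full result
    // (a[0], b[0], a[1], b[1], ...); zip_high does the same for the high halves.
    // unzip_low gathers the even-indexed lanes of `a` followed by those of `b`;
    // unzip_high gathers the odd-indexed lanes.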
    #[inline(always)]
    fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, _) = self.split_f32x16(a);
        let (b0, _) = self.split_f32x16(b);
        self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
    }
    #[inline(always)]
    fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (_, a1) = self.split_f32x16(a);
        let (_, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
    }
    #[inline(always)]
    fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
    }
    #[inline(always)]
    fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(
            self.max_precise_f32x8(a0, b0),
            self.max_precise_f32x8(a1, b1),
        )
    }
    #[inline(always)]
    fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
    }
    #[inline(always)]
    fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        self.combine_f32x8(
            self.min_precise_f32x8(a0, b0),
            self.min_precise_f32x8(a1, b1),
        )
    }
    #[inline(always)]
    fn madd_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        let (c0, c1) = self.split_f32x16(c);
        self.combine_f32x8(self.madd_f32x8(a0, b0, c0), self.madd_f32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn msub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        let (c0, c1) = self.split_f32x16(c);
        self.combine_f32x8(self.msub_f32x8(a0, b0, c0), self.msub_f32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
    }
    #[inline(always)]
    fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
    }
    #[inline(always)]
    fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
    }
    #[inline(always)]
    fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_f32x16(b);
        let (c0, c1) = self.split_f32x16(c);
        self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
        let mut b0 = [0.0; 8usize];
        let mut b1 = [0.0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_f64x4(
            self.reinterpret_f64_f32x8(a0),
            self.reinterpret_f64_f32x8(a1),
        )
    }
    #[inline(always)]
    fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_i32x8(
            self.reinterpret_i32_f32x8(a0),
            self.reinterpret_i32_f32x8(a1),
        )
    }
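    // load_interleaved_128 reads four 128-bit rows of four f32 each and
    // transposes them, so lane i of the result is src[(i % 4) * 4 + i / 4].
    // The 4x4 transpose is its own inverse, which is why the store below
    // writes with the same index pattern.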
    #[inline(always)]
    fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
        *dest = [
            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
            a[11usize], a[15usize],
        ];
    }
    #[inline(always)]
    fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
    }
    #[inline(always)]
    fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_u32x8(
            self.reinterpret_u32_f32x8(a0),
            self.reinterpret_u32_f32x8(a1),
        )
    }
    #[inline(always)]
    fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
    }
    #[inline(always)]
    fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_f32x16(a);
        self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
    }
    #[inline(always)]
    fn splat_i8x64(self, a: i8) -> i8x64<Self> {
        let half = self.splat_i8x32(a);
        self.combine_i8x32(half, half)
    }
    #[inline(always)]
    fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
    }
    #[inline(always)]
    fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
    }
    #[inline(always)]
    fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
    }
    #[inline(always)]
    fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
    }
    #[inline(always)]
    fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
    }
    #[inline(always)]
    fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
    }
    #[inline(always)]
    fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
    }
    #[inline(always)]
    fn shr_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_i8x32(self.shr_i8x32(a0, b), self.shr_i8x32(a1, b))
    }
    #[inline(always)]
    fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
    }
    #[inline(always)]
    fn shl_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_i8x32(self.shl_i8x32(a0, b), self.shl_i8x32(a1, b))
    }
    #[inline(always)]
    fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
    }
    #[inline(always)]
    fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, _) = self.split_i8x64(a);
        let (b0, _) = self.split_i8x64(b);
        self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
    }
    #[inline(always)]
    fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (_, a1) = self.split_i8x64(a);
        let (_, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
    }
    #[inline(always)]
    fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        let (c0, c1) = self.split_i8x64(c);
        self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
    }
    #[inline(always)]
    fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
    }
    #[inline(always)]
    fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        let (b0, b1) = self.split_i8x64(b);
        self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
    }
    #[inline(always)]
    fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
        let mut b0 = [0; 32usize];
        let mut b1 = [0; 32usize];
        b0.copy_from_slice(&a.val[0..32usize]);
        b1.copy_from_slice(&a.val[32usize..64usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
    }
    #[inline(always)]
    fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
    }
    #[inline(always)]
    fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_i8x64(a);
        self.combine_u32x8(
            self.reinterpret_u32_i8x32(a0),
            self.reinterpret_u32_i8x32(a1),
        )
    }
    #[inline(always)]
    fn splat_u8x64(self, a: u8) -> u8x64<Self> {
        let half = self.splat_u8x32(a);
        self.combine_u8x32(half, half)
    }
    #[inline(always)]
    fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
    }
    #[inline(always)]
    fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
    }
    #[inline(always)]
    fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
    }
    #[inline(always)]
    fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
    }
    #[inline(always)]
    fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
    }
    #[inline(always)]
    fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
    }
    #[inline(always)]
    fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
    }
    #[inline(always)]
    fn shr_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        self.combine_u8x32(self.shr_u8x32(a0, b), self.shr_u8x32(a1, b))
    }
    #[inline(always)]
    fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
    }
    #[inline(always)]
    fn shl_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        self.combine_u8x32(self.shl_u8x32(a0, b), self.shl_u8x32(a1, b))
    }
    #[inline(always)]
    fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
    }
    #[inline(always)]
    fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, _) = self.split_u8x64(a);
        let (b0, _) = self.split_u8x64(b);
        self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
    }
    #[inline(always)]
    fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (_, a1) = self.split_u8x64(a);
        let (_, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
    }
    #[inline(always)]
    fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        let (c0, c1) = self.split_u8x64(c);
        self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
    }
    #[inline(always)]
    fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
    }
    #[inline(always)]
    fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u8x64(a);
        let (b0, b1) = self.split_u8x64(b);
        self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
    }
    #[inline(always)]
    fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
        let mut b0 = [0; 32usize];
        let mut b1 = [0; 32usize];
        b0.copy_from_slice(&a.val[0..32usize]);
        b1.copy_from_slice(&a.val[32usize..64usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
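    // Same de-interleaving pattern as the f32x16 version, but with 16 bytes per
    // 128-bit row: the result gathers every fourth byte of `src`, one row of the
    // 16x4 layout at a time (lane i of the result is src[(i % 16) * 4 + i / 16]).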
    #[inline(always)]
    fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[16usize],
            src[20usize],
            src[24usize],
            src[28usize],
            src[32usize],
            src[36usize],
            src[40usize],
            src[44usize],
            src[48usize],
            src[52usize],
            src[56usize],
            src[60usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[17usize],
            src[21usize],
            src[25usize],
            src[29usize],
            src[33usize],
            src[37usize],
            src[41usize],
            src[45usize],
            src[49usize],
            src[53usize],
            src[57usize],
            src[61usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[18usize],
            src[22usize],
            src[26usize],
            src[30usize],
            src[34usize],
            src[38usize],
            src[42usize],
            src[46usize],
            src[50usize],
            src[54usize],
            src[58usize],
            src[62usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
            src[19usize],
            src[23usize],
            src[27usize],
            src[31usize],
            src[35usize],
            src[39usize],
            src[43usize],
            src[47usize],
            src[51usize],
            src[55usize],
            src[59usize],
            src[63usize],
        ]
        .simd_into(self)
    }
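    // Inverse of load_interleaved_128_u8x64: byte i of `a` is written back to
    // dest[(i % 16) * 4 + i / 16].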
    #[inline(always)]
    fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
        *dest = [
            a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
            a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
            a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
            a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
            a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
            a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
            a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
            a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
            a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
            a[63usize],
        ];
    }
    #[inline(always)]
    fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u8x64(a);
        self.combine_u32x8(
            self.reinterpret_u32_u8x32(a0),
            self.reinterpret_u32_u8x32(a1),
        )
    }
    #[inline(always)]
    fn splat_mask8x64(self, a: i8) -> mask8x64<Self> {
        let half = self.splat_mask8x32(a);
        self.combine_mask8x32(half, half)
    }
    #[inline(always)]
    fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
    }
    #[inline(always)]
    fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_mask8x64(b);
        self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
    }
    #[inline(always)]
    fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_mask8x64(b);
        self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
    }
    #[inline(always)]
    fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_mask8x64(b);
        self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
    }
    #[inline(always)]
    fn select_mask8x64(
        self,
        a: mask8x64<Self>,
        b: mask8x64<Self>,
        c: mask8x64<Self>,
    ) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_mask8x64(b);
        let (c0, c1) = self.split_mask8x64(c);
        self.combine_mask8x32(
            self.select_mask8x32(a0, b0, c0),
            self.select_mask8x32(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
        let (a0, a1) = self.split_mask8x64(a);
        let (b0, b1) = self.split_mask8x64(b);
        self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
    }
    #[inline(always)]
    fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
        let mut b0 = [0; 32usize];
        let mut b1 = [0; 32usize];
        b0.copy_from_slice(&a.val[0..32usize]);
        b1.copy_from_slice(&a.val[32usize..64usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn splat_i16x32(self, a: i16) -> i16x32<Self> {
        let half = self.splat_i16x16(a);
        self.combine_i16x16(half, half)
    }
    #[inline(always)]
    fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
    }
    #[inline(always)]
    fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
    }
    #[inline(always)]
    fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
    }
    #[inline(always)]
    fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
    }
    #[inline(always)]
    fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
    }
    #[inline(always)]
    fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
    }
    #[inline(always)]
    fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
    }
    #[inline(always)]
    fn shr_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_i16x16(self.shr_i16x16(a0, b), self.shr_i16x16(a1, b))
    }
    #[inline(always)]
    fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
    }
    #[inline(always)]
    fn shl_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_i16x16(self.shl_i16x16(a0, b), self.shl_i16x16(a1, b))
    }
    #[inline(always)]
    fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
    }
    #[inline(always)]
    fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, _) = self.split_i16x32(a);
        let (b0, _) = self.split_i16x32(b);
        self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
    }
    #[inline(always)]
    fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (_, a1) = self.split_i16x32(a);
        let (_, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(
            self.unzip_high_i16x16(a0, a1),
            self.unzip_high_i16x16(b0, b1),
        )
    }
    #[inline(always)]
    fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        let (c0, c1) = self.split_i16x32(c);
        self.combine_i16x16(
            self.select_i16x16(a0, b0, c0),
            self.select_i16x16(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
    }
    #[inline(always)]
    fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        let (b0, b1) = self.split_i16x32(b);
        self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
    }
    #[inline(always)]
    fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
        let mut b0 = [0; 16usize];
        let mut b1 = [0; 16usize];
        b0.copy_from_slice(&a.val[0..16usize]);
        b1.copy_from_slice(&a.val[16usize..32usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
    }
    #[inline(always)]
    fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_u8x32(
            self.reinterpret_u8_i16x16(a0),
            self.reinterpret_u8_i16x16(a1),
        )
    }
    #[inline(always)]
    fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_i16x32(a);
        self.combine_u32x8(
            self.reinterpret_u32_i16x16(a0),
            self.reinterpret_u32_i16x16(a1),
        )
    }
    #[inline(always)]
    fn splat_u16x32(self, a: u16) -> u16x32<Self> {
        let half = self.splat_u16x16(a);
        self.combine_u16x16(half, half)
    }
    #[inline(always)]
    fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
    }
    #[inline(always)]
    fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
    }
    #[inline(always)]
    fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
    }
    #[inline(always)]
    fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
    }
    #[inline(always)]
    fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
    }
    #[inline(always)]
    fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
    }
    #[inline(always)]
    fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
    }
    #[inline(always)]
    fn shr_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u16x16(self.shr_u16x16(a0, b), self.shr_u16x16(a1, b))
    }
    #[inline(always)]
    fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
    }
    #[inline(always)]
    fn shl_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u16x16(self.shl_u16x16(a0, b), self.shl_u16x16(a1, b))
    }
    #[inline(always)]
    fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
    }
    #[inline(always)]
    fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, _) = self.split_u16x32(a);
        let (b0, _) = self.split_u16x32(b);
        self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
    }
    #[inline(always)]
    fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (_, a1) = self.split_u16x32(a);
        let (_, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(
            self.unzip_high_u16x16(a0, a1),
            self.unzip_high_u16x16(b0, b1),
        )
    }
    #[inline(always)]
    fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        let (c0, c1) = self.split_u16x32(c);
        self.combine_u16x16(
            self.select_u16x16(a0, b0, c0),
            self.select_u16x16(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
    }
    #[inline(always)]
    fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        let (b0, b1) = self.split_u16x32(b);
        self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
    }
    #[inline(always)]
    fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
        let mut b0 = [0; 16usize];
        let mut b1 = [0; 16usize];
        b0.copy_from_slice(&a.val[0..16usize]);
        b1.copy_from_slice(&a.val[16usize..32usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[16usize],
            src[20usize],
            src[24usize],
            src[28usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[17usize],
            src[21usize],
            src[25usize],
            src[29usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[18usize],
            src[22usize],
            src[26usize],
            src[30usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
            src[19usize],
            src[23usize],
            src[27usize],
            src[31usize],
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
        *dest = [
            a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
            a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
            a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
            a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
            a[7usize], a[15usize], a[23usize], a[31usize],
        ];
    }
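    // narrow halves the lane width (u16 -> u8): each 256-bit half is narrowed
    // independently and the two u8x16 results are concatenated into a u8x32.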
    #[inline(always)]
    fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
    }
    #[inline(always)]
    fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u8x32(
            self.reinterpret_u8_u16x16(a0),
            self.reinterpret_u8_u16x16(a1),
        )
    }
    #[inline(always)]
    fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u16x32(a);
        self.combine_u32x8(
            self.reinterpret_u32_u16x16(a0),
            self.reinterpret_u32_u16x16(a1),
        )
    }
    #[inline(always)]
    fn splat_mask16x32(self, a: i16) -> mask16x32<Self> {
        let half = self.splat_mask16x16(a);
        self.combine_mask16x16(half, half)
    }
    #[inline(always)]
    fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
    }
    #[inline(always)]
    fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
    }
    #[inline(always)]
    fn select_mask16x32(
        self,
        a: mask16x32<Self>,
        b: mask16x32<Self>,
        c: mask16x32<Self>,
    ) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        let (c0, c1) = self.split_mask16x32(c);
        self.combine_mask16x16(
            self.select_mask16x16(a0, b0, c0),
            self.select_mask16x16(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
        let (a0, a1) = self.split_mask16x32(a);
        let (b0, b1) = self.split_mask16x32(b);
        self.combine_mask16x16(
            self.simd_eq_mask16x16(a0, b0),
            self.simd_eq_mask16x16(a1, b1),
        )
    }
    #[inline(always)]
    fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
        let mut b0 = [0; 16usize];
        let mut b1 = [0; 16usize];
        b0.copy_from_slice(&a.val[0..16usize]);
        b1.copy_from_slice(&a.val[16usize..32usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn splat_i32x16(self, a: i32) -> i32x16<Self> {
        let half = self.splat_i32x8(a);
        self.combine_i32x8(half, half)
    }
    #[inline(always)]
    fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
    }
    #[inline(always)]
    fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
    }
    #[inline(always)]
    fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
    }
    #[inline(always)]
    fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
    }
    #[inline(always)]
    fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
    }
    #[inline(always)]
    fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
    }
    #[inline(always)]
    fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
    }
    #[inline(always)]
    fn shr_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.shr_i32x8(a0, b), self.shr_i32x8(a1, b))
    }
    #[inline(always)]
    fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
    }
    #[inline(always)]
    fn shl_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.shl_i32x8(a0, b), self.shl_i32x8(a1, b))
    }
    #[inline(always)]
    fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
    }
    #[inline(always)]
    fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, _) = self.split_i32x16(a);
        let (b0, _) = self.split_i32x16(b);
        self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
    }
    #[inline(always)]
    fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (_, a1) = self.split_i32x16(a);
        let (_, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
    }
    #[inline(always)]
    fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        let (c0, c1) = self.split_i32x16(c);
        self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
    }
    #[inline(always)]
    fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        let (b0, b1) = self.split_i32x16(b);
        self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
    }
    #[inline(always)]
    fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
    }
    #[inline(always)]
    fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
    }
    #[inline(always)]
    fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_u32x8(
            self.reinterpret_u32_i32x8(a0),
            self.reinterpret_u32_i32x8(a1),
        )
    }
    #[inline(always)]
    fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_i32x16(a);
        self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
    }
6070 #[inline(always)]
6071 fn splat_u32x16(self, a: u32) -> u32x16<Self> {
6072 let half = self.splat_u32x8(a);
6073 self.combine_u32x8(half, half)
6074 }
6075 #[inline(always)]
6076 fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
6077 let (a0, a1) = self.split_u32x16(a);
6078 self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
6079 }
6080 #[inline(always)]
6081 fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6082 let (a0, a1) = self.split_u32x16(a);
6083 let (b0, b1) = self.split_u32x16(b);
6084 self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
6085 }
6086 #[inline(always)]
6087 fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6088 let (a0, a1) = self.split_u32x16(a);
6089 let (b0, b1) = self.split_u32x16(b);
6090 self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
6091 }
6092 #[inline(always)]
6093 fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6094 let (a0, a1) = self.split_u32x16(a);
6095 let (b0, b1) = self.split_u32x16(b);
6096 self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
6097 }
6098 #[inline(always)]
6099 fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6100 let (a0, a1) = self.split_u32x16(a);
6101 let (b0, b1) = self.split_u32x16(b);
6102 self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
6103 }
6104 #[inline(always)]
6105 fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6106 let (a0, a1) = self.split_u32x16(a);
6107 let (b0, b1) = self.split_u32x16(b);
6108 self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
6109 }
6110 #[inline(always)]
6111 fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6112 let (a0, a1) = self.split_u32x16(a);
6113 let (b0, b1) = self.split_u32x16(b);
6114 self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
6115 }
6116 #[inline(always)]
6117 fn shr_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
6118 let (a0, a1) = self.split_u32x16(a);
6119 self.combine_u32x8(self.shr_u32x8(a0, b), self.shr_u32x8(a1, b))
6120 }
6121 #[inline(always)]
6122 fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6123 let (a0, a1) = self.split_u32x16(a);
6124 let (b0, b1) = self.split_u32x16(b);
6125 self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
6126 }
6127 #[inline(always)]
6128 fn shl_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
6129 let (a0, a1) = self.split_u32x16(a);
6130 self.combine_u32x8(self.shl_u32x8(a0, b), self.shl_u32x8(a1, b))
6131 }
6132 #[inline(always)]
6133 fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6134 let (a0, a1) = self.split_u32x16(a);
6135 let (b0, b1) = self.split_u32x16(b);
6136 self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
6137 }
6138 #[inline(always)]
6139 fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6140 let (a0, a1) = self.split_u32x16(a);
6141 let (b0, b1) = self.split_u32x16(b);
6142 self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
6143 }
6144 #[inline(always)]
6145 fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6146 let (a0, a1) = self.split_u32x16(a);
6147 let (b0, b1) = self.split_u32x16(b);
6148 self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
6149 }
6150 #[inline(always)]
6151 fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6152 let (a0, a1) = self.split_u32x16(a);
6153 let (b0, b1) = self.split_u32x16(b);
6154 self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
6155 }
6156 #[inline(always)]
6157 fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
6158 let (a0, a1) = self.split_u32x16(a);
6159 let (b0, b1) = self.split_u32x16(b);
6160 self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
6161 }
6162 #[inline(always)]
6163 fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6164 let (a0, _) = self.split_u32x16(a);
6165 let (b0, _) = self.split_u32x16(b);
6166 self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
6167 }
6168 #[inline(always)]
6169 fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6170 let (_, a1) = self.split_u32x16(a);
6171 let (_, b1) = self.split_u32x16(b);
6172 self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
6173 }
6174 #[inline(always)]
6175 fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6176 let (a0, a1) = self.split_u32x16(a);
6177 let (b0, b1) = self.split_u32x16(b);
6178 self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
6179 }
6180 #[inline(always)]
6181 fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
6182 let (a0, a1) = self.split_u32x16(a);
6183 let (b0, b1) = self.split_u32x16(b);
6184 self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
6185 }
    #[inline(always)]
    fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        let (c0, c1) = self.split_u32x16(c);
        self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
    }
    #[inline(always)]
    fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
    }
    #[inline(always)]
    fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        let (b0, b1) = self.split_u32x16(b);
        self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
    }
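    // split copies the two 8-lane halves out of the backing array; together with
    // combine_u32x8 it is the building block for every 16-lane fallback above.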
    #[inline(always)]
    fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
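    // Interleaved 128-bit load: lane group i of the result holds
    // src[i], src[i + 4], src[i + 8], src[i + 12], i.e. it de-interleaves four
    // 128-bit chunks of the source.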
    #[inline(always)]
    fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
        [
            src[0usize],
            src[4usize],
            src[8usize],
            src[12usize],
            src[1usize],
            src[5usize],
            src[9usize],
            src[13usize],
            src[2usize],
            src[6usize],
            src[10usize],
            src[14usize],
            src[3usize],
            src[7usize],
            src[11usize],
            src[15usize],
        ]
        .simd_into(self)
    }
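    // The interleaved store applies the same permutation (a 4x4 transpose), which
    // is its own inverse, so a store followed by a load round-trips.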
    #[inline(always)]
    fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
        *dest = [
            a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
            a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
            a[11usize], a[15usize],
        ];
    }
    #[inline(always)]
    fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
    }
    #[inline(always)]
    fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_u32x16(a);
        self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
    }
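    // mask32x16 operations: the same split/recombine strategy, expressed over
    // mask32x8 halves.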
    #[inline(always)]
    fn splat_mask32x16(self, a: i32) -> mask32x16<Self> {
        let half = self.splat_mask32x8(a);
        self.combine_mask32x8(half, half)
    }
    #[inline(always)]
    fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
    }
    #[inline(always)]
    fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn select_mask32x16(
        self,
        a: mask32x16<Self>,
        b: mask32x16<Self>,
        c: mask32x16<Self>,
    ) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        let (c0, c1) = self.split_mask32x16(c);
        self.combine_mask32x8(
            self.select_mask32x8(a0, b0, c0),
            self.select_mask32x8(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
        let (a0, a1) = self.split_mask32x16(a);
        let (b0, b1) = self.split_mask32x16(b);
        self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
    }
    #[inline(always)]
    fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
        let mut b0 = [0; 8usize];
        let mut b1 = [0; 8usize];
        b0.copy_from_slice(&a.val[0..8usize]);
        b1.copy_from_slice(&a.val[8usize..16usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
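    // f64x8 operations are implemented over two f64x4 halves, mirroring the
    // integer fallbacks above.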
    #[inline(always)]
    fn splat_f64x8(self, a: f64) -> f64x8<Self> {
        let half = self.splat_f64x4(a);
        self.combine_f64x4(half, half)
    }
    #[inline(always)]
    fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
    }
    #[inline(always)]
    fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
    }
    #[inline(always)]
    fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
    }
    #[inline(always)]
    fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
    }
    #[inline(always)]
    fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
    }
    #[inline(always)]
    fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
    }
    #[inline(always)]
    fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
    }
    #[inline(always)]
    fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
    }
    #[inline(always)]
    fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
    }
    #[inline(always)]
    fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, _) = self.split_f64x8(a);
        let (b0, _) = self.split_f64x8(b);
        self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
    }
    #[inline(always)]
    fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (_, a1) = self.split_f64x8(a);
        let (_, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
    }
    #[inline(always)]
    fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
    }
    #[inline(always)]
    fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
    }
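    // min/max and their *_precise variants all delegate to the four-lane
    // counterparts, preserving whatever NaN handling those implementations provide.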
    #[inline(always)]
    fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
    }
    #[inline(always)]
    fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(
            self.max_precise_f64x4(a0, b0),
            self.max_precise_f64x4(a1, b1),
        )
    }
    #[inline(always)]
    fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
    }
    #[inline(always)]
    fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        self.combine_f64x4(
            self.min_precise_f64x4(a0, b0),
            self.min_precise_f64x4(a1, b1),
        )
    }
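    // Multiply-add/subtract and the rounding helpers (floor, fract, trunc) also
    // split, apply the four-lane operation, and recombine.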
    #[inline(always)]
    fn madd_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.madd_f64x4(a0, b0, c0), self.madd_f64x4(a1, b1, c1))
    }
    #[inline(always)]
    fn msub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.msub_f64x4(a0, b0, c0), self.msub_f64x4(a1, b1, c1))
    }
    #[inline(always)]
    fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
    }
    #[inline(always)]
    fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
    }
    #[inline(always)]
    fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
    }
    #[inline(always)]
    fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_f64x8(b);
        let (c0, c1) = self.split_f64x8(c);
        self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
    }
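    // split_f64x8 copies the backing array into two four-lane halves;
    // reinterpret_f32_f64x8 bit-casts each half and recombines them as an f32x16.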
    #[inline(always)]
    fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
        let mut b0 = [0.0; 4usize];
        let mut b1 = [0.0; 4usize];
        b0.copy_from_slice(&a.val[0..4usize]);
        b1.copy_from_slice(&a.val[4usize..8usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
    #[inline(always)]
    fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
        let (a0, a1) = self.split_f64x8(a);
        self.combine_f32x8(
            self.reinterpret_f32_f64x4(a0),
            self.reinterpret_f32_f64x4(a1),
        )
    }
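    // mask64x8 operations: the same split/recombine strategy over mask64x4 halves.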
    #[inline(always)]
    fn splat_mask64x8(self, a: i64) -> mask64x8<Self> {
        let half = self.splat_mask64x4(a);
        self.combine_mask64x4(half, half)
    }
    #[inline(always)]
    fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
    }
    #[inline(always)]
    fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn select_mask64x8(
        self,
        a: mask64x8<Self>,
        b: mask64x8<Self>,
        c: mask64x8<Self>,
    ) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        let (c0, c1) = self.split_mask64x8(c);
        self.combine_mask64x4(
            self.select_mask64x4(a0, b0, c0),
            self.select_mask64x4(a1, b1, c1),
        )
    }
    #[inline(always)]
    fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
        let (a0, a1) = self.split_mask64x8(a);
        let (b0, b1) = self.split_mask64x8(b);
        self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
    }
    #[inline(always)]
    fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
        let mut b0 = [0; 4usize];
        let mut b1 = [0; 4usize];
        b0.copy_from_slice(&a.val[0..4usize]);
        b1.copy_from_slice(&a.val[4usize..8usize]);
        (b0.simd_into(self), b1.simd_into(self))
    }
}