use crate::{Level, Simd, SimdInto, seal::Seal};
use crate::{
    f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
    i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
    mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
    u32x4, u32x8, u32x16,
};
use core::ops::*;
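// With `libm` enabled and `std` disabled, the inherent float methods used in the
// lane-wise implementations below (floor, fract, sqrt, trunc) are unavailable, so
// this small shim trait forwards them to `libm`.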
#[cfg(all(feature = "libm", not(feature = "std")))]
trait FloatExt {
    fn floor(self) -> Self;
    fn fract(self) -> Self;
    fn sqrt(self) -> Self;
    fn trunc(self) -> Self;
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f32 {
    #[inline(always)]
    fn floor(self) -> f32 {
        libm::floorf(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f32 {
        libm::sqrtf(self)
    }
    #[inline(always)]
    fn fract(self) -> f32 {
        self - self.trunc()
    }
    #[inline(always)]
    fn trunc(self) -> f32 {
        libm::truncf(self)
    }
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f64 {
    #[inline(always)]
    fn floor(self) -> f64 {
        libm::floor(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f64 {
        libm::sqrt(self)
    }
    #[inline(always)]
    fn fract(self) -> f64 {
        self - self.trunc()
    }
    #[inline(always)]
    fn trunc(self) -> f64 {
        libm::trunc(self)
    }
}
#[doc = r#" The SIMD token for the "fallback" level."#]
#[derive(Clone, Copy, Debug)]
pub struct Fallback {
    pub fallback: crate::core_arch::fallback::Fallback,
}
impl Fallback {
    #[inline]
    pub fn new() -> Self {
        Fallback {
            fallback: crate::core_arch::fallback::Fallback::new(),
        }
    }
}
impl Seal for Fallback {}
impl Simd for Fallback {
    type f32s = f32x4<Self>;
    type u8s = u8x16<Self>;
    type i8s = i8x16<Self>;
    type u16s = u16x8<Self>;
    type i16s = i16x8<Self>;
    type u32s = u32x4<Self>;
    type i32s = i32x4<Self>;
    type mask8s = mask8x16<Self>;
    type mask16s = mask16x8<Self>;
    type mask32s = mask32x4<Self>;
    #[inline(always)]
    fn level(self) -> Level {
        Level::Fallback(self)
    }
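    // The fallback level assumes no target features, so `vectorize` just calls
    // the closure; accelerated levels typically use this hook to enter a
    // `#[target_feature]` context for the duration of the call.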
    #[inline]
    fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
        f()
    }
    #[inline(always)]
    fn splat_f32x4(self, val: f32) -> f32x4<Self> {
        [val; 4usize].simd_into(self)
    }
    #[inline(always)]
    fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::abs(a[0usize]),
            f32::abs(a[1usize]),
            f32::abs(a[2usize]),
            f32::abs(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::neg(a[0usize]),
            f32::neg(a[1usize]),
            f32::neg(a[2usize]),
            f32::neg(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::sqrt(a[0usize]),
            f32::sqrt(a[1usize]),
            f32::sqrt(a[2usize]),
            f32::sqrt(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::add(a[0usize], &b[0usize]),
            f32::add(a[1usize], &b[1usize]),
            f32::add(a[2usize], &b[2usize]),
            f32::add(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::sub(a[0usize], &b[0usize]),
            f32::sub(a[1usize], &b[1usize]),
            f32::sub(a[2usize], &b[2usize]),
            f32::sub(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::mul(a[0usize], &b[0usize]),
            f32::mul(a[1usize], &b[1usize]),
            f32::mul(a[2usize], &b[2usize]),
            f32::mul(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::div(a[0usize], &b[0usize]),
            f32::div(a[1usize], &b[1usize]),
            f32::div(a[2usize], &b[2usize]),
            f32::div(a[3usize], &b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::copysign(a[0usize], b[0usize]),
            f32::copysign(a[1usize], b[1usize]),
            f32::copysign(a[2usize], b[2usize]),
            f32::copysign(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
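    // Lane-wise comparisons follow the usual SIMD mask convention: a lane is
    // all-ones (-1) when the predicate holds and 0 otherwise, produced by
    // negating the `bool as i32` cast.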
    #[inline(always)]
    fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::eq(&a[0usize], &b[0usize]) as i32),
            -(f32::eq(&a[1usize], &b[1usize]) as i32),
            -(f32::eq(&a[2usize], &b[2usize]) as i32),
            -(f32::eq(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::lt(&a[0usize], &b[0usize]) as i32),
            -(f32::lt(&a[1usize], &b[1usize]) as i32),
            -(f32::lt(&a[2usize], &b[2usize]) as i32),
            -(f32::lt(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::le(&a[0usize], &b[0usize]) as i32),
            -(f32::le(&a[1usize], &b[1usize]) as i32),
            -(f32::le(&a[2usize], &b[2usize]) as i32),
            -(f32::le(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::ge(&a[0usize], &b[0usize]) as i32),
            -(f32::ge(&a[1usize], &b[1usize]) as i32),
            -(f32::ge(&a[2usize], &b[2usize]) as i32),
            -(f32::ge(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
        [
            -(f32::gt(&a[0usize], &b[0usize]) as i32),
            -(f32::gt(&a[1usize], &b[1usize]) as i32),
            -(f32::gt(&a[2usize], &b[2usize]) as i32),
            -(f32::gt(&a[3usize], &b[3usize]) as i32),
        ]
        .simd_into(self)
    }
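    // zip_low/zip_high interleave the low/high halves of the two inputs;
    // unzip_low/unzip_high gather the even/odd lanes of the concatenated inputs.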
    #[inline(always)]
    fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
    }
    #[inline(always)]
    fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
    }
    #[inline(always)]
    fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
    }
    #[inline(always)]
    fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
    }
    #[inline(always)]
    fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::max(a[0usize], b[0usize]),
            f32::max(a[1usize], b[1usize]),
            f32::max(a[2usize], b[2usize]),
            f32::max(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::max(a[0usize], b[0usize]),
            f32::max(a[1usize], b[1usize]),
            f32::max(a[2usize], b[2usize]),
            f32::max(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::min(a[0usize], b[0usize]),
            f32::min(a[1usize], b[1usize]),
            f32::min(a[2usize], b[2usize]),
            f32::min(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
        [
            f32::min(a[0usize], b[0usize]),
            f32::min(a[1usize], b[1usize]),
            f32::min(a[2usize], b[2usize]),
            f32::min(a[3usize], b[3usize]),
        ]
        .simd_into(self)
    }
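    // madd/msub are not fused here: they compute a + b * c and a - b * c with a
    // separate multiply and add/sub, i.e. with two roundings.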
    #[inline(always)]
    fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        a.add(b.mul(c))
    }
    #[inline(always)]
    fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        a.sub(b.mul(c))
    }
    #[inline(always)]
    fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::floor(a[0usize]),
            f32::floor(a[1usize]),
            f32::floor(a[2usize]),
            f32::floor(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::fract(a[0usize]),
            f32::fract(a[1usize]),
            f32::fract(a[2usize]),
            f32::fract(a[3usize]),
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
        [
            f32::trunc(a[0usize]),
            f32::trunc(a[1usize]),
            f32::trunc(a[2usize]),
            f32::trunc(a[3usize]),
        ]
        .simd_into(self)
    }
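    // select: each lane takes the value from `b` where the mask lane is nonzero
    // and from `c` where it is zero.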
    #[inline(always)]
    fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
        [
            if a[0usize] != 0 { b[0usize] } else { c[0usize] },
            if a[1usize] != 0 { b[1usize] } else { c[1usize] },
            if a[2usize] != 0 { b[2usize] } else { c[2usize] },
            if a[3usize] != 0 { b[3usize] } else { c[3usize] },
        ]
        .simd_into(self)
    }
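    // combine concatenates two 128-bit vectors into one double-width vector.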
    #[inline(always)]
    fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
        let mut result = [0.0; 8usize];
        result[0..4usize].copy_from_slice(&a.val);
        result[4usize..8usize].copy_from_slice(&b.val);
        result.simd_into(self)
    }
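    // The reinterpret_* methods are pure bit casts of the lane array via
    // `bytemuck::cast`; no per-lane value conversion takes place.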
    #[inline(always)]
    fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
        f64x2 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        i32x4 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
        u8x16 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
    #[inline(always)]
    fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        u32x4 {
            val: bytemuck::cast(a.val),
            simd: a.simd,
        }
    }
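    // The cvt_* methods use Rust `as` casts, so float-to-integer conversion
    // saturates at the target type's bounds and maps NaN to 0.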
    #[inline(always)]
    fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
        [
            a[0usize] as u32,
            a[1usize] as u32,
            a[2usize] as u32,
            a[3usize] as u32,
        ]
        .simd_into(self)
    }
    #[inline(always)]
    fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
        [
            a[0usize] as i32,
            a[1usize] as i32,
            a[2usize] as i32,
            a[3usize] as i32,
        ]
        .simd_into(self)
    }
385 #[inline(always)]
386 fn splat_i8x16(self, val: i8) -> i8x16<Self> {
387 [val; 16usize].simd_into(self)
388 }
389 #[inline(always)]
390 fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
391 [
392 i8::not(a[0usize]),
393 i8::not(a[1usize]),
394 i8::not(a[2usize]),
395 i8::not(a[3usize]),
396 i8::not(a[4usize]),
397 i8::not(a[5usize]),
398 i8::not(a[6usize]),
399 i8::not(a[7usize]),
400 i8::not(a[8usize]),
401 i8::not(a[9usize]),
402 i8::not(a[10usize]),
403 i8::not(a[11usize]),
404 i8::not(a[12usize]),
405 i8::not(a[13usize]),
406 i8::not(a[14usize]),
407 i8::not(a[15usize]),
408 ]
409 .simd_into(self)
410 }
411 #[inline(always)]
412 fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
413 [
414 i8::add(a[0usize], &b[0usize]),
415 i8::add(a[1usize], &b[1usize]),
416 i8::add(a[2usize], &b[2usize]),
417 i8::add(a[3usize], &b[3usize]),
418 i8::add(a[4usize], &b[4usize]),
419 i8::add(a[5usize], &b[5usize]),
420 i8::add(a[6usize], &b[6usize]),
421 i8::add(a[7usize], &b[7usize]),
422 i8::add(a[8usize], &b[8usize]),
423 i8::add(a[9usize], &b[9usize]),
424 i8::add(a[10usize], &b[10usize]),
425 i8::add(a[11usize], &b[11usize]),
426 i8::add(a[12usize], &b[12usize]),
427 i8::add(a[13usize], &b[13usize]),
428 i8::add(a[14usize], &b[14usize]),
429 i8::add(a[15usize], &b[15usize]),
430 ]
431 .simd_into(self)
432 }
433 #[inline(always)]
434 fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
435 [
436 i8::wrapping_sub(a[0usize], b[0usize]),
437 i8::wrapping_sub(a[1usize], b[1usize]),
438 i8::wrapping_sub(a[2usize], b[2usize]),
439 i8::wrapping_sub(a[3usize], b[3usize]),
440 i8::wrapping_sub(a[4usize], b[4usize]),
441 i8::wrapping_sub(a[5usize], b[5usize]),
442 i8::wrapping_sub(a[6usize], b[6usize]),
443 i8::wrapping_sub(a[7usize], b[7usize]),
444 i8::wrapping_sub(a[8usize], b[8usize]),
445 i8::wrapping_sub(a[9usize], b[9usize]),
446 i8::wrapping_sub(a[10usize], b[10usize]),
447 i8::wrapping_sub(a[11usize], b[11usize]),
448 i8::wrapping_sub(a[12usize], b[12usize]),
449 i8::wrapping_sub(a[13usize], b[13usize]),
450 i8::wrapping_sub(a[14usize], b[14usize]),
451 i8::wrapping_sub(a[15usize], b[15usize]),
452 ]
453 .simd_into(self)
454 }
455 #[inline(always)]
456 fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
457 [
458 i8::wrapping_mul(a[0usize], b[0usize]),
459 i8::wrapping_mul(a[1usize], b[1usize]),
460 i8::wrapping_mul(a[2usize], b[2usize]),
461 i8::wrapping_mul(a[3usize], b[3usize]),
462 i8::wrapping_mul(a[4usize], b[4usize]),
463 i8::wrapping_mul(a[5usize], b[5usize]),
464 i8::wrapping_mul(a[6usize], b[6usize]),
465 i8::wrapping_mul(a[7usize], b[7usize]),
466 i8::wrapping_mul(a[8usize], b[8usize]),
467 i8::wrapping_mul(a[9usize], b[9usize]),
468 i8::wrapping_mul(a[10usize], b[10usize]),
469 i8::wrapping_mul(a[11usize], b[11usize]),
470 i8::wrapping_mul(a[12usize], b[12usize]),
471 i8::wrapping_mul(a[13usize], b[13usize]),
472 i8::wrapping_mul(a[14usize], b[14usize]),
473 i8::wrapping_mul(a[15usize], b[15usize]),
474 ]
475 .simd_into(self)
476 }
477 #[inline(always)]
478 fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
479 [
480 i8::bitand(a[0usize], &b[0usize]),
481 i8::bitand(a[1usize], &b[1usize]),
482 i8::bitand(a[2usize], &b[2usize]),
483 i8::bitand(a[3usize], &b[3usize]),
484 i8::bitand(a[4usize], &b[4usize]),
485 i8::bitand(a[5usize], &b[5usize]),
486 i8::bitand(a[6usize], &b[6usize]),
487 i8::bitand(a[7usize], &b[7usize]),
488 i8::bitand(a[8usize], &b[8usize]),
489 i8::bitand(a[9usize], &b[9usize]),
490 i8::bitand(a[10usize], &b[10usize]),
491 i8::bitand(a[11usize], &b[11usize]),
492 i8::bitand(a[12usize], &b[12usize]),
493 i8::bitand(a[13usize], &b[13usize]),
494 i8::bitand(a[14usize], &b[14usize]),
495 i8::bitand(a[15usize], &b[15usize]),
496 ]
497 .simd_into(self)
498 }
499 #[inline(always)]
500 fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
501 [
502 i8::bitor(a[0usize], &b[0usize]),
503 i8::bitor(a[1usize], &b[1usize]),
504 i8::bitor(a[2usize], &b[2usize]),
505 i8::bitor(a[3usize], &b[3usize]),
506 i8::bitor(a[4usize], &b[4usize]),
507 i8::bitor(a[5usize], &b[5usize]),
508 i8::bitor(a[6usize], &b[6usize]),
509 i8::bitor(a[7usize], &b[7usize]),
510 i8::bitor(a[8usize], &b[8usize]),
511 i8::bitor(a[9usize], &b[9usize]),
512 i8::bitor(a[10usize], &b[10usize]),
513 i8::bitor(a[11usize], &b[11usize]),
514 i8::bitor(a[12usize], &b[12usize]),
515 i8::bitor(a[13usize], &b[13usize]),
516 i8::bitor(a[14usize], &b[14usize]),
517 i8::bitor(a[15usize], &b[15usize]),
518 ]
519 .simd_into(self)
520 }
521 #[inline(always)]
522 fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
523 [
524 i8::bitxor(a[0usize], &b[0usize]),
525 i8::bitxor(a[1usize], &b[1usize]),
526 i8::bitxor(a[2usize], &b[2usize]),
527 i8::bitxor(a[3usize], &b[3usize]),
528 i8::bitxor(a[4usize], &b[4usize]),
529 i8::bitxor(a[5usize], &b[5usize]),
530 i8::bitxor(a[6usize], &b[6usize]),
531 i8::bitxor(a[7usize], &b[7usize]),
532 i8::bitxor(a[8usize], &b[8usize]),
533 i8::bitxor(a[9usize], &b[9usize]),
534 i8::bitxor(a[10usize], &b[10usize]),
535 i8::bitxor(a[11usize], &b[11usize]),
536 i8::bitxor(a[12usize], &b[12usize]),
537 i8::bitxor(a[13usize], &b[13usize]),
538 i8::bitxor(a[14usize], &b[14usize]),
539 i8::bitxor(a[15usize], &b[15usize]),
540 ]
541 .simd_into(self)
542 }
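    // `>>` on a signed lane is an arithmetic (sign-extending) shift.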
543 #[inline(always)]
544 fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
545 [
546 i8::shr(a[0usize], shift as i8),
547 i8::shr(a[1usize], shift as i8),
548 i8::shr(a[2usize], shift as i8),
549 i8::shr(a[3usize], shift as i8),
550 i8::shr(a[4usize], shift as i8),
551 i8::shr(a[5usize], shift as i8),
552 i8::shr(a[6usize], shift as i8),
553 i8::shr(a[7usize], shift as i8),
554 i8::shr(a[8usize], shift as i8),
555 i8::shr(a[9usize], shift as i8),
556 i8::shr(a[10usize], shift as i8),
557 i8::shr(a[11usize], shift as i8),
558 i8::shr(a[12usize], shift as i8),
559 i8::shr(a[13usize], shift as i8),
560 i8::shr(a[14usize], shift as i8),
561 i8::shr(a[15usize], shift as i8),
562 ]
563 .simd_into(self)
564 }
565 #[inline(always)]
566 fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
567 [
568 -(i8::eq(&a[0usize], &b[0usize]) as i8),
569 -(i8::eq(&a[1usize], &b[1usize]) as i8),
570 -(i8::eq(&a[2usize], &b[2usize]) as i8),
571 -(i8::eq(&a[3usize], &b[3usize]) as i8),
572 -(i8::eq(&a[4usize], &b[4usize]) as i8),
573 -(i8::eq(&a[5usize], &b[5usize]) as i8),
574 -(i8::eq(&a[6usize], &b[6usize]) as i8),
575 -(i8::eq(&a[7usize], &b[7usize]) as i8),
576 -(i8::eq(&a[8usize], &b[8usize]) as i8),
577 -(i8::eq(&a[9usize], &b[9usize]) as i8),
578 -(i8::eq(&a[10usize], &b[10usize]) as i8),
579 -(i8::eq(&a[11usize], &b[11usize]) as i8),
580 -(i8::eq(&a[12usize], &b[12usize]) as i8),
581 -(i8::eq(&a[13usize], &b[13usize]) as i8),
582 -(i8::eq(&a[14usize], &b[14usize]) as i8),
583 -(i8::eq(&a[15usize], &b[15usize]) as i8),
584 ]
585 .simd_into(self)
586 }
587 #[inline(always)]
588 fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
589 [
590 -(i8::lt(&a[0usize], &b[0usize]) as i8),
591 -(i8::lt(&a[1usize], &b[1usize]) as i8),
592 -(i8::lt(&a[2usize], &b[2usize]) as i8),
593 -(i8::lt(&a[3usize], &b[3usize]) as i8),
594 -(i8::lt(&a[4usize], &b[4usize]) as i8),
595 -(i8::lt(&a[5usize], &b[5usize]) as i8),
596 -(i8::lt(&a[6usize], &b[6usize]) as i8),
597 -(i8::lt(&a[7usize], &b[7usize]) as i8),
598 -(i8::lt(&a[8usize], &b[8usize]) as i8),
599 -(i8::lt(&a[9usize], &b[9usize]) as i8),
600 -(i8::lt(&a[10usize], &b[10usize]) as i8),
601 -(i8::lt(&a[11usize], &b[11usize]) as i8),
602 -(i8::lt(&a[12usize], &b[12usize]) as i8),
603 -(i8::lt(&a[13usize], &b[13usize]) as i8),
604 -(i8::lt(&a[14usize], &b[14usize]) as i8),
605 -(i8::lt(&a[15usize], &b[15usize]) as i8),
606 ]
607 .simd_into(self)
608 }
609 #[inline(always)]
610 fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
611 [
612 -(i8::le(&a[0usize], &b[0usize]) as i8),
613 -(i8::le(&a[1usize], &b[1usize]) as i8),
614 -(i8::le(&a[2usize], &b[2usize]) as i8),
615 -(i8::le(&a[3usize], &b[3usize]) as i8),
616 -(i8::le(&a[4usize], &b[4usize]) as i8),
617 -(i8::le(&a[5usize], &b[5usize]) as i8),
618 -(i8::le(&a[6usize], &b[6usize]) as i8),
619 -(i8::le(&a[7usize], &b[7usize]) as i8),
620 -(i8::le(&a[8usize], &b[8usize]) as i8),
621 -(i8::le(&a[9usize], &b[9usize]) as i8),
622 -(i8::le(&a[10usize], &b[10usize]) as i8),
623 -(i8::le(&a[11usize], &b[11usize]) as i8),
624 -(i8::le(&a[12usize], &b[12usize]) as i8),
625 -(i8::le(&a[13usize], &b[13usize]) as i8),
626 -(i8::le(&a[14usize], &b[14usize]) as i8),
627 -(i8::le(&a[15usize], &b[15usize]) as i8),
628 ]
629 .simd_into(self)
630 }
631 #[inline(always)]
632 fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
633 [
634 -(i8::ge(&a[0usize], &b[0usize]) as i8),
635 -(i8::ge(&a[1usize], &b[1usize]) as i8),
636 -(i8::ge(&a[2usize], &b[2usize]) as i8),
637 -(i8::ge(&a[3usize], &b[3usize]) as i8),
638 -(i8::ge(&a[4usize], &b[4usize]) as i8),
639 -(i8::ge(&a[5usize], &b[5usize]) as i8),
640 -(i8::ge(&a[6usize], &b[6usize]) as i8),
641 -(i8::ge(&a[7usize], &b[7usize]) as i8),
642 -(i8::ge(&a[8usize], &b[8usize]) as i8),
643 -(i8::ge(&a[9usize], &b[9usize]) as i8),
644 -(i8::ge(&a[10usize], &b[10usize]) as i8),
645 -(i8::ge(&a[11usize], &b[11usize]) as i8),
646 -(i8::ge(&a[12usize], &b[12usize]) as i8),
647 -(i8::ge(&a[13usize], &b[13usize]) as i8),
648 -(i8::ge(&a[14usize], &b[14usize]) as i8),
649 -(i8::ge(&a[15usize], &b[15usize]) as i8),
650 ]
651 .simd_into(self)
652 }
653 #[inline(always)]
654 fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
655 [
656 -(i8::gt(&a[0usize], &b[0usize]) as i8),
657 -(i8::gt(&a[1usize], &b[1usize]) as i8),
658 -(i8::gt(&a[2usize], &b[2usize]) as i8),
659 -(i8::gt(&a[3usize], &b[3usize]) as i8),
660 -(i8::gt(&a[4usize], &b[4usize]) as i8),
661 -(i8::gt(&a[5usize], &b[5usize]) as i8),
662 -(i8::gt(&a[6usize], &b[6usize]) as i8),
663 -(i8::gt(&a[7usize], &b[7usize]) as i8),
664 -(i8::gt(&a[8usize], &b[8usize]) as i8),
665 -(i8::gt(&a[9usize], &b[9usize]) as i8),
666 -(i8::gt(&a[10usize], &b[10usize]) as i8),
667 -(i8::gt(&a[11usize], &b[11usize]) as i8),
668 -(i8::gt(&a[12usize], &b[12usize]) as i8),
669 -(i8::gt(&a[13usize], &b[13usize]) as i8),
670 -(i8::gt(&a[14usize], &b[14usize]) as i8),
671 -(i8::gt(&a[15usize], &b[15usize]) as i8),
672 ]
673 .simd_into(self)
674 }
675 #[inline(always)]
676 fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
677 [
678 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
679 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
680 ]
681 .simd_into(self)
682 }
683 #[inline(always)]
684 fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
685 [
686 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
687 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
688 a[15usize], b[15usize],
689 ]
690 .simd_into(self)
691 }
692 #[inline(always)]
693 fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
694 [
695 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
696 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
697 b[12usize], b[14usize],
698 ]
699 .simd_into(self)
700 }
701 #[inline(always)]
702 fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
703 [
704 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
705 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
706 b[13usize], b[15usize],
707 ]
708 .simd_into(self)
709 }
710 #[inline(always)]
711 fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
712 [
713 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
714 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
715 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
716 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
717 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
718 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
719 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
720 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
721 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
722 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
723 if a[10usize] != 0 {
724 b[10usize]
725 } else {
726 c[10usize]
727 },
728 if a[11usize] != 0 {
729 b[11usize]
730 } else {
731 c[11usize]
732 },
733 if a[12usize] != 0 {
734 b[12usize]
735 } else {
736 c[12usize]
737 },
738 if a[13usize] != 0 {
739 b[13usize]
740 } else {
741 c[13usize]
742 },
743 if a[14usize] != 0 {
744 b[14usize]
745 } else {
746 c[14usize]
747 },
748 if a[15usize] != 0 {
749 b[15usize]
750 } else {
751 c[15usize]
752 },
753 ]
754 .simd_into(self)
755 }
756 #[inline(always)]
757 fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
758 [
759 i8::min(a[0usize], b[0usize]),
760 i8::min(a[1usize], b[1usize]),
761 i8::min(a[2usize], b[2usize]),
762 i8::min(a[3usize], b[3usize]),
763 i8::min(a[4usize], b[4usize]),
764 i8::min(a[5usize], b[5usize]),
765 i8::min(a[6usize], b[6usize]),
766 i8::min(a[7usize], b[7usize]),
767 i8::min(a[8usize], b[8usize]),
768 i8::min(a[9usize], b[9usize]),
769 i8::min(a[10usize], b[10usize]),
770 i8::min(a[11usize], b[11usize]),
771 i8::min(a[12usize], b[12usize]),
772 i8::min(a[13usize], b[13usize]),
773 i8::min(a[14usize], b[14usize]),
774 i8::min(a[15usize], b[15usize]),
775 ]
776 .simd_into(self)
777 }
778 #[inline(always)]
779 fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
780 [
781 i8::max(a[0usize], b[0usize]),
782 i8::max(a[1usize], b[1usize]),
783 i8::max(a[2usize], b[2usize]),
784 i8::max(a[3usize], b[3usize]),
785 i8::max(a[4usize], b[4usize]),
786 i8::max(a[5usize], b[5usize]),
787 i8::max(a[6usize], b[6usize]),
788 i8::max(a[7usize], b[7usize]),
789 i8::max(a[8usize], b[8usize]),
790 i8::max(a[9usize], b[9usize]),
791 i8::max(a[10usize], b[10usize]),
792 i8::max(a[11usize], b[11usize]),
793 i8::max(a[12usize], b[12usize]),
794 i8::max(a[13usize], b[13usize]),
795 i8::max(a[14usize], b[14usize]),
796 i8::max(a[15usize], b[15usize]),
797 ]
798 .simd_into(self)
799 }
800 #[inline(always)]
801 fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
802 let mut result = [0; 32usize];
803 result[0..16usize].copy_from_slice(&a.val);
804 result[16usize..32usize].copy_from_slice(&b.val);
805 result.simd_into(self)
806 }
807 #[inline(always)]
808 fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
809 u8x16 {
810 val: bytemuck::cast(a.val),
811 simd: a.simd,
812 }
813 }
814 #[inline(always)]
815 fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
816 u32x4 {
817 val: bytemuck::cast(a.val),
818 simd: a.simd,
819 }
820 }
821 #[inline(always)]
822 fn splat_u8x16(self, val: u8) -> u8x16<Self> {
823 [val; 16usize].simd_into(self)
824 }
825 #[inline(always)]
826 fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
827 [
828 u8::not(a[0usize]),
829 u8::not(a[1usize]),
830 u8::not(a[2usize]),
831 u8::not(a[3usize]),
832 u8::not(a[4usize]),
833 u8::not(a[5usize]),
834 u8::not(a[6usize]),
835 u8::not(a[7usize]),
836 u8::not(a[8usize]),
837 u8::not(a[9usize]),
838 u8::not(a[10usize]),
839 u8::not(a[11usize]),
840 u8::not(a[12usize]),
841 u8::not(a[13usize]),
842 u8::not(a[14usize]),
843 u8::not(a[15usize]),
844 ]
845 .simd_into(self)
846 }
847 #[inline(always)]
848 fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
849 [
850 u8::add(a[0usize], &b[0usize]),
851 u8::add(a[1usize], &b[1usize]),
852 u8::add(a[2usize], &b[2usize]),
853 u8::add(a[3usize], &b[3usize]),
854 u8::add(a[4usize], &b[4usize]),
855 u8::add(a[5usize], &b[5usize]),
856 u8::add(a[6usize], &b[6usize]),
857 u8::add(a[7usize], &b[7usize]),
858 u8::add(a[8usize], &b[8usize]),
859 u8::add(a[9usize], &b[9usize]),
860 u8::add(a[10usize], &b[10usize]),
861 u8::add(a[11usize], &b[11usize]),
862 u8::add(a[12usize], &b[12usize]),
863 u8::add(a[13usize], &b[13usize]),
864 u8::add(a[14usize], &b[14usize]),
865 u8::add(a[15usize], &b[15usize]),
866 ]
867 .simd_into(self)
868 }
869 #[inline(always)]
870 fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
871 [
872 u8::wrapping_sub(a[0usize], b[0usize]),
873 u8::wrapping_sub(a[1usize], b[1usize]),
874 u8::wrapping_sub(a[2usize], b[2usize]),
875 u8::wrapping_sub(a[3usize], b[3usize]),
876 u8::wrapping_sub(a[4usize], b[4usize]),
877 u8::wrapping_sub(a[5usize], b[5usize]),
878 u8::wrapping_sub(a[6usize], b[6usize]),
879 u8::wrapping_sub(a[7usize], b[7usize]),
880 u8::wrapping_sub(a[8usize], b[8usize]),
881 u8::wrapping_sub(a[9usize], b[9usize]),
882 u8::wrapping_sub(a[10usize], b[10usize]),
883 u8::wrapping_sub(a[11usize], b[11usize]),
884 u8::wrapping_sub(a[12usize], b[12usize]),
885 u8::wrapping_sub(a[13usize], b[13usize]),
886 u8::wrapping_sub(a[14usize], b[14usize]),
887 u8::wrapping_sub(a[15usize], b[15usize]),
888 ]
889 .simd_into(self)
890 }
891 #[inline(always)]
892 fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
893 [
894 u8::wrapping_mul(a[0usize], b[0usize]),
895 u8::wrapping_mul(a[1usize], b[1usize]),
896 u8::wrapping_mul(a[2usize], b[2usize]),
897 u8::wrapping_mul(a[3usize], b[3usize]),
898 u8::wrapping_mul(a[4usize], b[4usize]),
899 u8::wrapping_mul(a[5usize], b[5usize]),
900 u8::wrapping_mul(a[6usize], b[6usize]),
901 u8::wrapping_mul(a[7usize], b[7usize]),
902 u8::wrapping_mul(a[8usize], b[8usize]),
903 u8::wrapping_mul(a[9usize], b[9usize]),
904 u8::wrapping_mul(a[10usize], b[10usize]),
905 u8::wrapping_mul(a[11usize], b[11usize]),
906 u8::wrapping_mul(a[12usize], b[12usize]),
907 u8::wrapping_mul(a[13usize], b[13usize]),
908 u8::wrapping_mul(a[14usize], b[14usize]),
909 u8::wrapping_mul(a[15usize], b[15usize]),
910 ]
911 .simd_into(self)
912 }
913 #[inline(always)]
914 fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
915 [
916 u8::bitand(a[0usize], &b[0usize]),
917 u8::bitand(a[1usize], &b[1usize]),
918 u8::bitand(a[2usize], &b[2usize]),
919 u8::bitand(a[3usize], &b[3usize]),
920 u8::bitand(a[4usize], &b[4usize]),
921 u8::bitand(a[5usize], &b[5usize]),
922 u8::bitand(a[6usize], &b[6usize]),
923 u8::bitand(a[7usize], &b[7usize]),
924 u8::bitand(a[8usize], &b[8usize]),
925 u8::bitand(a[9usize], &b[9usize]),
926 u8::bitand(a[10usize], &b[10usize]),
927 u8::bitand(a[11usize], &b[11usize]),
928 u8::bitand(a[12usize], &b[12usize]),
929 u8::bitand(a[13usize], &b[13usize]),
930 u8::bitand(a[14usize], &b[14usize]),
931 u8::bitand(a[15usize], &b[15usize]),
932 ]
933 .simd_into(self)
934 }
935 #[inline(always)]
936 fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
937 [
938 u8::bitor(a[0usize], &b[0usize]),
939 u8::bitor(a[1usize], &b[1usize]),
940 u8::bitor(a[2usize], &b[2usize]),
941 u8::bitor(a[3usize], &b[3usize]),
942 u8::bitor(a[4usize], &b[4usize]),
943 u8::bitor(a[5usize], &b[5usize]),
944 u8::bitor(a[6usize], &b[6usize]),
945 u8::bitor(a[7usize], &b[7usize]),
946 u8::bitor(a[8usize], &b[8usize]),
947 u8::bitor(a[9usize], &b[9usize]),
948 u8::bitor(a[10usize], &b[10usize]),
949 u8::bitor(a[11usize], &b[11usize]),
950 u8::bitor(a[12usize], &b[12usize]),
951 u8::bitor(a[13usize], &b[13usize]),
952 u8::bitor(a[14usize], &b[14usize]),
953 u8::bitor(a[15usize], &b[15usize]),
954 ]
955 .simd_into(self)
956 }
957 #[inline(always)]
958 fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
959 [
960 u8::bitxor(a[0usize], &b[0usize]),
961 u8::bitxor(a[1usize], &b[1usize]),
962 u8::bitxor(a[2usize], &b[2usize]),
963 u8::bitxor(a[3usize], &b[3usize]),
964 u8::bitxor(a[4usize], &b[4usize]),
965 u8::bitxor(a[5usize], &b[5usize]),
966 u8::bitxor(a[6usize], &b[6usize]),
967 u8::bitxor(a[7usize], &b[7usize]),
968 u8::bitxor(a[8usize], &b[8usize]),
969 u8::bitxor(a[9usize], &b[9usize]),
970 u8::bitxor(a[10usize], &b[10usize]),
971 u8::bitxor(a[11usize], &b[11usize]),
972 u8::bitxor(a[12usize], &b[12usize]),
973 u8::bitxor(a[13usize], &b[13usize]),
974 u8::bitxor(a[14usize], &b[14usize]),
975 u8::bitxor(a[15usize], &b[15usize]),
976 ]
977 .simd_into(self)
978 }
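    // `>>` on an unsigned lane is a logical (zero-filling) shift.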
979 #[inline(always)]
980 fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
981 [
982 u8::shr(a[0usize], shift as u8),
983 u8::shr(a[1usize], shift as u8),
984 u8::shr(a[2usize], shift as u8),
985 u8::shr(a[3usize], shift as u8),
986 u8::shr(a[4usize], shift as u8),
987 u8::shr(a[5usize], shift as u8),
988 u8::shr(a[6usize], shift as u8),
989 u8::shr(a[7usize], shift as u8),
990 u8::shr(a[8usize], shift as u8),
991 u8::shr(a[9usize], shift as u8),
992 u8::shr(a[10usize], shift as u8),
993 u8::shr(a[11usize], shift as u8),
994 u8::shr(a[12usize], shift as u8),
995 u8::shr(a[13usize], shift as u8),
996 u8::shr(a[14usize], shift as u8),
997 u8::shr(a[15usize], shift as u8),
998 ]
999 .simd_into(self)
1000 }
1001 #[inline(always)]
1002 fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1003 [
1004 -(u8::eq(&a[0usize], &b[0usize]) as i8),
1005 -(u8::eq(&a[1usize], &b[1usize]) as i8),
1006 -(u8::eq(&a[2usize], &b[2usize]) as i8),
1007 -(u8::eq(&a[3usize], &b[3usize]) as i8),
1008 -(u8::eq(&a[4usize], &b[4usize]) as i8),
1009 -(u8::eq(&a[5usize], &b[5usize]) as i8),
1010 -(u8::eq(&a[6usize], &b[6usize]) as i8),
1011 -(u8::eq(&a[7usize], &b[7usize]) as i8),
1012 -(u8::eq(&a[8usize], &b[8usize]) as i8),
1013 -(u8::eq(&a[9usize], &b[9usize]) as i8),
1014 -(u8::eq(&a[10usize], &b[10usize]) as i8),
1015 -(u8::eq(&a[11usize], &b[11usize]) as i8),
1016 -(u8::eq(&a[12usize], &b[12usize]) as i8),
1017 -(u8::eq(&a[13usize], &b[13usize]) as i8),
1018 -(u8::eq(&a[14usize], &b[14usize]) as i8),
1019 -(u8::eq(&a[15usize], &b[15usize]) as i8),
1020 ]
1021 .simd_into(self)
1022 }
1023 #[inline(always)]
1024 fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1025 [
1026 -(u8::lt(&a[0usize], &b[0usize]) as i8),
1027 -(u8::lt(&a[1usize], &b[1usize]) as i8),
1028 -(u8::lt(&a[2usize], &b[2usize]) as i8),
1029 -(u8::lt(&a[3usize], &b[3usize]) as i8),
1030 -(u8::lt(&a[4usize], &b[4usize]) as i8),
1031 -(u8::lt(&a[5usize], &b[5usize]) as i8),
1032 -(u8::lt(&a[6usize], &b[6usize]) as i8),
1033 -(u8::lt(&a[7usize], &b[7usize]) as i8),
1034 -(u8::lt(&a[8usize], &b[8usize]) as i8),
1035 -(u8::lt(&a[9usize], &b[9usize]) as i8),
1036 -(u8::lt(&a[10usize], &b[10usize]) as i8),
1037 -(u8::lt(&a[11usize], &b[11usize]) as i8),
1038 -(u8::lt(&a[12usize], &b[12usize]) as i8),
1039 -(u8::lt(&a[13usize], &b[13usize]) as i8),
1040 -(u8::lt(&a[14usize], &b[14usize]) as i8),
1041 -(u8::lt(&a[15usize], &b[15usize]) as i8),
1042 ]
1043 .simd_into(self)
1044 }
1045 #[inline(always)]
1046 fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1047 [
1048 -(u8::le(&a[0usize], &b[0usize]) as i8),
1049 -(u8::le(&a[1usize], &b[1usize]) as i8),
1050 -(u8::le(&a[2usize], &b[2usize]) as i8),
1051 -(u8::le(&a[3usize], &b[3usize]) as i8),
1052 -(u8::le(&a[4usize], &b[4usize]) as i8),
1053 -(u8::le(&a[5usize], &b[5usize]) as i8),
1054 -(u8::le(&a[6usize], &b[6usize]) as i8),
1055 -(u8::le(&a[7usize], &b[7usize]) as i8),
1056 -(u8::le(&a[8usize], &b[8usize]) as i8),
1057 -(u8::le(&a[9usize], &b[9usize]) as i8),
1058 -(u8::le(&a[10usize], &b[10usize]) as i8),
1059 -(u8::le(&a[11usize], &b[11usize]) as i8),
1060 -(u8::le(&a[12usize], &b[12usize]) as i8),
1061 -(u8::le(&a[13usize], &b[13usize]) as i8),
1062 -(u8::le(&a[14usize], &b[14usize]) as i8),
1063 -(u8::le(&a[15usize], &b[15usize]) as i8),
1064 ]
1065 .simd_into(self)
1066 }
1067 #[inline(always)]
1068 fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1069 [
1070 -(u8::ge(&a[0usize], &b[0usize]) as i8),
1071 -(u8::ge(&a[1usize], &b[1usize]) as i8),
1072 -(u8::ge(&a[2usize], &b[2usize]) as i8),
1073 -(u8::ge(&a[3usize], &b[3usize]) as i8),
1074 -(u8::ge(&a[4usize], &b[4usize]) as i8),
1075 -(u8::ge(&a[5usize], &b[5usize]) as i8),
1076 -(u8::ge(&a[6usize], &b[6usize]) as i8),
1077 -(u8::ge(&a[7usize], &b[7usize]) as i8),
1078 -(u8::ge(&a[8usize], &b[8usize]) as i8),
1079 -(u8::ge(&a[9usize], &b[9usize]) as i8),
1080 -(u8::ge(&a[10usize], &b[10usize]) as i8),
1081 -(u8::ge(&a[11usize], &b[11usize]) as i8),
1082 -(u8::ge(&a[12usize], &b[12usize]) as i8),
1083 -(u8::ge(&a[13usize], &b[13usize]) as i8),
1084 -(u8::ge(&a[14usize], &b[14usize]) as i8),
1085 -(u8::ge(&a[15usize], &b[15usize]) as i8),
1086 ]
1087 .simd_into(self)
1088 }
1089 #[inline(always)]
1090 fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
1091 [
1092 -(u8::gt(&a[0usize], &b[0usize]) as i8),
1093 -(u8::gt(&a[1usize], &b[1usize]) as i8),
1094 -(u8::gt(&a[2usize], &b[2usize]) as i8),
1095 -(u8::gt(&a[3usize], &b[3usize]) as i8),
1096 -(u8::gt(&a[4usize], &b[4usize]) as i8),
1097 -(u8::gt(&a[5usize], &b[5usize]) as i8),
1098 -(u8::gt(&a[6usize], &b[6usize]) as i8),
1099 -(u8::gt(&a[7usize], &b[7usize]) as i8),
1100 -(u8::gt(&a[8usize], &b[8usize]) as i8),
1101 -(u8::gt(&a[9usize], &b[9usize]) as i8),
1102 -(u8::gt(&a[10usize], &b[10usize]) as i8),
1103 -(u8::gt(&a[11usize], &b[11usize]) as i8),
1104 -(u8::gt(&a[12usize], &b[12usize]) as i8),
1105 -(u8::gt(&a[13usize], &b[13usize]) as i8),
1106 -(u8::gt(&a[14usize], &b[14usize]) as i8),
1107 -(u8::gt(&a[15usize], &b[15usize]) as i8),
1108 ]
1109 .simd_into(self)
1110 }
1111 #[inline(always)]
1112 fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1113 [
1114 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1115 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1116 ]
1117 .simd_into(self)
1118 }
1119 #[inline(always)]
1120 fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1121 [
1122 a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
1123 b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
1124 a[15usize], b[15usize],
1125 ]
1126 .simd_into(self)
1127 }
1128 #[inline(always)]
1129 fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1130 [
1131 a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
1132 a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
1133 b[12usize], b[14usize],
1134 ]
1135 .simd_into(self)
1136 }
1137 #[inline(always)]
1138 fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1139 [
1140 a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
1141 a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
1142 b[13usize], b[15usize],
1143 ]
1144 .simd_into(self)
1145 }
1146 #[inline(always)]
1147 fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
1148 [
1149 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1150 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1151 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1152 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1153 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1154 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1155 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1156 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1157 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1158 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1159 if a[10usize] != 0 {
1160 b[10usize]
1161 } else {
1162 c[10usize]
1163 },
1164 if a[11usize] != 0 {
1165 b[11usize]
1166 } else {
1167 c[11usize]
1168 },
1169 if a[12usize] != 0 {
1170 b[12usize]
1171 } else {
1172 c[12usize]
1173 },
1174 if a[13usize] != 0 {
1175 b[13usize]
1176 } else {
1177 c[13usize]
1178 },
1179 if a[14usize] != 0 {
1180 b[14usize]
1181 } else {
1182 c[14usize]
1183 },
1184 if a[15usize] != 0 {
1185 b[15usize]
1186 } else {
1187 c[15usize]
1188 },
1189 ]
1190 .simd_into(self)
1191 }
1192 #[inline(always)]
1193 fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1194 [
1195 u8::min(a[0usize], b[0usize]),
1196 u8::min(a[1usize], b[1usize]),
1197 u8::min(a[2usize], b[2usize]),
1198 u8::min(a[3usize], b[3usize]),
1199 u8::min(a[4usize], b[4usize]),
1200 u8::min(a[5usize], b[5usize]),
1201 u8::min(a[6usize], b[6usize]),
1202 u8::min(a[7usize], b[7usize]),
1203 u8::min(a[8usize], b[8usize]),
1204 u8::min(a[9usize], b[9usize]),
1205 u8::min(a[10usize], b[10usize]),
1206 u8::min(a[11usize], b[11usize]),
1207 u8::min(a[12usize], b[12usize]),
1208 u8::min(a[13usize], b[13usize]),
1209 u8::min(a[14usize], b[14usize]),
1210 u8::min(a[15usize], b[15usize]),
1211 ]
1212 .simd_into(self)
1213 }
1214 #[inline(always)]
1215 fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
1216 [
1217 u8::max(a[0usize], b[0usize]),
1218 u8::max(a[1usize], b[1usize]),
1219 u8::max(a[2usize], b[2usize]),
1220 u8::max(a[3usize], b[3usize]),
1221 u8::max(a[4usize], b[4usize]),
1222 u8::max(a[5usize], b[5usize]),
1223 u8::max(a[6usize], b[6usize]),
1224 u8::max(a[7usize], b[7usize]),
1225 u8::max(a[8usize], b[8usize]),
1226 u8::max(a[9usize], b[9usize]),
1227 u8::max(a[10usize], b[10usize]),
1228 u8::max(a[11usize], b[11usize]),
1229 u8::max(a[12usize], b[12usize]),
1230 u8::max(a[13usize], b[13usize]),
1231 u8::max(a[14usize], b[14usize]),
1232 u8::max(a[15usize], b[15usize]),
1233 ]
1234 .simd_into(self)
1235 }
1236 #[inline(always)]
1237 fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
1238 let mut result = [0; 32usize];
1239 result[0..16usize].copy_from_slice(&a.val);
1240 result[16usize..32usize].copy_from_slice(&b.val);
1241 result.simd_into(self)
1242 }
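    // widen zero-extends each u8 lane to u16, yielding a double-width vector.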
1243 #[inline(always)]
1244 fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
1245 [
1246 a[0usize] as u16,
1247 a[1usize] as u16,
1248 a[2usize] as u16,
1249 a[3usize] as u16,
1250 a[4usize] as u16,
1251 a[5usize] as u16,
1252 a[6usize] as u16,
1253 a[7usize] as u16,
1254 a[8usize] as u16,
1255 a[9usize] as u16,
1256 a[10usize] as u16,
1257 a[11usize] as u16,
1258 a[12usize] as u16,
1259 a[13usize] as u16,
1260 a[14usize] as u16,
1261 a[15usize] as u16,
1262 ]
1263 .simd_into(self)
1264 }
1265 #[inline(always)]
1266 fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
1267 u32x4 {
1268 val: bytemuck::cast(a.val),
1269 simd: a.simd,
1270 }
1271 }
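    // Mask vectors store one signed lane per element: 0 means "false" and any
    // nonzero value (canonically -1) means "true".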
1272 #[inline(always)]
1273 fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
1274 [val; 16usize].simd_into(self)
1275 }
1276 #[inline(always)]
1277 fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
1278 [
1279 i8::not(a[0usize]),
1280 i8::not(a[1usize]),
1281 i8::not(a[2usize]),
1282 i8::not(a[3usize]),
1283 i8::not(a[4usize]),
1284 i8::not(a[5usize]),
1285 i8::not(a[6usize]),
1286 i8::not(a[7usize]),
1287 i8::not(a[8usize]),
1288 i8::not(a[9usize]),
1289 i8::not(a[10usize]),
1290 i8::not(a[11usize]),
1291 i8::not(a[12usize]),
1292 i8::not(a[13usize]),
1293 i8::not(a[14usize]),
1294 i8::not(a[15usize]),
1295 ]
1296 .simd_into(self)
1297 }
1298 #[inline(always)]
1299 fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1300 [
1301 i8::bitand(a[0usize], &b[0usize]),
1302 i8::bitand(a[1usize], &b[1usize]),
1303 i8::bitand(a[2usize], &b[2usize]),
1304 i8::bitand(a[3usize], &b[3usize]),
1305 i8::bitand(a[4usize], &b[4usize]),
1306 i8::bitand(a[5usize], &b[5usize]),
1307 i8::bitand(a[6usize], &b[6usize]),
1308 i8::bitand(a[7usize], &b[7usize]),
1309 i8::bitand(a[8usize], &b[8usize]),
1310 i8::bitand(a[9usize], &b[9usize]),
1311 i8::bitand(a[10usize], &b[10usize]),
1312 i8::bitand(a[11usize], &b[11usize]),
1313 i8::bitand(a[12usize], &b[12usize]),
1314 i8::bitand(a[13usize], &b[13usize]),
1315 i8::bitand(a[14usize], &b[14usize]),
1316 i8::bitand(a[15usize], &b[15usize]),
1317 ]
1318 .simd_into(self)
1319 }
1320 #[inline(always)]
1321 fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1322 [
1323 i8::bitor(a[0usize], &b[0usize]),
1324 i8::bitor(a[1usize], &b[1usize]),
1325 i8::bitor(a[2usize], &b[2usize]),
1326 i8::bitor(a[3usize], &b[3usize]),
1327 i8::bitor(a[4usize], &b[4usize]),
1328 i8::bitor(a[5usize], &b[5usize]),
1329 i8::bitor(a[6usize], &b[6usize]),
1330 i8::bitor(a[7usize], &b[7usize]),
1331 i8::bitor(a[8usize], &b[8usize]),
1332 i8::bitor(a[9usize], &b[9usize]),
1333 i8::bitor(a[10usize], &b[10usize]),
1334 i8::bitor(a[11usize], &b[11usize]),
1335 i8::bitor(a[12usize], &b[12usize]),
1336 i8::bitor(a[13usize], &b[13usize]),
1337 i8::bitor(a[14usize], &b[14usize]),
1338 i8::bitor(a[15usize], &b[15usize]),
1339 ]
1340 .simd_into(self)
1341 }
1342 #[inline(always)]
1343 fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1344 [
1345 i8::bitxor(a[0usize], &b[0usize]),
1346 i8::bitxor(a[1usize], &b[1usize]),
1347 i8::bitxor(a[2usize], &b[2usize]),
1348 i8::bitxor(a[3usize], &b[3usize]),
1349 i8::bitxor(a[4usize], &b[4usize]),
1350 i8::bitxor(a[5usize], &b[5usize]),
1351 i8::bitxor(a[6usize], &b[6usize]),
1352 i8::bitxor(a[7usize], &b[7usize]),
1353 i8::bitxor(a[8usize], &b[8usize]),
1354 i8::bitxor(a[9usize], &b[9usize]),
1355 i8::bitxor(a[10usize], &b[10usize]),
1356 i8::bitxor(a[11usize], &b[11usize]),
1357 i8::bitxor(a[12usize], &b[12usize]),
1358 i8::bitxor(a[13usize], &b[13usize]),
1359 i8::bitxor(a[14usize], &b[14usize]),
1360 i8::bitxor(a[15usize], &b[15usize]),
1361 ]
1362 .simd_into(self)
1363 }
1364 #[inline(always)]
1365 fn select_mask8x16(
1366 self,
1367 a: mask8x16<Self>,
1368 b: mask8x16<Self>,
1369 c: mask8x16<Self>,
1370 ) -> mask8x16<Self> {
1371 [
1372 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1373 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1374 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1375 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1376 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1377 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1378 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1379 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1380 if a[8usize] != 0 { b[8usize] } else { c[8usize] },
1381 if a[9usize] != 0 { b[9usize] } else { c[9usize] },
1382 if a[10usize] != 0 {
1383 b[10usize]
1384 } else {
1385 c[10usize]
1386 },
1387 if a[11usize] != 0 {
1388 b[11usize]
1389 } else {
1390 c[11usize]
1391 },
1392 if a[12usize] != 0 {
1393 b[12usize]
1394 } else {
1395 c[12usize]
1396 },
1397 if a[13usize] != 0 {
1398 b[13usize]
1399 } else {
1400 c[13usize]
1401 },
1402 if a[14usize] != 0 {
1403 b[14usize]
1404 } else {
1405 c[14usize]
1406 },
1407 if a[15usize] != 0 {
1408 b[15usize]
1409 } else {
1410 c[15usize]
1411 },
1412 ]
1413 .simd_into(self)
1414 }
1415 #[inline(always)]
1416 fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
1417 [
1418 -(i8::eq(&a[0usize], &b[0usize]) as i8),
1419 -(i8::eq(&a[1usize], &b[1usize]) as i8),
1420 -(i8::eq(&a[2usize], &b[2usize]) as i8),
1421 -(i8::eq(&a[3usize], &b[3usize]) as i8),
1422 -(i8::eq(&a[4usize], &b[4usize]) as i8),
1423 -(i8::eq(&a[5usize], &b[5usize]) as i8),
1424 -(i8::eq(&a[6usize], &b[6usize]) as i8),
1425 -(i8::eq(&a[7usize], &b[7usize]) as i8),
1426 -(i8::eq(&a[8usize], &b[8usize]) as i8),
1427 -(i8::eq(&a[9usize], &b[9usize]) as i8),
1428 -(i8::eq(&a[10usize], &b[10usize]) as i8),
1429 -(i8::eq(&a[11usize], &b[11usize]) as i8),
1430 -(i8::eq(&a[12usize], &b[12usize]) as i8),
1431 -(i8::eq(&a[13usize], &b[13usize]) as i8),
1432 -(i8::eq(&a[14usize], &b[14usize]) as i8),
1433 -(i8::eq(&a[15usize], &b[15usize]) as i8),
1434 ]
1435 .simd_into(self)
1436 }
1437 #[inline(always)]
1438 fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
1439 let mut result = [0; 32usize];
1440 result[0..16usize].copy_from_slice(&a.val);
1441 result[16usize..32usize].copy_from_slice(&b.val);
1442 result.simd_into(self)
1443 }
1444 #[inline(always)]
1445 fn splat_i16x8(self, val: i16) -> i16x8<Self> {
1446 [val; 8usize].simd_into(self)
1447 }
1448 #[inline(always)]
1449 fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
1450 [
1451 i16::not(a[0usize]),
1452 i16::not(a[1usize]),
1453 i16::not(a[2usize]),
1454 i16::not(a[3usize]),
1455 i16::not(a[4usize]),
1456 i16::not(a[5usize]),
1457 i16::not(a[6usize]),
1458 i16::not(a[7usize]),
1459 ]
1460 .simd_into(self)
1461 }
1462 #[inline(always)]
1463 fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1464 [
1465 i16::add(a[0usize], &b[0usize]),
1466 i16::add(a[1usize], &b[1usize]),
1467 i16::add(a[2usize], &b[2usize]),
1468 i16::add(a[3usize], &b[3usize]),
1469 i16::add(a[4usize], &b[4usize]),
1470 i16::add(a[5usize], &b[5usize]),
1471 i16::add(a[6usize], &b[6usize]),
1472 i16::add(a[7usize], &b[7usize]),
1473 ]
1474 .simd_into(self)
1475 }
1476 #[inline(always)]
1477 fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1478 [
1479 i16::wrapping_sub(a[0usize], b[0usize]),
1480 i16::wrapping_sub(a[1usize], b[1usize]),
1481 i16::wrapping_sub(a[2usize], b[2usize]),
1482 i16::wrapping_sub(a[3usize], b[3usize]),
1483 i16::wrapping_sub(a[4usize], b[4usize]),
1484 i16::wrapping_sub(a[5usize], b[5usize]),
1485 i16::wrapping_sub(a[6usize], b[6usize]),
1486 i16::wrapping_sub(a[7usize], b[7usize]),
1487 ]
1488 .simd_into(self)
1489 }
1490 #[inline(always)]
1491 fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1492 [
1493 i16::wrapping_mul(a[0usize], b[0usize]),
1494 i16::wrapping_mul(a[1usize], b[1usize]),
1495 i16::wrapping_mul(a[2usize], b[2usize]),
1496 i16::wrapping_mul(a[3usize], b[3usize]),
1497 i16::wrapping_mul(a[4usize], b[4usize]),
1498 i16::wrapping_mul(a[5usize], b[5usize]),
1499 i16::wrapping_mul(a[6usize], b[6usize]),
1500 i16::wrapping_mul(a[7usize], b[7usize]),
1501 ]
1502 .simd_into(self)
1503 }
1504 #[inline(always)]
1505 fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1506 [
1507 i16::bitand(a[0usize], &b[0usize]),
1508 i16::bitand(a[1usize], &b[1usize]),
1509 i16::bitand(a[2usize], &b[2usize]),
1510 i16::bitand(a[3usize], &b[3usize]),
1511 i16::bitand(a[4usize], &b[4usize]),
1512 i16::bitand(a[5usize], &b[5usize]),
1513 i16::bitand(a[6usize], &b[6usize]),
1514 i16::bitand(a[7usize], &b[7usize]),
1515 ]
1516 .simd_into(self)
1517 }
1518 #[inline(always)]
1519 fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1520 [
1521 i16::bitor(a[0usize], &b[0usize]),
1522 i16::bitor(a[1usize], &b[1usize]),
1523 i16::bitor(a[2usize], &b[2usize]),
1524 i16::bitor(a[3usize], &b[3usize]),
1525 i16::bitor(a[4usize], &b[4usize]),
1526 i16::bitor(a[5usize], &b[5usize]),
1527 i16::bitor(a[6usize], &b[6usize]),
1528 i16::bitor(a[7usize], &b[7usize]),
1529 ]
1530 .simd_into(self)
1531 }
1532 #[inline(always)]
1533 fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1534 [
1535 i16::bitxor(a[0usize], &b[0usize]),
1536 i16::bitxor(a[1usize], &b[1usize]),
1537 i16::bitxor(a[2usize], &b[2usize]),
1538 i16::bitxor(a[3usize], &b[3usize]),
1539 i16::bitxor(a[4usize], &b[4usize]),
1540 i16::bitxor(a[5usize], &b[5usize]),
1541 i16::bitxor(a[6usize], &b[6usize]),
1542 i16::bitxor(a[7usize], &b[7usize]),
1543 ]
1544 .simd_into(self)
1545 }
1546 #[inline(always)]
1547 fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
1548 [
1549 i16::shr(a[0usize], shift as i16),
1550 i16::shr(a[1usize], shift as i16),
1551 i16::shr(a[2usize], shift as i16),
1552 i16::shr(a[3usize], shift as i16),
1553 i16::shr(a[4usize], shift as i16),
1554 i16::shr(a[5usize], shift as i16),
1555 i16::shr(a[6usize], shift as i16),
1556 i16::shr(a[7usize], shift as i16),
1557 ]
1558 .simd_into(self)
1559 }
1560 #[inline(always)]
1561 fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1562 [
1563 -(i16::eq(&a[0usize], &b[0usize]) as i16),
1564 -(i16::eq(&a[1usize], &b[1usize]) as i16),
1565 -(i16::eq(&a[2usize], &b[2usize]) as i16),
1566 -(i16::eq(&a[3usize], &b[3usize]) as i16),
1567 -(i16::eq(&a[4usize], &b[4usize]) as i16),
1568 -(i16::eq(&a[5usize], &b[5usize]) as i16),
1569 -(i16::eq(&a[6usize], &b[6usize]) as i16),
1570 -(i16::eq(&a[7usize], &b[7usize]) as i16),
1571 ]
1572 .simd_into(self)
1573 }
1574 #[inline(always)]
1575 fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1576 [
1577 -(i16::lt(&a[0usize], &b[0usize]) as i16),
1578 -(i16::lt(&a[1usize], &b[1usize]) as i16),
1579 -(i16::lt(&a[2usize], &b[2usize]) as i16),
1580 -(i16::lt(&a[3usize], &b[3usize]) as i16),
1581 -(i16::lt(&a[4usize], &b[4usize]) as i16),
1582 -(i16::lt(&a[5usize], &b[5usize]) as i16),
1583 -(i16::lt(&a[6usize], &b[6usize]) as i16),
1584 -(i16::lt(&a[7usize], &b[7usize]) as i16),
1585 ]
1586 .simd_into(self)
1587 }
1588 #[inline(always)]
1589 fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1590 [
1591 -(i16::le(&a[0usize], &b[0usize]) as i16),
1592 -(i16::le(&a[1usize], &b[1usize]) as i16),
1593 -(i16::le(&a[2usize], &b[2usize]) as i16),
1594 -(i16::le(&a[3usize], &b[3usize]) as i16),
1595 -(i16::le(&a[4usize], &b[4usize]) as i16),
1596 -(i16::le(&a[5usize], &b[5usize]) as i16),
1597 -(i16::le(&a[6usize], &b[6usize]) as i16),
1598 -(i16::le(&a[7usize], &b[7usize]) as i16),
1599 ]
1600 .simd_into(self)
1601 }
1602 #[inline(always)]
1603 fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1604 [
1605 -(i16::ge(&a[0usize], &b[0usize]) as i16),
1606 -(i16::ge(&a[1usize], &b[1usize]) as i16),
1607 -(i16::ge(&a[2usize], &b[2usize]) as i16),
1608 -(i16::ge(&a[3usize], &b[3usize]) as i16),
1609 -(i16::ge(&a[4usize], &b[4usize]) as i16),
1610 -(i16::ge(&a[5usize], &b[5usize]) as i16),
1611 -(i16::ge(&a[6usize], &b[6usize]) as i16),
1612 -(i16::ge(&a[7usize], &b[7usize]) as i16),
1613 ]
1614 .simd_into(self)
1615 }
1616 #[inline(always)]
1617 fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
1618 [
1619 -(i16::gt(&a[0usize], &b[0usize]) as i16),
1620 -(i16::gt(&a[1usize], &b[1usize]) as i16),
1621 -(i16::gt(&a[2usize], &b[2usize]) as i16),
1622 -(i16::gt(&a[3usize], &b[3usize]) as i16),
1623 -(i16::gt(&a[4usize], &b[4usize]) as i16),
1624 -(i16::gt(&a[5usize], &b[5usize]) as i16),
1625 -(i16::gt(&a[6usize], &b[6usize]) as i16),
1626 -(i16::gt(&a[7usize], &b[7usize]) as i16),
1627 ]
1628 .simd_into(self)
1629 }
1630 #[inline(always)]
1631 fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1632 [
1633 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1634 ]
1635 .simd_into(self)
1636 }
1637 #[inline(always)]
1638 fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1639 [
1640 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1641 ]
1642 .simd_into(self)
1643 }
1644 #[inline(always)]
1645 fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1646 [
1647 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1648 ]
1649 .simd_into(self)
1650 }
1651 #[inline(always)]
1652 fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1653 [
1654 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1655 ]
1656 .simd_into(self)
1657 }
1658 #[inline(always)]
1659 fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
1660 [
1661 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1662 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1663 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1664 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1665 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1666 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1667 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1668 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1669 ]
1670 .simd_into(self)
1671 }
1672 #[inline(always)]
1673 fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1674 [
1675 i16::min(a[0usize], b[0usize]),
1676 i16::min(a[1usize], b[1usize]),
1677 i16::min(a[2usize], b[2usize]),
1678 i16::min(a[3usize], b[3usize]),
1679 i16::min(a[4usize], b[4usize]),
1680 i16::min(a[5usize], b[5usize]),
1681 i16::min(a[6usize], b[6usize]),
1682 i16::min(a[7usize], b[7usize]),
1683 ]
1684 .simd_into(self)
1685 }
1686 #[inline(always)]
1687 fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
1688 [
1689 i16::max(a[0usize], b[0usize]),
1690 i16::max(a[1usize], b[1usize]),
1691 i16::max(a[2usize], b[2usize]),
1692 i16::max(a[3usize], b[3usize]),
1693 i16::max(a[4usize], b[4usize]),
1694 i16::max(a[5usize], b[5usize]),
1695 i16::max(a[6usize], b[6usize]),
1696 i16::max(a[7usize], b[7usize]),
1697 ]
1698 .simd_into(self)
1699 }
1700 #[inline(always)]
1701 fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
1702 let mut result = [0; 16usize];
1703 result[0..8usize].copy_from_slice(&a.val);
1704 result[8usize..16usize].copy_from_slice(&b.val);
1705 result.simd_into(self)
1706 }
1707 #[inline(always)]
1708 fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
1709 u8x16 {
1710 val: bytemuck::cast(a.val),
1711 simd: a.simd,
1712 }
1713 }
1714 #[inline(always)]
1715 fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
1716 u32x4 {
1717 val: bytemuck::cast(a.val),
1718 simd: a.simd,
1719 }
1720 }
1721 #[inline(always)]
1722 fn splat_u16x8(self, val: u16) -> u16x8<Self> {
1723 [val; 8usize].simd_into(self)
1724 }
1725 #[inline(always)]
1726 fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
1727 [
1728 u16::not(a[0usize]),
1729 u16::not(a[1usize]),
1730 u16::not(a[2usize]),
1731 u16::not(a[3usize]),
1732 u16::not(a[4usize]),
1733 u16::not(a[5usize]),
1734 u16::not(a[6usize]),
1735 u16::not(a[7usize]),
1736 ]
1737 .simd_into(self)
1738 }
1739 #[inline(always)]
1740 fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1741 [
1742 u16::add(a[0usize], &b[0usize]),
1743 u16::add(a[1usize], &b[1usize]),
1744 u16::add(a[2usize], &b[2usize]),
1745 u16::add(a[3usize], &b[3usize]),
1746 u16::add(a[4usize], &b[4usize]),
1747 u16::add(a[5usize], &b[5usize]),
1748 u16::add(a[6usize], &b[6usize]),
1749 u16::add(a[7usize], &b[7usize]),
1750 ]
1751 .simd_into(self)
1752 }
1753 #[inline(always)]
1754 fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1755 [
1756 u16::wrapping_sub(a[0usize], b[0usize]),
1757 u16::wrapping_sub(a[1usize], b[1usize]),
1758 u16::wrapping_sub(a[2usize], b[2usize]),
1759 u16::wrapping_sub(a[3usize], b[3usize]),
1760 u16::wrapping_sub(a[4usize], b[4usize]),
1761 u16::wrapping_sub(a[5usize], b[5usize]),
1762 u16::wrapping_sub(a[6usize], b[6usize]),
1763 u16::wrapping_sub(a[7usize], b[7usize]),
1764 ]
1765 .simd_into(self)
1766 }
1767 #[inline(always)]
1768 fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1769 [
1770 u16::wrapping_mul(a[0usize], b[0usize]),
1771 u16::wrapping_mul(a[1usize], b[1usize]),
1772 u16::wrapping_mul(a[2usize], b[2usize]),
1773 u16::wrapping_mul(a[3usize], b[3usize]),
1774 u16::wrapping_mul(a[4usize], b[4usize]),
1775 u16::wrapping_mul(a[5usize], b[5usize]),
1776 u16::wrapping_mul(a[6usize], b[6usize]),
1777 u16::wrapping_mul(a[7usize], b[7usize]),
1778 ]
1779 .simd_into(self)
1780 }
1781 #[inline(always)]
1782 fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1783 [
1784 u16::bitand(a[0usize], &b[0usize]),
1785 u16::bitand(a[1usize], &b[1usize]),
1786 u16::bitand(a[2usize], &b[2usize]),
1787 u16::bitand(a[3usize], &b[3usize]),
1788 u16::bitand(a[4usize], &b[4usize]),
1789 u16::bitand(a[5usize], &b[5usize]),
1790 u16::bitand(a[6usize], &b[6usize]),
1791 u16::bitand(a[7usize], &b[7usize]),
1792 ]
1793 .simd_into(self)
1794 }
1795 #[inline(always)]
1796 fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1797 [
1798 u16::bitor(a[0usize], &b[0usize]),
1799 u16::bitor(a[1usize], &b[1usize]),
1800 u16::bitor(a[2usize], &b[2usize]),
1801 u16::bitor(a[3usize], &b[3usize]),
1802 u16::bitor(a[4usize], &b[4usize]),
1803 u16::bitor(a[5usize], &b[5usize]),
1804 u16::bitor(a[6usize], &b[6usize]),
1805 u16::bitor(a[7usize], &b[7usize]),
1806 ]
1807 .simd_into(self)
1808 }
1809 #[inline(always)]
1810 fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1811 [
1812 u16::bitxor(a[0usize], &b[0usize]),
1813 u16::bitxor(a[1usize], &b[1usize]),
1814 u16::bitxor(a[2usize], &b[2usize]),
1815 u16::bitxor(a[3usize], &b[3usize]),
1816 u16::bitxor(a[4usize], &b[4usize]),
1817 u16::bitxor(a[5usize], &b[5usize]),
1818 u16::bitxor(a[6usize], &b[6usize]),
1819 u16::bitxor(a[7usize], &b[7usize]),
1820 ]
1821 .simd_into(self)
1822 }
1823 #[inline(always)]
1824 fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
1825 [
1826 u16::shr(a[0usize], shift as u16),
1827 u16::shr(a[1usize], shift as u16),
1828 u16::shr(a[2usize], shift as u16),
1829 u16::shr(a[3usize], shift as u16),
1830 u16::shr(a[4usize], shift as u16),
1831 u16::shr(a[5usize], shift as u16),
1832 u16::shr(a[6usize], shift as u16),
1833 u16::shr(a[7usize], shift as u16),
1834 ]
1835 .simd_into(self)
1836 }
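    // Comparison lanes produce mask values: the scalar `bool` result is negated so
    // that each mask lane is 0 for false and -1 (all bits set) for true.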
1837 #[inline(always)]
1838 fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1839 [
1840 -(u16::eq(&a[0usize], &b[0usize]) as i16),
1841 -(u16::eq(&a[1usize], &b[1usize]) as i16),
1842 -(u16::eq(&a[2usize], &b[2usize]) as i16),
1843 -(u16::eq(&a[3usize], &b[3usize]) as i16),
1844 -(u16::eq(&a[4usize], &b[4usize]) as i16),
1845 -(u16::eq(&a[5usize], &b[5usize]) as i16),
1846 -(u16::eq(&a[6usize], &b[6usize]) as i16),
1847 -(u16::eq(&a[7usize], &b[7usize]) as i16),
1848 ]
1849 .simd_into(self)
1850 }
1851 #[inline(always)]
1852 fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1853 [
1854 -(u16::lt(&a[0usize], &b[0usize]) as i16),
1855 -(u16::lt(&a[1usize], &b[1usize]) as i16),
1856 -(u16::lt(&a[2usize], &b[2usize]) as i16),
1857 -(u16::lt(&a[3usize], &b[3usize]) as i16),
1858 -(u16::lt(&a[4usize], &b[4usize]) as i16),
1859 -(u16::lt(&a[5usize], &b[5usize]) as i16),
1860 -(u16::lt(&a[6usize], &b[6usize]) as i16),
1861 -(u16::lt(&a[7usize], &b[7usize]) as i16),
1862 ]
1863 .simd_into(self)
1864 }
1865 #[inline(always)]
1866 fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1867 [
1868 -(u16::le(&a[0usize], &b[0usize]) as i16),
1869 -(u16::le(&a[1usize], &b[1usize]) as i16),
1870 -(u16::le(&a[2usize], &b[2usize]) as i16),
1871 -(u16::le(&a[3usize], &b[3usize]) as i16),
1872 -(u16::le(&a[4usize], &b[4usize]) as i16),
1873 -(u16::le(&a[5usize], &b[5usize]) as i16),
1874 -(u16::le(&a[6usize], &b[6usize]) as i16),
1875 -(u16::le(&a[7usize], &b[7usize]) as i16),
1876 ]
1877 .simd_into(self)
1878 }
1879 #[inline(always)]
1880 fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1881 [
1882 -(u16::ge(&a[0usize], &b[0usize]) as i16),
1883 -(u16::ge(&a[1usize], &b[1usize]) as i16),
1884 -(u16::ge(&a[2usize], &b[2usize]) as i16),
1885 -(u16::ge(&a[3usize], &b[3usize]) as i16),
1886 -(u16::ge(&a[4usize], &b[4usize]) as i16),
1887 -(u16::ge(&a[5usize], &b[5usize]) as i16),
1888 -(u16::ge(&a[6usize], &b[6usize]) as i16),
1889 -(u16::ge(&a[7usize], &b[7usize]) as i16),
1890 ]
1891 .simd_into(self)
1892 }
1893 #[inline(always)]
1894 fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
1895 [
1896 -(u16::gt(&a[0usize], &b[0usize]) as i16),
1897 -(u16::gt(&a[1usize], &b[1usize]) as i16),
1898 -(u16::gt(&a[2usize], &b[2usize]) as i16),
1899 -(u16::gt(&a[3usize], &b[3usize]) as i16),
1900 -(u16::gt(&a[4usize], &b[4usize]) as i16),
1901 -(u16::gt(&a[5usize], &b[5usize]) as i16),
1902 -(u16::gt(&a[6usize], &b[6usize]) as i16),
1903 -(u16::gt(&a[7usize], &b[7usize]) as i16),
1904 ]
1905 .simd_into(self)
1906 }
1907 #[inline(always)]
1908 fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1909 [
1910 a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
1911 ]
1912 .simd_into(self)
1913 }
1914 #[inline(always)]
1915 fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1916 [
1917 a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
1918 ]
1919 .simd_into(self)
1920 }
1921 #[inline(always)]
1922 fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1923 [
1924 a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
1925 ]
1926 .simd_into(self)
1927 }
1928 #[inline(always)]
1929 fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1930 [
1931 a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
1932 ]
1933 .simd_into(self)
1934 }
1935 #[inline(always)]
1936 fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
1937 [
1938 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
1939 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
1940 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
1941 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
1942 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
1943 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
1944 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
1945 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
1946 ]
1947 .simd_into(self)
1948 }
1949 #[inline(always)]
1950 fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1951 [
1952 u16::min(a[0usize], b[0usize]),
1953 u16::min(a[1usize], b[1usize]),
1954 u16::min(a[2usize], b[2usize]),
1955 u16::min(a[3usize], b[3usize]),
1956 u16::min(a[4usize], b[4usize]),
1957 u16::min(a[5usize], b[5usize]),
1958 u16::min(a[6usize], b[6usize]),
1959 u16::min(a[7usize], b[7usize]),
1960 ]
1961 .simd_into(self)
1962 }
1963 #[inline(always)]
1964 fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
1965 [
1966 u16::max(a[0usize], b[0usize]),
1967 u16::max(a[1usize], b[1usize]),
1968 u16::max(a[2usize], b[2usize]),
1969 u16::max(a[3usize], b[3usize]),
1970 u16::max(a[4usize], b[4usize]),
1971 u16::max(a[5usize], b[5usize]),
1972 u16::max(a[6usize], b[6usize]),
1973 u16::max(a[7usize], b[7usize]),
1974 ]
1975 .simd_into(self)
1976 }
1977 #[inline(always)]
1978 fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
1979 let mut result = [0; 16usize];
1980 result[0..8usize].copy_from_slice(&a.val);
1981 result[8usize..16usize].copy_from_slice(&b.val);
1982 result.simd_into(self)
1983 }
1984 #[inline(always)]
1985 fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
1986 u8x16 {
1987 val: bytemuck::cast(a.val),
1988 simd: a.simd,
1989 }
1990 }
1991 #[inline(always)]
1992 fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
1993 u32x4 {
1994 val: bytemuck::cast(a.val),
1995 simd: a.simd,
1996 }
1997 }
1998 #[inline(always)]
1999 fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
2000 [val; 8usize].simd_into(self)
2001 }
2002 #[inline(always)]
2003 fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
2004 [
2005 i16::not(a[0usize]),
2006 i16::not(a[1usize]),
2007 i16::not(a[2usize]),
2008 i16::not(a[3usize]),
2009 i16::not(a[4usize]),
2010 i16::not(a[5usize]),
2011 i16::not(a[6usize]),
2012 i16::not(a[7usize]),
2013 ]
2014 .simd_into(self)
2015 }
2016 #[inline(always)]
2017 fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2018 [
2019 i16::bitand(a[0usize], &b[0usize]),
2020 i16::bitand(a[1usize], &b[1usize]),
2021 i16::bitand(a[2usize], &b[2usize]),
2022 i16::bitand(a[3usize], &b[3usize]),
2023 i16::bitand(a[4usize], &b[4usize]),
2024 i16::bitand(a[5usize], &b[5usize]),
2025 i16::bitand(a[6usize], &b[6usize]),
2026 i16::bitand(a[7usize], &b[7usize]),
2027 ]
2028 .simd_into(self)
2029 }
2030 #[inline(always)]
2031 fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2032 [
2033 i16::bitor(a[0usize], &b[0usize]),
2034 i16::bitor(a[1usize], &b[1usize]),
2035 i16::bitor(a[2usize], &b[2usize]),
2036 i16::bitor(a[3usize], &b[3usize]),
2037 i16::bitor(a[4usize], &b[4usize]),
2038 i16::bitor(a[5usize], &b[5usize]),
2039 i16::bitor(a[6usize], &b[6usize]),
2040 i16::bitor(a[7usize], &b[7usize]),
2041 ]
2042 .simd_into(self)
2043 }
2044 #[inline(always)]
2045 fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2046 [
2047 i16::bitxor(a[0usize], &b[0usize]),
2048 i16::bitxor(a[1usize], &b[1usize]),
2049 i16::bitxor(a[2usize], &b[2usize]),
2050 i16::bitxor(a[3usize], &b[3usize]),
2051 i16::bitxor(a[4usize], &b[4usize]),
2052 i16::bitxor(a[5usize], &b[5usize]),
2053 i16::bitxor(a[6usize], &b[6usize]),
2054 i16::bitxor(a[7usize], &b[7usize]),
2055 ]
2056 .simd_into(self)
2057 }
2058 #[inline(always)]
2059 fn select_mask16x8(
2060 self,
2061 a: mask16x8<Self>,
2062 b: mask16x8<Self>,
2063 c: mask16x8<Self>,
2064 ) -> mask16x8<Self> {
2065 [
2066 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2067 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2068 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2069 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2070 if a[4usize] != 0 { b[4usize] } else { c[4usize] },
2071 if a[5usize] != 0 { b[5usize] } else { c[5usize] },
2072 if a[6usize] != 0 { b[6usize] } else { c[6usize] },
2073 if a[7usize] != 0 { b[7usize] } else { c[7usize] },
2074 ]
2075 .simd_into(self)
2076 }
2077 #[inline(always)]
2078 fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
2079 [
2080 -(i16::eq(&a[0usize], &b[0usize]) as i16),
2081 -(i16::eq(&a[1usize], &b[1usize]) as i16),
2082 -(i16::eq(&a[2usize], &b[2usize]) as i16),
2083 -(i16::eq(&a[3usize], &b[3usize]) as i16),
2084 -(i16::eq(&a[4usize], &b[4usize]) as i16),
2085 -(i16::eq(&a[5usize], &b[5usize]) as i16),
2086 -(i16::eq(&a[6usize], &b[6usize]) as i16),
2087 -(i16::eq(&a[7usize], &b[7usize]) as i16),
2088 ]
2089 .simd_into(self)
2090 }
2091 #[inline(always)]
2092 fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
2093 let mut result = [0; 16usize];
2094 result[0..8usize].copy_from_slice(&a.val);
2095 result[8usize..16usize].copy_from_slice(&b.val);
2096 result.simd_into(self)
2097 }
2098 #[inline(always)]
2099 fn splat_i32x4(self, val: i32) -> i32x4<Self> {
2100 [val; 4usize].simd_into(self)
2101 }
2102 #[inline(always)]
2103 fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
2104 [
2105 i32::not(a[0usize]),
2106 i32::not(a[1usize]),
2107 i32::not(a[2usize]),
2108 i32::not(a[3usize]),
2109 ]
2110 .simd_into(self)
2111 }
2112 #[inline(always)]
2113 fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2114 [
            i32::wrapping_add(a[0usize], b[0usize]),
            i32::wrapping_add(a[1usize], b[1usize]),
            i32::wrapping_add(a[2usize], b[2usize]),
            i32::wrapping_add(a[3usize], b[3usize]),
2119 ]
2120 .simd_into(self)
2121 }
2122 #[inline(always)]
2123 fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2124 [
2125 i32::wrapping_sub(a[0usize], b[0usize]),
2126 i32::wrapping_sub(a[1usize], b[1usize]),
2127 i32::wrapping_sub(a[2usize], b[2usize]),
2128 i32::wrapping_sub(a[3usize], b[3usize]),
2129 ]
2130 .simd_into(self)
2131 }
2132 #[inline(always)]
2133 fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2134 [
2135 i32::wrapping_mul(a[0usize], b[0usize]),
2136 i32::wrapping_mul(a[1usize], b[1usize]),
2137 i32::wrapping_mul(a[2usize], b[2usize]),
2138 i32::wrapping_mul(a[3usize], b[3usize]),
2139 ]
2140 .simd_into(self)
2141 }
2142 #[inline(always)]
2143 fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2144 [
2145 i32::bitand(a[0usize], &b[0usize]),
2146 i32::bitand(a[1usize], &b[1usize]),
2147 i32::bitand(a[2usize], &b[2usize]),
2148 i32::bitand(a[3usize], &b[3usize]),
2149 ]
2150 .simd_into(self)
2151 }
2152 #[inline(always)]
2153 fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2154 [
2155 i32::bitor(a[0usize], &b[0usize]),
2156 i32::bitor(a[1usize], &b[1usize]),
2157 i32::bitor(a[2usize], &b[2usize]),
2158 i32::bitor(a[3usize], &b[3usize]),
2159 ]
2160 .simd_into(self)
2161 }
2162 #[inline(always)]
2163 fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2164 [
2165 i32::bitxor(a[0usize], &b[0usize]),
2166 i32::bitxor(a[1usize], &b[1usize]),
2167 i32::bitxor(a[2usize], &b[2usize]),
2168 i32::bitxor(a[3usize], &b[3usize]),
2169 ]
2170 .simd_into(self)
2171 }
2172 #[inline(always)]
2173 fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
2174 [
2175 i32::shr(a[0usize], shift as i32),
2176 i32::shr(a[1usize], shift as i32),
2177 i32::shr(a[2usize], shift as i32),
2178 i32::shr(a[3usize], shift as i32),
2179 ]
2180 .simd_into(self)
2181 }
2182 #[inline(always)]
2183 fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2184 [
2185 -(i32::eq(&a[0usize], &b[0usize]) as i32),
2186 -(i32::eq(&a[1usize], &b[1usize]) as i32),
2187 -(i32::eq(&a[2usize], &b[2usize]) as i32),
2188 -(i32::eq(&a[3usize], &b[3usize]) as i32),
2189 ]
2190 .simd_into(self)
2191 }
2192 #[inline(always)]
2193 fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2194 [
2195 -(i32::lt(&a[0usize], &b[0usize]) as i32),
2196 -(i32::lt(&a[1usize], &b[1usize]) as i32),
2197 -(i32::lt(&a[2usize], &b[2usize]) as i32),
2198 -(i32::lt(&a[3usize], &b[3usize]) as i32),
2199 ]
2200 .simd_into(self)
2201 }
2202 #[inline(always)]
2203 fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2204 [
2205 -(i32::le(&a[0usize], &b[0usize]) as i32),
2206 -(i32::le(&a[1usize], &b[1usize]) as i32),
2207 -(i32::le(&a[2usize], &b[2usize]) as i32),
2208 -(i32::le(&a[3usize], &b[3usize]) as i32),
2209 ]
2210 .simd_into(self)
2211 }
2212 #[inline(always)]
2213 fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2214 [
2215 -(i32::ge(&a[0usize], &b[0usize]) as i32),
2216 -(i32::ge(&a[1usize], &b[1usize]) as i32),
2217 -(i32::ge(&a[2usize], &b[2usize]) as i32),
2218 -(i32::ge(&a[3usize], &b[3usize]) as i32),
2219 ]
2220 .simd_into(self)
2221 }
2222 #[inline(always)]
2223 fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
2224 [
2225 -(i32::gt(&a[0usize], &b[0usize]) as i32),
2226 -(i32::gt(&a[1usize], &b[1usize]) as i32),
2227 -(i32::gt(&a[2usize], &b[2usize]) as i32),
2228 -(i32::gt(&a[3usize], &b[3usize]) as i32),
2229 ]
2230 .simd_into(self)
2231 }
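    // Interleaving shuffles: zip_low([a0, a1, a2, a3], [b0, b1, b2, b3]) gives
    // [a0, b0, a1, b1]; unzip_low gathers the even-indexed lanes of both inputs.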
2232 #[inline(always)]
2233 fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2234 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2235 }
2236 #[inline(always)]
2237 fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2238 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2239 }
2240 #[inline(always)]
2241 fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2242 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2243 }
2244 #[inline(always)]
2245 fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2246 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2247 }
2248 #[inline(always)]
2249 fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
2250 [
2251 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2252 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2253 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2254 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2255 ]
2256 .simd_into(self)
2257 }
2258 #[inline(always)]
2259 fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2260 [
2261 i32::min(a[0usize], b[0usize]),
2262 i32::min(a[1usize], b[1usize]),
2263 i32::min(a[2usize], b[2usize]),
2264 i32::min(a[3usize], b[3usize]),
2265 ]
2266 .simd_into(self)
2267 }
2268 #[inline(always)]
2269 fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
2270 [
2271 i32::max(a[0usize], b[0usize]),
2272 i32::max(a[1usize], b[1usize]),
2273 i32::max(a[2usize], b[2usize]),
2274 i32::max(a[3usize], b[3usize]),
2275 ]
2276 .simd_into(self)
2277 }
2278 #[inline(always)]
2279 fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
2280 let mut result = [0; 8usize];
2281 result[0..4usize].copy_from_slice(&a.val);
2282 result[4usize..8usize].copy_from_slice(&b.val);
2283 result.simd_into(self)
2284 }
2285 #[inline(always)]
2286 fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
2287 u8x16 {
2288 val: bytemuck::cast(a.val),
2289 simd: a.simd,
2290 }
2291 }
2292 #[inline(always)]
2293 fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
2294 u32x4 {
2295 val: bytemuck::cast(a.val),
2296 simd: a.simd,
2297 }
2298 }
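    // Lane-wise signed-integer-to-float conversion via `as f32`, which rounds to the
    // nearest representable value when the integer cannot be represented exactly.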
2299 #[inline(always)]
2300 fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
2301 [
2302 a[0usize] as f32,
2303 a[1usize] as f32,
2304 a[2usize] as f32,
2305 a[3usize] as f32,
2306 ]
2307 .simd_into(self)
2308 }
2309 #[inline(always)]
2310 fn splat_u32x4(self, val: u32) -> u32x4<Self> {
2311 [val; 4usize].simd_into(self)
2312 }
2313 #[inline(always)]
2314 fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
2315 [
2316 u32::not(a[0usize]),
2317 u32::not(a[1usize]),
2318 u32::not(a[2usize]),
2319 u32::not(a[3usize]),
2320 ]
2321 .simd_into(self)
2322 }
2323 #[inline(always)]
2324 fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2325 [
            u32::wrapping_add(a[0usize], b[0usize]),
            u32::wrapping_add(a[1usize], b[1usize]),
            u32::wrapping_add(a[2usize], b[2usize]),
            u32::wrapping_add(a[3usize], b[3usize]),
2330 ]
2331 .simd_into(self)
2332 }
2333 #[inline(always)]
2334 fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2335 [
2336 u32::wrapping_sub(a[0usize], b[0usize]),
2337 u32::wrapping_sub(a[1usize], b[1usize]),
2338 u32::wrapping_sub(a[2usize], b[2usize]),
2339 u32::wrapping_sub(a[3usize], b[3usize]),
2340 ]
2341 .simd_into(self)
2342 }
2343 #[inline(always)]
2344 fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2345 [
2346 u32::wrapping_mul(a[0usize], b[0usize]),
2347 u32::wrapping_mul(a[1usize], b[1usize]),
2348 u32::wrapping_mul(a[2usize], b[2usize]),
2349 u32::wrapping_mul(a[3usize], b[3usize]),
2350 ]
2351 .simd_into(self)
2352 }
2353 #[inline(always)]
2354 fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2355 [
2356 u32::bitand(a[0usize], &b[0usize]),
2357 u32::bitand(a[1usize], &b[1usize]),
2358 u32::bitand(a[2usize], &b[2usize]),
2359 u32::bitand(a[3usize], &b[3usize]),
2360 ]
2361 .simd_into(self)
2362 }
2363 #[inline(always)]
2364 fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2365 [
2366 u32::bitor(a[0usize], &b[0usize]),
2367 u32::bitor(a[1usize], &b[1usize]),
2368 u32::bitor(a[2usize], &b[2usize]),
2369 u32::bitor(a[3usize], &b[3usize]),
2370 ]
2371 .simd_into(self)
2372 }
2373 #[inline(always)]
2374 fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2375 [
2376 u32::bitxor(a[0usize], &b[0usize]),
2377 u32::bitxor(a[1usize], &b[1usize]),
2378 u32::bitxor(a[2usize], &b[2usize]),
2379 u32::bitxor(a[3usize], &b[3usize]),
2380 ]
2381 .simd_into(self)
2382 }
2383 #[inline(always)]
2384 fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
2385 [
            u32::shr(a[0usize], shift),
            u32::shr(a[1usize], shift),
            u32::shr(a[2usize], shift),
            u32::shr(a[3usize], shift),
2390 ]
2391 .simd_into(self)
2392 }
2393 #[inline(always)]
2394 fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2395 [
2396 -(u32::eq(&a[0usize], &b[0usize]) as i32),
2397 -(u32::eq(&a[1usize], &b[1usize]) as i32),
2398 -(u32::eq(&a[2usize], &b[2usize]) as i32),
2399 -(u32::eq(&a[3usize], &b[3usize]) as i32),
2400 ]
2401 .simd_into(self)
2402 }
2403 #[inline(always)]
2404 fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2405 [
2406 -(u32::lt(&a[0usize], &b[0usize]) as i32),
2407 -(u32::lt(&a[1usize], &b[1usize]) as i32),
2408 -(u32::lt(&a[2usize], &b[2usize]) as i32),
2409 -(u32::lt(&a[3usize], &b[3usize]) as i32),
2410 ]
2411 .simd_into(self)
2412 }
2413 #[inline(always)]
2414 fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2415 [
2416 -(u32::le(&a[0usize], &b[0usize]) as i32),
2417 -(u32::le(&a[1usize], &b[1usize]) as i32),
2418 -(u32::le(&a[2usize], &b[2usize]) as i32),
2419 -(u32::le(&a[3usize], &b[3usize]) as i32),
2420 ]
2421 .simd_into(self)
2422 }
2423 #[inline(always)]
2424 fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2425 [
2426 -(u32::ge(&a[0usize], &b[0usize]) as i32),
2427 -(u32::ge(&a[1usize], &b[1usize]) as i32),
2428 -(u32::ge(&a[2usize], &b[2usize]) as i32),
2429 -(u32::ge(&a[3usize], &b[3usize]) as i32),
2430 ]
2431 .simd_into(self)
2432 }
2433 #[inline(always)]
2434 fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
2435 [
2436 -(u32::gt(&a[0usize], &b[0usize]) as i32),
2437 -(u32::gt(&a[1usize], &b[1usize]) as i32),
2438 -(u32::gt(&a[2usize], &b[2usize]) as i32),
2439 -(u32::gt(&a[3usize], &b[3usize]) as i32),
2440 ]
2441 .simd_into(self)
2442 }
2443 #[inline(always)]
2444 fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2445 [a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
2446 }
2447 #[inline(always)]
2448 fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2449 [a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
2450 }
2451 #[inline(always)]
2452 fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2453 [a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
2454 }
2455 #[inline(always)]
2456 fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2457 [a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
2458 }
2459 #[inline(always)]
2460 fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
2461 [
2462 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2463 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2464 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2465 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2466 ]
2467 .simd_into(self)
2468 }
2469 #[inline(always)]
2470 fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2471 [
2472 u32::min(a[0usize], b[0usize]),
2473 u32::min(a[1usize], b[1usize]),
2474 u32::min(a[2usize], b[2usize]),
2475 u32::min(a[3usize], b[3usize]),
2476 ]
2477 .simd_into(self)
2478 }
2479 #[inline(always)]
2480 fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
2481 [
2482 u32::max(a[0usize], b[0usize]),
2483 u32::max(a[1usize], b[1usize]),
2484 u32::max(a[2usize], b[2usize]),
2485 u32::max(a[3usize], b[3usize]),
2486 ]
2487 .simd_into(self)
2488 }
2489 #[inline(always)]
2490 fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
2491 let mut result = [0; 8usize];
2492 result[0..4usize].copy_from_slice(&a.val);
2493 result[4usize..8usize].copy_from_slice(&b.val);
2494 result.simd_into(self)
2495 }
2496 #[inline(always)]
2497 fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
2498 u8x16 {
2499 val: bytemuck::cast(a.val),
2500 simd: a.simd,
2501 }
2502 }
2503 #[inline(always)]
2504 fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
2505 [
2506 a[0usize] as f32,
2507 a[1usize] as f32,
2508 a[2usize] as f32,
2509 a[3usize] as f32,
2510 ]
2511 .simd_into(self)
2512 }
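    // Mask vectors are stored as signed integer lanes, 0 for false and -1 (all bits
    // set) for true; `splat_mask32x4` broadcasts the given lane value unchanged.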
2513 #[inline(always)]
2514 fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
2515 [val; 4usize].simd_into(self)
2516 }
2517 #[inline(always)]
2518 fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
2519 [
2520 i32::not(a[0usize]),
2521 i32::not(a[1usize]),
2522 i32::not(a[2usize]),
2523 i32::not(a[3usize]),
2524 ]
2525 .simd_into(self)
2526 }
2527 #[inline(always)]
2528 fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2529 [
2530 i32::bitand(a[0usize], &b[0usize]),
2531 i32::bitand(a[1usize], &b[1usize]),
2532 i32::bitand(a[2usize], &b[2usize]),
2533 i32::bitand(a[3usize], &b[3usize]),
2534 ]
2535 .simd_into(self)
2536 }
2537 #[inline(always)]
2538 fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2539 [
2540 i32::bitor(a[0usize], &b[0usize]),
2541 i32::bitor(a[1usize], &b[1usize]),
2542 i32::bitor(a[2usize], &b[2usize]),
2543 i32::bitor(a[3usize], &b[3usize]),
2544 ]
2545 .simd_into(self)
2546 }
2547 #[inline(always)]
2548 fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2549 [
2550 i32::bitxor(a[0usize], &b[0usize]),
2551 i32::bitxor(a[1usize], &b[1usize]),
2552 i32::bitxor(a[2usize], &b[2usize]),
2553 i32::bitxor(a[3usize], &b[3usize]),
2554 ]
2555 .simd_into(self)
2556 }
2557 #[inline(always)]
2558 fn select_mask32x4(
2559 self,
2560 a: mask32x4<Self>,
2561 b: mask32x4<Self>,
2562 c: mask32x4<Self>,
2563 ) -> mask32x4<Self> {
2564 [
2565 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2566 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2567 if a[2usize] != 0 { b[2usize] } else { c[2usize] },
2568 if a[3usize] != 0 { b[3usize] } else { c[3usize] },
2569 ]
2570 .simd_into(self)
2571 }
2572 #[inline(always)]
2573 fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
2574 [
2575 -(i32::eq(&a[0usize], &b[0usize]) as i32),
2576 -(i32::eq(&a[1usize], &b[1usize]) as i32),
2577 -(i32::eq(&a[2usize], &b[2usize]) as i32),
2578 -(i32::eq(&a[3usize], &b[3usize]) as i32),
2579 ]
2580 .simd_into(self)
2581 }
2582 #[inline(always)]
2583 fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
2584 let mut result = [0; 8usize];
2585 result[0..4usize].copy_from_slice(&a.val);
2586 result[4usize..8usize].copy_from_slice(&b.val);
2587 result.simd_into(self)
2588 }
2589 #[inline(always)]
2590 fn splat_f64x2(self, val: f64) -> f64x2<Self> {
2591 [val; 2usize].simd_into(self)
2592 }
2593 #[inline(always)]
2594 fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2595 [f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
2596 }
2597 #[inline(always)]
2598 fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2599 [f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
2600 }
2601 #[inline(always)]
2602 fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2603 [f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
2604 }
2605 #[inline(always)]
2606 fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2607 [
2608 f64::add(a[0usize], &b[0usize]),
2609 f64::add(a[1usize], &b[1usize]),
2610 ]
2611 .simd_into(self)
2612 }
2613 #[inline(always)]
2614 fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2615 [
2616 f64::sub(a[0usize], &b[0usize]),
2617 f64::sub(a[1usize], &b[1usize]),
2618 ]
2619 .simd_into(self)
2620 }
2621 #[inline(always)]
2622 fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2623 [
2624 f64::mul(a[0usize], &b[0usize]),
2625 f64::mul(a[1usize], &b[1usize]),
2626 ]
2627 .simd_into(self)
2628 }
2629 #[inline(always)]
2630 fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2631 [
2632 f64::div(a[0usize], &b[0usize]),
2633 f64::div(a[1usize], &b[1usize]),
2634 ]
2635 .simd_into(self)
2636 }
2637 #[inline(always)]
2638 fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2639 [
2640 f64::copysign(a[0usize], b[0usize]),
2641 f64::copysign(a[1usize], b[1usize]),
2642 ]
2643 .simd_into(self)
2644 }
2645 #[inline(always)]
2646 fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2647 [
2648 -(f64::eq(&a[0usize], &b[0usize]) as i64),
2649 -(f64::eq(&a[1usize], &b[1usize]) as i64),
2650 ]
2651 .simd_into(self)
2652 }
2653 #[inline(always)]
2654 fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2655 [
2656 -(f64::lt(&a[0usize], &b[0usize]) as i64),
2657 -(f64::lt(&a[1usize], &b[1usize]) as i64),
2658 ]
2659 .simd_into(self)
2660 }
2661 #[inline(always)]
2662 fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2663 [
2664 -(f64::le(&a[0usize], &b[0usize]) as i64),
2665 -(f64::le(&a[1usize], &b[1usize]) as i64),
2666 ]
2667 .simd_into(self)
2668 }
2669 #[inline(always)]
2670 fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2671 [
2672 -(f64::ge(&a[0usize], &b[0usize]) as i64),
2673 -(f64::ge(&a[1usize], &b[1usize]) as i64),
2674 ]
2675 .simd_into(self)
2676 }
2677 #[inline(always)]
2678 fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
2679 [
2680 -(f64::gt(&a[0usize], &b[0usize]) as i64),
2681 -(f64::gt(&a[1usize], &b[1usize]) as i64),
2682 ]
2683 .simd_into(self)
2684 }
2685 #[inline(always)]
2686 fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2687 [a[0usize], b[0usize]].simd_into(self)
2688 }
2689 #[inline(always)]
2690 fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2691 [a[1usize], b[1usize]].simd_into(self)
2692 }
2693 #[inline(always)]
2694 fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2695 [a[0usize], b[0usize]].simd_into(self)
2696 }
2697 #[inline(always)]
2698 fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2699 [a[1usize], b[1usize]].simd_into(self)
2700 }
2701 #[inline(always)]
2702 fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2703 [
2704 f64::max(a[0usize], b[0usize]),
2705 f64::max(a[1usize], b[1usize]),
2706 ]
2707 .simd_into(self)
2708 }
2709 #[inline(always)]
2710 fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2711 [
2712 f64::max(a[0usize], b[0usize]),
2713 f64::max(a[1usize], b[1usize]),
2714 ]
2715 .simd_into(self)
2716 }
2717 #[inline(always)]
2718 fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2719 [
2720 f64::min(a[0usize], b[0usize]),
2721 f64::min(a[1usize], b[1usize]),
2722 ]
2723 .simd_into(self)
2724 }
2725 #[inline(always)]
2726 fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
2727 [
2728 f64::min(a[0usize], b[0usize]),
2729 f64::min(a[1usize], b[1usize]),
2730 ]
2731 .simd_into(self)
2732 }
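    // Multiply-add/subtract in the fallback are not fused: `madd` computes `a + b * c`
    // and `msub` computes `a - b * c`, rounding the product and the sum separately.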
2733 #[inline(always)]
2734 fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2735 a.add(b.mul(c))
2736 }
2737 #[inline(always)]
2738 fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2739 a.sub(b.mul(c))
2740 }
2741 #[inline(always)]
2742 fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2743 [f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
2744 }
2745 #[inline(always)]
2746 fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2747 [f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
2748 }
2749 #[inline(always)]
2750 fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
2751 [f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
2752 }
2753 #[inline(always)]
2754 fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
2755 [
2756 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2757 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2758 ]
2759 .simd_into(self)
2760 }
2761 #[inline(always)]
2762 fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
2763 let mut result = [0.0; 4usize];
2764 result[0..2usize].copy_from_slice(&a.val);
2765 result[2usize..4usize].copy_from_slice(&b.val);
2766 result.simd_into(self)
2767 }
2768 #[inline(always)]
2769 fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
2770 f32x4 {
2771 val: bytemuck::cast(a.val),
2772 simd: a.simd,
2773 }
2774 }
2775 #[inline(always)]
2776 fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
2777 [val; 2usize].simd_into(self)
2778 }
2779 #[inline(always)]
2780 fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
2781 [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
2782 }
2783 #[inline(always)]
2784 fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2785 [
2786 i64::bitand(a[0usize], &b[0usize]),
2787 i64::bitand(a[1usize], &b[1usize]),
2788 ]
2789 .simd_into(self)
2790 }
2791 #[inline(always)]
2792 fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2793 [
2794 i64::bitor(a[0usize], &b[0usize]),
2795 i64::bitor(a[1usize], &b[1usize]),
2796 ]
2797 .simd_into(self)
2798 }
2799 #[inline(always)]
2800 fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2801 [
2802 i64::bitxor(a[0usize], &b[0usize]),
2803 i64::bitxor(a[1usize], &b[1usize]),
2804 ]
2805 .simd_into(self)
2806 }
2807 #[inline(always)]
2808 fn select_mask64x2(
2809 self,
2810 a: mask64x2<Self>,
2811 b: mask64x2<Self>,
2812 c: mask64x2<Self>,
2813 ) -> mask64x2<Self> {
2814 [
2815 if a[0usize] != 0 { b[0usize] } else { c[0usize] },
2816 if a[1usize] != 0 { b[1usize] } else { c[1usize] },
2817 ]
2818 .simd_into(self)
2819 }
2820 #[inline(always)]
2821 fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
2822 [
2823 -(i64::eq(&a[0usize], &b[0usize]) as i64),
2824 -(i64::eq(&a[1usize], &b[1usize]) as i64),
2825 ]
2826 .simd_into(self)
2827 }
2828 #[inline(always)]
2829 fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
2830 let mut result = [0; 4usize];
2831 result[0..2usize].copy_from_slice(&a.val);
2832 result[2usize..4usize].copy_from_slice(&b.val);
2833 result.simd_into(self)
2834 }
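    // From here on, the wider vector types are emulated by splitting each operand into
    // two halves, applying the narrower operation to each half, and recombining the
    // results; `splat_f32x8`, for example, broadcasts into one f32x4 and duplicates it.
    // A hypothetical caller (not shown in this module) would reach these through the
    // `Simd` trait, e.g. `let r = simd.sqrt_f32x8(simd.splat_f32x8(2.0));`.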
2835 #[inline(always)]
2836 fn splat_f32x8(self, a: f32) -> f32x8<Self> {
2837 let half = self.splat_f32x4(a);
2838 self.combine_f32x4(half, half)
2839 }
2840 #[inline(always)]
2841 fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2842 let (a0, a1) = self.split_f32x8(a);
2843 self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
2844 }
2845 #[inline(always)]
2846 fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2847 let (a0, a1) = self.split_f32x8(a);
2848 self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
2849 }
2850 #[inline(always)]
2851 fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2852 let (a0, a1) = self.split_f32x8(a);
2853 self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
2854 }
2855 #[inline(always)]
2856 fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2857 let (a0, a1) = self.split_f32x8(a);
2858 let (b0, b1) = self.split_f32x8(b);
2859 self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
2860 }
2861 #[inline(always)]
2862 fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2863 let (a0, a1) = self.split_f32x8(a);
2864 let (b0, b1) = self.split_f32x8(b);
2865 self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
2866 }
2867 #[inline(always)]
2868 fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2869 let (a0, a1) = self.split_f32x8(a);
2870 let (b0, b1) = self.split_f32x8(b);
2871 self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
2872 }
2873 #[inline(always)]
2874 fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2875 let (a0, a1) = self.split_f32x8(a);
2876 let (b0, b1) = self.split_f32x8(b);
2877 self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
2878 }
2879 #[inline(always)]
2880 fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2881 let (a0, a1) = self.split_f32x8(a);
2882 let (b0, b1) = self.split_f32x8(b);
2883 self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
2884 }
2885 #[inline(always)]
2886 fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2887 let (a0, a1) = self.split_f32x8(a);
2888 let (b0, b1) = self.split_f32x8(b);
2889 self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
2890 }
2891 #[inline(always)]
2892 fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2893 let (a0, a1) = self.split_f32x8(a);
2894 let (b0, b1) = self.split_f32x8(b);
2895 self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
2896 }
2897 #[inline(always)]
2898 fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2899 let (a0, a1) = self.split_f32x8(a);
2900 let (b0, b1) = self.split_f32x8(b);
2901 self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
2902 }
2903 #[inline(always)]
2904 fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2905 let (a0, a1) = self.split_f32x8(a);
2906 let (b0, b1) = self.split_f32x8(b);
2907 self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
2908 }
2909 #[inline(always)]
2910 fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
2911 let (a0, a1) = self.split_f32x8(a);
2912 let (b0, b1) = self.split_f32x8(b);
2913 self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
2914 }
2915 #[inline(always)]
2916 fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2917 let (a0, _) = self.split_f32x8(a);
2918 let (b0, _) = self.split_f32x8(b);
2919 self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
2920 }
2921 #[inline(always)]
2922 fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2923 let (_, a1) = self.split_f32x8(a);
2924 let (_, b1) = self.split_f32x8(b);
2925 self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
2926 }
2927 #[inline(always)]
2928 fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2929 let (a0, a1) = self.split_f32x8(a);
2930 let (b0, b1) = self.split_f32x8(b);
2931 self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
2932 }
2933 #[inline(always)]
2934 fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2935 let (a0, a1) = self.split_f32x8(a);
2936 let (b0, b1) = self.split_f32x8(b);
2937 self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
2938 }
2939 #[inline(always)]
2940 fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2941 let (a0, a1) = self.split_f32x8(a);
2942 let (b0, b1) = self.split_f32x8(b);
2943 self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
2944 }
2945 #[inline(always)]
2946 fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2947 let (a0, a1) = self.split_f32x8(a);
2948 let (b0, b1) = self.split_f32x8(b);
2949 self.combine_f32x4(
2950 self.max_precise_f32x4(a0, b0),
2951 self.max_precise_f32x4(a1, b1),
2952 )
2953 }
2954 #[inline(always)]
2955 fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2956 let (a0, a1) = self.split_f32x8(a);
2957 let (b0, b1) = self.split_f32x8(b);
2958 self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
2959 }
2960 #[inline(always)]
2961 fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
2962 let (a0, a1) = self.split_f32x8(a);
2963 let (b0, b1) = self.split_f32x8(b);
2964 self.combine_f32x4(
2965 self.min_precise_f32x4(a0, b0),
2966 self.min_precise_f32x4(a1, b1),
2967 )
2968 }
2969 #[inline(always)]
2970 fn madd_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
2971 let (a0, a1) = self.split_f32x8(a);
2972 let (b0, b1) = self.split_f32x8(b);
2973 let (c0, c1) = self.split_f32x8(c);
2974 self.combine_f32x4(self.madd_f32x4(a0, b0, c0), self.madd_f32x4(a1, b1, c1))
2975 }
2976 #[inline(always)]
2977 fn msub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
2978 let (a0, a1) = self.split_f32x8(a);
2979 let (b0, b1) = self.split_f32x8(b);
2980 let (c0, c1) = self.split_f32x8(c);
2981 self.combine_f32x4(self.msub_f32x4(a0, b0, c0), self.msub_f32x4(a1, b1, c1))
2982 }
2983 #[inline(always)]
2984 fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2985 let (a0, a1) = self.split_f32x8(a);
2986 self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
2987 }
2988 #[inline(always)]
2989 fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2990 let (a0, a1) = self.split_f32x8(a);
2991 self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
2992 }
2993 #[inline(always)]
2994 fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
2995 let (a0, a1) = self.split_f32x8(a);
2996 self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
2997 }
2998 #[inline(always)]
2999 fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
3000 let (a0, a1) = self.split_mask32x8(a);
3001 let (b0, b1) = self.split_f32x8(b);
3002 let (c0, c1) = self.split_f32x8(c);
3003 self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
3004 }
3005 #[inline(always)]
3006 fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
3007 let mut result = [0.0; 16usize];
3008 result[0..8usize].copy_from_slice(&a.val);
3009 result[8usize..16usize].copy_from_slice(&b.val);
3010 result.simd_into(self)
3011 }
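    // `split` is the inverse of `combine`: it copies the low and high halves of the
    // 8-lane vector into two independent 4-lane vectors.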
3012 #[inline(always)]
3013 fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
3014 let mut b0 = [0.0; 4usize];
3015 let mut b1 = [0.0; 4usize];
3016 b0.copy_from_slice(&a.val[0..4usize]);
3017 b1.copy_from_slice(&a.val[4usize..8usize]);
3018 (b0.simd_into(self), b1.simd_into(self))
3019 }
3020 #[inline(always)]
3021 fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
3022 let (a0, a1) = self.split_f32x8(a);
3023 self.combine_f64x2(
3024 self.reinterpret_f64_f32x4(a0),
3025 self.reinterpret_f64_f32x4(a1),
3026 )
3027 }
3028 #[inline(always)]
3029 fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3030 let (a0, a1) = self.split_f32x8(a);
3031 self.combine_i32x4(
3032 self.reinterpret_i32_f32x4(a0),
3033 self.reinterpret_i32_f32x4(a1),
3034 )
3035 }
3036 #[inline(always)]
3037 fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
3038 let (a0, a1) = self.split_f32x8(a);
3039 self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
3040 }
3041 #[inline(always)]
3042 fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3043 let (a0, a1) = self.split_f32x8(a);
3044 self.combine_u32x4(
3045 self.reinterpret_u32_f32x4(a0),
3046 self.reinterpret_u32_f32x4(a1),
3047 )
3048 }
3049 #[inline(always)]
3050 fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
3051 let (a0, a1) = self.split_f32x8(a);
3052 self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
3053 }
3054 #[inline(always)]
3055 fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
3056 let (a0, a1) = self.split_f32x8(a);
3057 self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
3058 }
3059 #[inline(always)]
3060 fn splat_i8x32(self, a: i8) -> i8x32<Self> {
3061 let half = self.splat_i8x16(a);
3062 self.combine_i8x16(half, half)
3063 }
3064 #[inline(always)]
3065 fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
3066 let (a0, a1) = self.split_i8x32(a);
3067 self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
3068 }
3069 #[inline(always)]
3070 fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3071 let (a0, a1) = self.split_i8x32(a);
3072 let (b0, b1) = self.split_i8x32(b);
3073 self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
3074 }
3075 #[inline(always)]
3076 fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3077 let (a0, a1) = self.split_i8x32(a);
3078 let (b0, b1) = self.split_i8x32(b);
3079 self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
3080 }
3081 #[inline(always)]
3082 fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3083 let (a0, a1) = self.split_i8x32(a);
3084 let (b0, b1) = self.split_i8x32(b);
3085 self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
3086 }
3087 #[inline(always)]
3088 fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3089 let (a0, a1) = self.split_i8x32(a);
3090 let (b0, b1) = self.split_i8x32(b);
3091 self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
3092 }
3093 #[inline(always)]
3094 fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3095 let (a0, a1) = self.split_i8x32(a);
3096 let (b0, b1) = self.split_i8x32(b);
3097 self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
3098 }
3099 #[inline(always)]
3100 fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3101 let (a0, a1) = self.split_i8x32(a);
3102 let (b0, b1) = self.split_i8x32(b);
3103 self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
3104 }
3105 #[inline(always)]
3106 fn shr_i8x32(self, a: i8x32<Self>, b: u32) -> i8x32<Self> {
3107 let (a0, a1) = self.split_i8x32(a);
3108 self.combine_i8x16(self.shr_i8x16(a0, b), self.shr_i8x16(a1, b))
3109 }
3110 #[inline(always)]
3111 fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3112 let (a0, a1) = self.split_i8x32(a);
3113 let (b0, b1) = self.split_i8x32(b);
3114 self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
3115 }
3116 #[inline(always)]
3117 fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3118 let (a0, a1) = self.split_i8x32(a);
3119 let (b0, b1) = self.split_i8x32(b);
3120 self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
3121 }
3122 #[inline(always)]
3123 fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3124 let (a0, a1) = self.split_i8x32(a);
3125 let (b0, b1) = self.split_i8x32(b);
3126 self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
3127 }
3128 #[inline(always)]
3129 fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3130 let (a0, a1) = self.split_i8x32(a);
3131 let (b0, b1) = self.split_i8x32(b);
3132 self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
3133 }
3134 #[inline(always)]
3135 fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
3136 let (a0, a1) = self.split_i8x32(a);
3137 let (b0, b1) = self.split_i8x32(b);
3138 self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
3139 }
3140 #[inline(always)]
3141 fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3142 let (a0, _) = self.split_i8x32(a);
3143 let (b0, _) = self.split_i8x32(b);
3144 self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
3145 }
3146 #[inline(always)]
3147 fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3148 let (_, a1) = self.split_i8x32(a);
3149 let (_, b1) = self.split_i8x32(b);
3150 self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
3151 }
3152 #[inline(always)]
3153 fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3154 let (a0, a1) = self.split_i8x32(a);
3155 let (b0, b1) = self.split_i8x32(b);
3156 self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
3157 }
3158 #[inline(always)]
3159 fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3160 let (a0, a1) = self.split_i8x32(a);
3161 let (b0, b1) = self.split_i8x32(b);
3162 self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
3163 }
3164 #[inline(always)]
3165 fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
3166 let (a0, a1) = self.split_mask8x32(a);
3167 let (b0, b1) = self.split_i8x32(b);
3168 let (c0, c1) = self.split_i8x32(c);
3169 self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
3170 }
3171 #[inline(always)]
3172 fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3173 let (a0, a1) = self.split_i8x32(a);
3174 let (b0, b1) = self.split_i8x32(b);
3175 self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
3176 }
3177 #[inline(always)]
3178 fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
3179 let (a0, a1) = self.split_i8x32(a);
3180 let (b0, b1) = self.split_i8x32(b);
3181 self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
3182 }
3183 #[inline(always)]
3184 fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
3185 let mut result = [0; 64usize];
3186 result[0..32usize].copy_from_slice(&a.val);
3187 result[32usize..64usize].copy_from_slice(&b.val);
3188 result.simd_into(self)
3189 }
3190 #[inline(always)]
3191 fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
3192 let mut b0 = [0; 16usize];
3193 let mut b1 = [0; 16usize];
3194 b0.copy_from_slice(&a.val[0..16usize]);
3195 b1.copy_from_slice(&a.val[16usize..32usize]);
3196 (b0.simd_into(self), b1.simd_into(self))
3197 }
3198 #[inline(always)]
3199 fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
3200 let (a0, a1) = self.split_i8x32(a);
3201 self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
3202 }
3203 #[inline(always)]
3204 fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
3205 let (a0, a1) = self.split_i8x32(a);
3206 self.combine_u32x4(
3207 self.reinterpret_u32_i8x16(a0),
3208 self.reinterpret_u32_i8x16(a1),
3209 )
3210 }
3211 #[inline(always)]
3212 fn splat_u8x32(self, a: u8) -> u8x32<Self> {
3213 let half = self.splat_u8x16(a);
3214 self.combine_u8x16(half, half)
3215 }
3216 #[inline(always)]
3217 fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
3218 let (a0, a1) = self.split_u8x32(a);
3219 self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
3220 }
3221 #[inline(always)]
3222 fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3223 let (a0, a1) = self.split_u8x32(a);
3224 let (b0, b1) = self.split_u8x32(b);
3225 self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
3226 }
3227 #[inline(always)]
3228 fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3229 let (a0, a1) = self.split_u8x32(a);
3230 let (b0, b1) = self.split_u8x32(b);
3231 self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
3232 }
3233 #[inline(always)]
3234 fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3235 let (a0, a1) = self.split_u8x32(a);
3236 let (b0, b1) = self.split_u8x32(b);
3237 self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
3238 }
3239 #[inline(always)]
3240 fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3241 let (a0, a1) = self.split_u8x32(a);
3242 let (b0, b1) = self.split_u8x32(b);
3243 self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
3244 }
3245 #[inline(always)]
3246 fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3247 let (a0, a1) = self.split_u8x32(a);
3248 let (b0, b1) = self.split_u8x32(b);
3249 self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
3250 }
3251 #[inline(always)]
3252 fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3253 let (a0, a1) = self.split_u8x32(a);
3254 let (b0, b1) = self.split_u8x32(b);
3255 self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
3256 }
3257 #[inline(always)]
3258 fn shr_u8x32(self, a: u8x32<Self>, b: u32) -> u8x32<Self> {
3259 let (a0, a1) = self.split_u8x32(a);
3260 self.combine_u8x16(self.shr_u8x16(a0, b), self.shr_u8x16(a1, b))
3261 }
3262 #[inline(always)]
3263 fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3264 let (a0, a1) = self.split_u8x32(a);
3265 let (b0, b1) = self.split_u8x32(b);
3266 self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
3267 }
3268 #[inline(always)]
3269 fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3270 let (a0, a1) = self.split_u8x32(a);
3271 let (b0, b1) = self.split_u8x32(b);
3272 self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
3273 }
3274 #[inline(always)]
3275 fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3276 let (a0, a1) = self.split_u8x32(a);
3277 let (b0, b1) = self.split_u8x32(b);
3278 self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
3279 }
3280 #[inline(always)]
3281 fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3282 let (a0, a1) = self.split_u8x32(a);
3283 let (b0, b1) = self.split_u8x32(b);
3284 self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
3285 }
3286 #[inline(always)]
3287 fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
3288 let (a0, a1) = self.split_u8x32(a);
3289 let (b0, b1) = self.split_u8x32(b);
3290 self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
3291 }
3292 #[inline(always)]
3293 fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3294 let (a0, _) = self.split_u8x32(a);
3295 let (b0, _) = self.split_u8x32(b);
3296 self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
3297 }
3298 #[inline(always)]
3299 fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3300 let (_, a1) = self.split_u8x32(a);
3301 let (_, b1) = self.split_u8x32(b);
3302 self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
3303 }
3304 #[inline(always)]
3305 fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3306 let (a0, a1) = self.split_u8x32(a);
3307 let (b0, b1) = self.split_u8x32(b);
3308 self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
3309 }
3310 #[inline(always)]
3311 fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3312 let (a0, a1) = self.split_u8x32(a);
3313 let (b0, b1) = self.split_u8x32(b);
3314 self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
3315 }
3316 #[inline(always)]
3317 fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
3318 let (a0, a1) = self.split_mask8x32(a);
3319 let (b0, b1) = self.split_u8x32(b);
3320 let (c0, c1) = self.split_u8x32(c);
3321 self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
3322 }
3323 #[inline(always)]
3324 fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3325 let (a0, a1) = self.split_u8x32(a);
3326 let (b0, b1) = self.split_u8x32(b);
3327 self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
3328 }
3329 #[inline(always)]
3330 fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
3331 let (a0, a1) = self.split_u8x32(a);
3332 let (b0, b1) = self.split_u8x32(b);
3333 self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
3334 }
3335 #[inline(always)]
3336 fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
3337 let mut result = [0; 64usize];
3338 result[0..32usize].copy_from_slice(&a.val);
3339 result[32usize..64usize].copy_from_slice(&b.val);
3340 result.simd_into(self)
3341 }
3342 #[inline(always)]
3343 fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
3344 let mut b0 = [0; 16usize];
3345 let mut b1 = [0; 16usize];
3346 b0.copy_from_slice(&a.val[0..16usize]);
3347 b1.copy_from_slice(&a.val[16usize..32usize]);
3348 (b0.simd_into(self), b1.simd_into(self))
3349 }
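    // Widens each u8 lane to u16 (a value-preserving, zero-extending conversion),
    // keeping all 32 lanes and doubling the total vector width.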
3350 #[inline(always)]
3351 fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
3352 let (a0, a1) = self.split_u8x32(a);
3353 self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
3354 }
3355 #[inline(always)]
3356 fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
3357 let (a0, a1) = self.split_u8x32(a);
3358 self.combine_u32x4(
3359 self.reinterpret_u32_u8x16(a0),
3360 self.reinterpret_u32_u8x16(a1),
3361 )
3362 }
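// mask8x32: logical ops, select, and equality are delegated lane-wise to the
// two mask8x16 halves.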
3363 #[inline(always)]
3364 fn splat_mask8x32(self, a: i8) -> mask8x32<Self> {
3365 let half = self.splat_mask8x16(a);
3366 self.combine_mask8x16(half, half)
3367 }
3368 #[inline(always)]
3369 fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
3370 let (a0, a1) = self.split_mask8x32(a);
3371 self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
3372 }
3373 #[inline(always)]
3374 fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3375 let (a0, a1) = self.split_mask8x32(a);
3376 let (b0, b1) = self.split_mask8x32(b);
3377 self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
3378 }
3379 #[inline(always)]
3380 fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3381 let (a0, a1) = self.split_mask8x32(a);
3382 let (b0, b1) = self.split_mask8x32(b);
3383 self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
3384 }
3385 #[inline(always)]
3386 fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3387 let (a0, a1) = self.split_mask8x32(a);
3388 let (b0, b1) = self.split_mask8x32(b);
3389 self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
3390 }
3391 #[inline(always)]
3392 fn select_mask8x32(
3393 self,
3394 a: mask8x32<Self>,
3395 b: mask8x32<Self>,
3396 c: mask8x32<Self>,
3397 ) -> mask8x32<Self> {
3398 let (a0, a1) = self.split_mask8x32(a);
3399 let (b0, b1) = self.split_mask8x32(b);
3400 let (c0, c1) = self.split_mask8x32(c);
3401 self.combine_mask8x16(
3402 self.select_mask8x16(a0, b0, c0),
3403 self.select_mask8x16(a1, b1, c1),
3404 )
3405 }
3406 #[inline(always)]
3407 fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
3408 let (a0, a1) = self.split_mask8x32(a);
3409 let (b0, b1) = self.split_mask8x32(b);
3410 self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
3411 }
3412 #[inline(always)]
3413 fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
3414 let mut result = [0; 64usize];
3415 result[0..32usize].copy_from_slice(&a.val);
3416 result[32usize..64usize].copy_from_slice(&b.val);
3417 result.simd_into(self)
3418 }
3419 #[inline(always)]
3420 fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
3421 let mut b0 = [0; 16usize];
3422 let mut b1 = [0; 16usize];
3423 b0.copy_from_slice(&a.val[0..16usize]);
3424 b1.copy_from_slice(&a.val[16usize..32usize]);
3425 (b0.simd_into(self), b1.simd_into(self))
3426 }
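// i16x16: arithmetic, bitwise ops, shifts, comparisons, and shuffles are all
// delegated to i16x8 on the two halves.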
3427 #[inline(always)]
3428 fn splat_i16x16(self, a: i16) -> i16x16<Self> {
3429 let half = self.splat_i16x8(a);
3430 self.combine_i16x8(half, half)
3431 }
3432 #[inline(always)]
3433 fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
3434 let (a0, a1) = self.split_i16x16(a);
3435 self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
3436 }
3437 #[inline(always)]
3438 fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3439 let (a0, a1) = self.split_i16x16(a);
3440 let (b0, b1) = self.split_i16x16(b);
3441 self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
3442 }
3443 #[inline(always)]
3444 fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3445 let (a0, a1) = self.split_i16x16(a);
3446 let (b0, b1) = self.split_i16x16(b);
3447 self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
3448 }
3449 #[inline(always)]
3450 fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3451 let (a0, a1) = self.split_i16x16(a);
3452 let (b0, b1) = self.split_i16x16(b);
3453 self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
3454 }
3455 #[inline(always)]
3456 fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3457 let (a0, a1) = self.split_i16x16(a);
3458 let (b0, b1) = self.split_i16x16(b);
3459 self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
3460 }
3461 #[inline(always)]
3462 fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3463 let (a0, a1) = self.split_i16x16(a);
3464 let (b0, b1) = self.split_i16x16(b);
3465 self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
3466 }
3467 #[inline(always)]
3468 fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3469 let (a0, a1) = self.split_i16x16(a);
3470 let (b0, b1) = self.split_i16x16(b);
3471 self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
3472 }
3473 #[inline(always)]
3474 fn shr_i16x16(self, a: i16x16<Self>, b: u32) -> i16x16<Self> {
3475 let (a0, a1) = self.split_i16x16(a);
3476 self.combine_i16x8(self.shr_i16x8(a0, b), self.shr_i16x8(a1, b))
3477 }
3478 #[inline(always)]
3479 fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3480 let (a0, a1) = self.split_i16x16(a);
3481 let (b0, b1) = self.split_i16x16(b);
3482 self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
3483 }
3484 #[inline(always)]
3485 fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3486 let (a0, a1) = self.split_i16x16(a);
3487 let (b0, b1) = self.split_i16x16(b);
3488 self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
3489 }
3490 #[inline(always)]
3491 fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3492 let (a0, a1) = self.split_i16x16(a);
3493 let (b0, b1) = self.split_i16x16(b);
3494 self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
3495 }
3496 #[inline(always)]
3497 fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3498 let (a0, a1) = self.split_i16x16(a);
3499 let (b0, b1) = self.split_i16x16(b);
3500 self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
3501 }
3502 #[inline(always)]
3503 fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
3504 let (a0, a1) = self.split_i16x16(a);
3505 let (b0, b1) = self.split_i16x16(b);
3506 self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
3507 }
3508 #[inline(always)]
3509 fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3510 let (a0, _) = self.split_i16x16(a);
3511 let (b0, _) = self.split_i16x16(b);
3512 self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
3513 }
3514 #[inline(always)]
3515 fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3516 let (_, a1) = self.split_i16x16(a);
3517 let (_, b1) = self.split_i16x16(b);
3518 self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
3519 }
3520 #[inline(always)]
3521 fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3522 let (a0, a1) = self.split_i16x16(a);
3523 let (b0, b1) = self.split_i16x16(b);
3524 self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
3525 }
3526 #[inline(always)]
3527 fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3528 let (a0, a1) = self.split_i16x16(a);
3529 let (b0, b1) = self.split_i16x16(b);
3530 self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
3531 }
3532 #[inline(always)]
3533 fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
3534 let (a0, a1) = self.split_mask16x16(a);
3535 let (b0, b1) = self.split_i16x16(b);
3536 let (c0, c1) = self.split_i16x16(c);
3537 self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
3538 }
3539 #[inline(always)]
3540 fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3541 let (a0, a1) = self.split_i16x16(a);
3542 let (b0, b1) = self.split_i16x16(b);
3543 self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
3544 }
3545 #[inline(always)]
3546 fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
3547 let (a0, a1) = self.split_i16x16(a);
3548 let (b0, b1) = self.split_i16x16(b);
3549 self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
3550 }
3551 #[inline(always)]
3552 fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
3553 let mut result = [0; 32usize];
3554 result[0..16usize].copy_from_slice(&a.val);
3555 result[16usize..32usize].copy_from_slice(&b.val);
3556 result.simd_into(self)
3557 }
3558 #[inline(always)]
3559 fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
3560 let mut b0 = [0; 8usize];
3561 let mut b1 = [0; 8usize];
3562 b0.copy_from_slice(&a.val[0..8usize]);
3563 b1.copy_from_slice(&a.val[8usize..16usize]);
3564 (b0.simd_into(self), b1.simd_into(self))
3565 }
3566 #[inline(always)]
3567 fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
3568 let (a0, a1) = self.split_i16x16(a);
3569 self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
3570 }
3571 #[inline(always)]
3572 fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
3573 let (a0, a1) = self.split_i16x16(a);
3574 self.combine_u32x4(
3575 self.reinterpret_u32_i16x8(a0),
3576 self.reinterpret_u32_i16x8(a1),
3577 )
3578 }
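// u16x16: same split/combine lowering as i16x16, plus a truncating narrow to
// u8x16 and reinterpret casts.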
3579 #[inline(always)]
3580 fn splat_u16x16(self, a: u16) -> u16x16<Self> {
3581 let half = self.splat_u16x8(a);
3582 self.combine_u16x8(half, half)
3583 }
3584 #[inline(always)]
3585 fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
3586 let (a0, a1) = self.split_u16x16(a);
3587 self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
3588 }
3589 #[inline(always)]
3590 fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3591 let (a0, a1) = self.split_u16x16(a);
3592 let (b0, b1) = self.split_u16x16(b);
3593 self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
3594 }
3595 #[inline(always)]
3596 fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3597 let (a0, a1) = self.split_u16x16(a);
3598 let (b0, b1) = self.split_u16x16(b);
3599 self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
3600 }
3601 #[inline(always)]
3602 fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3603 let (a0, a1) = self.split_u16x16(a);
3604 let (b0, b1) = self.split_u16x16(b);
3605 self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
3606 }
3607 #[inline(always)]
3608 fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3609 let (a0, a1) = self.split_u16x16(a);
3610 let (b0, b1) = self.split_u16x16(b);
3611 self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
3612 }
3613 #[inline(always)]
3614 fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3615 let (a0, a1) = self.split_u16x16(a);
3616 let (b0, b1) = self.split_u16x16(b);
3617 self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
3618 }
3619 #[inline(always)]
3620 fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3621 let (a0, a1) = self.split_u16x16(a);
3622 let (b0, b1) = self.split_u16x16(b);
3623 self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
3624 }
3625 #[inline(always)]
3626 fn shr_u16x16(self, a: u16x16<Self>, b: u32) -> u16x16<Self> {
3627 let (a0, a1) = self.split_u16x16(a);
3628 self.combine_u16x8(self.shr_u16x8(a0, b), self.shr_u16x8(a1, b))
3629 }
3630 #[inline(always)]
3631 fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3632 let (a0, a1) = self.split_u16x16(a);
3633 let (b0, b1) = self.split_u16x16(b);
3634 self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
3635 }
3636 #[inline(always)]
3637 fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3638 let (a0, a1) = self.split_u16x16(a);
3639 let (b0, b1) = self.split_u16x16(b);
3640 self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
3641 }
3642 #[inline(always)]
3643 fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3644 let (a0, a1) = self.split_u16x16(a);
3645 let (b0, b1) = self.split_u16x16(b);
3646 self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
3647 }
3648 #[inline(always)]
3649 fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3650 let (a0, a1) = self.split_u16x16(a);
3651 let (b0, b1) = self.split_u16x16(b);
3652 self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
3653 }
3654 #[inline(always)]
3655 fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
3656 let (a0, a1) = self.split_u16x16(a);
3657 let (b0, b1) = self.split_u16x16(b);
3658 self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
3659 }
3660 #[inline(always)]
3661 fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3662 let (a0, _) = self.split_u16x16(a);
3663 let (b0, _) = self.split_u16x16(b);
3664 self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
3665 }
3666 #[inline(always)]
3667 fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3668 let (_, a1) = self.split_u16x16(a);
3669 let (_, b1) = self.split_u16x16(b);
3670 self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
3671 }
3672 #[inline(always)]
3673 fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3674 let (a0, a1) = self.split_u16x16(a);
3675 let (b0, b1) = self.split_u16x16(b);
3676 self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
3677 }
3678 #[inline(always)]
3679 fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3680 let (a0, a1) = self.split_u16x16(a);
3681 let (b0, b1) = self.split_u16x16(b);
3682 self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
3683 }
3684 #[inline(always)]
3685 fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
3686 let (a0, a1) = self.split_mask16x16(a);
3687 let (b0, b1) = self.split_u16x16(b);
3688 let (c0, c1) = self.split_u16x16(c);
3689 self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
3690 }
3691 #[inline(always)]
3692 fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3693 let (a0, a1) = self.split_u16x16(a);
3694 let (b0, b1) = self.split_u16x16(b);
3695 self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
3696 }
3697 #[inline(always)]
3698 fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
3699 let (a0, a1) = self.split_u16x16(a);
3700 let (b0, b1) = self.split_u16x16(b);
3701 self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
3702 }
3703 #[inline(always)]
3704 fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
3705 let mut result = [0; 32usize];
3706 result[0..16usize].copy_from_slice(&a.val);
3707 result[16usize..32usize].copy_from_slice(&b.val);
3708 result.simd_into(self)
3709 }
3710 #[inline(always)]
3711 fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
3712 let mut b0 = [0; 8usize];
3713 let mut b1 = [0; 8usize];
3714 b0.copy_from_slice(&a.val[0..8usize]);
3715 b1.copy_from_slice(&a.val[8usize..16usize]);
3716 (b0.simd_into(self), b1.simd_into(self))
3717 }
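// Narrowing keeps only the low byte of each u16 lane (`as u8` truncates, it
// does not saturate).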
3718 #[inline(always)]
3719 fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
3720 [
3721 a[0usize] as u8,
3722 a[1usize] as u8,
3723 a[2usize] as u8,
3724 a[3usize] as u8,
3725 a[4usize] as u8,
3726 a[5usize] as u8,
3727 a[6usize] as u8,
3728 a[7usize] as u8,
3729 a[8usize] as u8,
3730 a[9usize] as u8,
3731 a[10usize] as u8,
3732 a[11usize] as u8,
3733 a[12usize] as u8,
3734 a[13usize] as u8,
3735 a[14usize] as u8,
3736 a[15usize] as u8,
3737 ]
3738 .simd_into(self)
3739 }
3740 #[inline(always)]
3741 fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
3742 let (a0, a1) = self.split_u16x16(a);
3743 self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
3744 }
3745 #[inline(always)]
3746 fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
3747 let (a0, a1) = self.split_u16x16(a);
3748 self.combine_u32x4(
3749 self.reinterpret_u32_u16x8(a0),
3750 self.reinterpret_u32_u16x8(a1),
3751 )
3752 }
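// mask16x16: delegated to mask16x8 halves.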
3753 #[inline(always)]
3754 fn splat_mask16x16(self, a: i16) -> mask16x16<Self> {
3755 let half = self.splat_mask16x8(a);
3756 self.combine_mask16x8(half, half)
3757 }
3758 #[inline(always)]
3759 fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
3760 let (a0, a1) = self.split_mask16x16(a);
3761 self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
3762 }
3763 #[inline(always)]
3764 fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3765 let (a0, a1) = self.split_mask16x16(a);
3766 let (b0, b1) = self.split_mask16x16(b);
3767 self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
3768 }
3769 #[inline(always)]
3770 fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3771 let (a0, a1) = self.split_mask16x16(a);
3772 let (b0, b1) = self.split_mask16x16(b);
3773 self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
3774 }
3775 #[inline(always)]
3776 fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3777 let (a0, a1) = self.split_mask16x16(a);
3778 let (b0, b1) = self.split_mask16x16(b);
3779 self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
3780 }
3781 #[inline(always)]
3782 fn select_mask16x16(
3783 self,
3784 a: mask16x16<Self>,
3785 b: mask16x16<Self>,
3786 c: mask16x16<Self>,
3787 ) -> mask16x16<Self> {
3788 let (a0, a1) = self.split_mask16x16(a);
3789 let (b0, b1) = self.split_mask16x16(b);
3790 let (c0, c1) = self.split_mask16x16(c);
3791 self.combine_mask16x8(
3792 self.select_mask16x8(a0, b0, c0),
3793 self.select_mask16x8(a1, b1, c1),
3794 )
3795 }
3796 #[inline(always)]
3797 fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
3798 let (a0, a1) = self.split_mask16x16(a);
3799 let (b0, b1) = self.split_mask16x16(b);
3800 self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
3801 }
3802 #[inline(always)]
3803 fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
3804 let mut result = [0; 32usize];
3805 result[0..16usize].copy_from_slice(&a.val);
3806 result[16usize..32usize].copy_from_slice(&b.val);
3807 result.simd_into(self)
3808 }
3809 #[inline(always)]
3810 fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
3811 let mut b0 = [0; 8usize];
3812 let mut b1 = [0; 8usize];
3813 b0.copy_from_slice(&a.val[0..8usize]);
3814 b1.copy_from_slice(&a.val[8usize..16usize]);
3815 (b0.simd_into(self), b1.simd_into(self))
3816 }
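// i32x8: delegated to i32x4 halves; cvt_f32_i32x8 converts each lane to f32.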
3817 #[inline(always)]
3818 fn splat_i32x8(self, a: i32) -> i32x8<Self> {
3819 let half = self.splat_i32x4(a);
3820 self.combine_i32x4(half, half)
3821 }
3822 #[inline(always)]
3823 fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
3824 let (a0, a1) = self.split_i32x8(a);
3825 self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
3826 }
3827 #[inline(always)]
3828 fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3829 let (a0, a1) = self.split_i32x8(a);
3830 let (b0, b1) = self.split_i32x8(b);
3831 self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
3832 }
3833 #[inline(always)]
3834 fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3835 let (a0, a1) = self.split_i32x8(a);
3836 let (b0, b1) = self.split_i32x8(b);
3837 self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
3838 }
3839 #[inline(always)]
3840 fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3841 let (a0, a1) = self.split_i32x8(a);
3842 let (b0, b1) = self.split_i32x8(b);
3843 self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
3844 }
3845 #[inline(always)]
3846 fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3847 let (a0, a1) = self.split_i32x8(a);
3848 let (b0, b1) = self.split_i32x8(b);
3849 self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
3850 }
3851 #[inline(always)]
3852 fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3853 let (a0, a1) = self.split_i32x8(a);
3854 let (b0, b1) = self.split_i32x8(b);
3855 self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
3856 }
3857 #[inline(always)]
3858 fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3859 let (a0, a1) = self.split_i32x8(a);
3860 let (b0, b1) = self.split_i32x8(b);
3861 self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
3862 }
3863 #[inline(always)]
3864 fn shr_i32x8(self, a: i32x8<Self>, b: u32) -> i32x8<Self> {
3865 let (a0, a1) = self.split_i32x8(a);
3866 self.combine_i32x4(self.shr_i32x4(a0, b), self.shr_i32x4(a1, b))
3867 }
3868 #[inline(always)]
3869 fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3870 let (a0, a1) = self.split_i32x8(a);
3871 let (b0, b1) = self.split_i32x8(b);
3872 self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
3873 }
3874 #[inline(always)]
3875 fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3876 let (a0, a1) = self.split_i32x8(a);
3877 let (b0, b1) = self.split_i32x8(b);
3878 self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
3879 }
3880 #[inline(always)]
3881 fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3882 let (a0, a1) = self.split_i32x8(a);
3883 let (b0, b1) = self.split_i32x8(b);
3884 self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
3885 }
3886 #[inline(always)]
3887 fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3888 let (a0, a1) = self.split_i32x8(a);
3889 let (b0, b1) = self.split_i32x8(b);
3890 self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
3891 }
3892 #[inline(always)]
3893 fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
3894 let (a0, a1) = self.split_i32x8(a);
3895 let (b0, b1) = self.split_i32x8(b);
3896 self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
3897 }
3898 #[inline(always)]
3899 fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3900 let (a0, _) = self.split_i32x8(a);
3901 let (b0, _) = self.split_i32x8(b);
3902 self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
3903 }
3904 #[inline(always)]
3905 fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3906 let (_, a1) = self.split_i32x8(a);
3907 let (_, b1) = self.split_i32x8(b);
3908 self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
3909 }
3910 #[inline(always)]
3911 fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3912 let (a0, a1) = self.split_i32x8(a);
3913 let (b0, b1) = self.split_i32x8(b);
3914 self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
3915 }
3916 #[inline(always)]
3917 fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3918 let (a0, a1) = self.split_i32x8(a);
3919 let (b0, b1) = self.split_i32x8(b);
3920 self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
3921 }
3922 #[inline(always)]
3923 fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
3924 let (a0, a1) = self.split_mask32x8(a);
3925 let (b0, b1) = self.split_i32x8(b);
3926 let (c0, c1) = self.split_i32x8(c);
3927 self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
3928 }
3929 #[inline(always)]
3930 fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3931 let (a0, a1) = self.split_i32x8(a);
3932 let (b0, b1) = self.split_i32x8(b);
3933 self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
3934 }
3935 #[inline(always)]
3936 fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
3937 let (a0, a1) = self.split_i32x8(a);
3938 let (b0, b1) = self.split_i32x8(b);
3939 self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
3940 }
3941 #[inline(always)]
3942 fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
3943 let mut result = [0; 16usize];
3944 result[0..8usize].copy_from_slice(&a.val);
3945 result[8usize..16usize].copy_from_slice(&b.val);
3946 result.simd_into(self)
3947 }
3948 #[inline(always)]
3949 fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
3950 let mut b0 = [0; 4usize];
3951 let mut b1 = [0; 4usize];
3952 b0.copy_from_slice(&a.val[0..4usize]);
3953 b1.copy_from_slice(&a.val[4usize..8usize]);
3954 (b0.simd_into(self), b1.simd_into(self))
3955 }
3956 #[inline(always)]
3957 fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
3958 let (a0, a1) = self.split_i32x8(a);
3959 self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
3960 }
3961 #[inline(always)]
3962 fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
3963 let (a0, a1) = self.split_i32x8(a);
3964 self.combine_u32x4(
3965 self.reinterpret_u32_i32x4(a0),
3966 self.reinterpret_u32_i32x4(a1),
3967 )
3968 }
3969 #[inline(always)]
3970 fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
3971 let (a0, a1) = self.split_i32x8(a);
3972 self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
3973 }
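// u32x8: delegated to u32x4 halves, with a lane-wise conversion to f32.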
3974 #[inline(always)]
3975 fn splat_u32x8(self, a: u32) -> u32x8<Self> {
3976 let half = self.splat_u32x4(a);
3977 self.combine_u32x4(half, half)
3978 }
3979 #[inline(always)]
3980 fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
3981 let (a0, a1) = self.split_u32x8(a);
3982 self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
3983 }
3984 #[inline(always)]
3985 fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3986 let (a0, a1) = self.split_u32x8(a);
3987 let (b0, b1) = self.split_u32x8(b);
3988 self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
3989 }
3990 #[inline(always)]
3991 fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3992 let (a0, a1) = self.split_u32x8(a);
3993 let (b0, b1) = self.split_u32x8(b);
3994 self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
3995 }
3996 #[inline(always)]
3997 fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
3998 let (a0, a1) = self.split_u32x8(a);
3999 let (b0, b1) = self.split_u32x8(b);
4000 self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
4001 }
4002 #[inline(always)]
4003 fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4004 let (a0, a1) = self.split_u32x8(a);
4005 let (b0, b1) = self.split_u32x8(b);
4006 self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
4007 }
4008 #[inline(always)]
4009 fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4010 let (a0, a1) = self.split_u32x8(a);
4011 let (b0, b1) = self.split_u32x8(b);
4012 self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
4013 }
4014 #[inline(always)]
4015 fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4016 let (a0, a1) = self.split_u32x8(a);
4017 let (b0, b1) = self.split_u32x8(b);
4018 self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
4019 }
4020 #[inline(always)]
4021 fn shr_u32x8(self, a: u32x8<Self>, b: u32) -> u32x8<Self> {
4022 let (a0, a1) = self.split_u32x8(a);
4023 self.combine_u32x4(self.shr_u32x4(a0, b), self.shr_u32x4(a1, b))
4024 }
4025 #[inline(always)]
4026 fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4027 let (a0, a1) = self.split_u32x8(a);
4028 let (b0, b1) = self.split_u32x8(b);
4029 self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
4030 }
4031 #[inline(always)]
4032 fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4033 let (a0, a1) = self.split_u32x8(a);
4034 let (b0, b1) = self.split_u32x8(b);
4035 self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
4036 }
4037 #[inline(always)]
4038 fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4039 let (a0, a1) = self.split_u32x8(a);
4040 let (b0, b1) = self.split_u32x8(b);
4041 self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
4042 }
4043 #[inline(always)]
4044 fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4045 let (a0, a1) = self.split_u32x8(a);
4046 let (b0, b1) = self.split_u32x8(b);
4047 self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
4048 }
4049 #[inline(always)]
4050 fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
4051 let (a0, a1) = self.split_u32x8(a);
4052 let (b0, b1) = self.split_u32x8(b);
4053 self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
4054 }
4055 #[inline(always)]
4056 fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4057 let (a0, _) = self.split_u32x8(a);
4058 let (b0, _) = self.split_u32x8(b);
4059 self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
4060 }
4061 #[inline(always)]
4062 fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4063 let (_, a1) = self.split_u32x8(a);
4064 let (_, b1) = self.split_u32x8(b);
4065 self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
4066 }
4067 #[inline(always)]
4068 fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4069 let (a0, a1) = self.split_u32x8(a);
4070 let (b0, b1) = self.split_u32x8(b);
4071 self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
4072 }
4073 #[inline(always)]
4074 fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4075 let (a0, a1) = self.split_u32x8(a);
4076 let (b0, b1) = self.split_u32x8(b);
4077 self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
4078 }
4079 #[inline(always)]
4080 fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
4081 let (a0, a1) = self.split_mask32x8(a);
4082 let (b0, b1) = self.split_u32x8(b);
4083 let (c0, c1) = self.split_u32x8(c);
4084 self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
4085 }
4086 #[inline(always)]
4087 fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4088 let (a0, a1) = self.split_u32x8(a);
4089 let (b0, b1) = self.split_u32x8(b);
4090 self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
4091 }
4092 #[inline(always)]
4093 fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
4094 let (a0, a1) = self.split_u32x8(a);
4095 let (b0, b1) = self.split_u32x8(b);
4096 self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
4097 }
4098 #[inline(always)]
4099 fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
4100 let mut result = [0; 16usize];
4101 result[0..8usize].copy_from_slice(&a.val);
4102 result[8usize..16usize].copy_from_slice(&b.val);
4103 result.simd_into(self)
4104 }
4105 #[inline(always)]
4106 fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
4107 let mut b0 = [0; 4usize];
4108 let mut b1 = [0; 4usize];
4109 b0.copy_from_slice(&a.val[0..4usize]);
4110 b1.copy_from_slice(&a.val[4usize..8usize]);
4111 (b0.simd_into(self), b1.simd_into(self))
4112 }
4113 #[inline(always)]
4114 fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
4115 let (a0, a1) = self.split_u32x8(a);
4116 self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
4117 }
4118 #[inline(always)]
4119 fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
4120 let (a0, a1) = self.split_u32x8(a);
4121 self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
4122 }
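// mask32x8: delegated to mask32x4 halves.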
4123 #[inline(always)]
4124 fn splat_mask32x8(self, a: i32) -> mask32x8<Self> {
4125 let half = self.splat_mask32x4(a);
4126 self.combine_mask32x4(half, half)
4127 }
4128 #[inline(always)]
4129 fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
4130 let (a0, a1) = self.split_mask32x8(a);
4131 self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
4132 }
4133 #[inline(always)]
4134 fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4135 let (a0, a1) = self.split_mask32x8(a);
4136 let (b0, b1) = self.split_mask32x8(b);
4137 self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
4138 }
4139 #[inline(always)]
4140 fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4141 let (a0, a1) = self.split_mask32x8(a);
4142 let (b0, b1) = self.split_mask32x8(b);
4143 self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
4144 }
4145 #[inline(always)]
4146 fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4147 let (a0, a1) = self.split_mask32x8(a);
4148 let (b0, b1) = self.split_mask32x8(b);
4149 self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
4150 }
4151 #[inline(always)]
4152 fn select_mask32x8(
4153 self,
4154 a: mask32x8<Self>,
4155 b: mask32x8<Self>,
4156 c: mask32x8<Self>,
4157 ) -> mask32x8<Self> {
4158 let (a0, a1) = self.split_mask32x8(a);
4159 let (b0, b1) = self.split_mask32x8(b);
4160 let (c0, c1) = self.split_mask32x8(c);
4161 self.combine_mask32x4(
4162 self.select_mask32x4(a0, b0, c0),
4163 self.select_mask32x4(a1, b1, c1),
4164 )
4165 }
4166 #[inline(always)]
4167 fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
4168 let (a0, a1) = self.split_mask32x8(a);
4169 let (b0, b1) = self.split_mask32x8(b);
4170 self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
4171 }
4172 #[inline(always)]
4173 fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
4174 let mut result = [0; 16usize];
4175 result[0..8usize].copy_from_slice(&a.val);
4176 result[8usize..16usize].copy_from_slice(&b.val);
4177 result.simd_into(self)
4178 }
4179 #[inline(always)]
4180 fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
4181 let mut b0 = [0; 4usize];
4182 let mut b1 = [0; 4usize];
4183 b0.copy_from_slice(&a.val[0..4usize]);
4184 b1.copy_from_slice(&a.val[4usize..8usize]);
4185 (b0.simd_into(self), b1.simd_into(self))
4186 }
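// f64x4: delegated to f64x2 halves, including madd/msub, floor/fract/trunc,
// and the *_precise min/max variants.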
4187 #[inline(always)]
4188 fn splat_f64x4(self, a: f64) -> f64x4<Self> {
4189 let half = self.splat_f64x2(a);
4190 self.combine_f64x2(half, half)
4191 }
4192 #[inline(always)]
4193 fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4194 let (a0, a1) = self.split_f64x4(a);
4195 self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
4196 }
4197 #[inline(always)]
4198 fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4199 let (a0, a1) = self.split_f64x4(a);
4200 self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
4201 }
4202 #[inline(always)]
4203 fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4204 let (a0, a1) = self.split_f64x4(a);
4205 self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
4206 }
4207 #[inline(always)]
4208 fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4209 let (a0, a1) = self.split_f64x4(a);
4210 let (b0, b1) = self.split_f64x4(b);
4211 self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
4212 }
4213 #[inline(always)]
4214 fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4215 let (a0, a1) = self.split_f64x4(a);
4216 let (b0, b1) = self.split_f64x4(b);
4217 self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
4218 }
4219 #[inline(always)]
4220 fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4221 let (a0, a1) = self.split_f64x4(a);
4222 let (b0, b1) = self.split_f64x4(b);
4223 self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
4224 }
4225 #[inline(always)]
4226 fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4227 let (a0, a1) = self.split_f64x4(a);
4228 let (b0, b1) = self.split_f64x4(b);
4229 self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
4230 }
4231 #[inline(always)]
4232 fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4233 let (a0, a1) = self.split_f64x4(a);
4234 let (b0, b1) = self.split_f64x4(b);
4235 self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
4236 }
4237 #[inline(always)]
4238 fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4239 let (a0, a1) = self.split_f64x4(a);
4240 let (b0, b1) = self.split_f64x4(b);
4241 self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
4242 }
4243 #[inline(always)]
4244 fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4245 let (a0, a1) = self.split_f64x4(a);
4246 let (b0, b1) = self.split_f64x4(b);
4247 self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
4248 }
4249 #[inline(always)]
4250 fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4251 let (a0, a1) = self.split_f64x4(a);
4252 let (b0, b1) = self.split_f64x4(b);
4253 self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
4254 }
4255 #[inline(always)]
4256 fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4257 let (a0, a1) = self.split_f64x4(a);
4258 let (b0, b1) = self.split_f64x4(b);
4259 self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
4260 }
4261 #[inline(always)]
4262 fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
4263 let (a0, a1) = self.split_f64x4(a);
4264 let (b0, b1) = self.split_f64x4(b);
4265 self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
4266 }
4267 #[inline(always)]
4268 fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4269 let (a0, _) = self.split_f64x4(a);
4270 let (b0, _) = self.split_f64x4(b);
4271 self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
4272 }
4273 #[inline(always)]
4274 fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4275 let (_, a1) = self.split_f64x4(a);
4276 let (_, b1) = self.split_f64x4(b);
4277 self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
4278 }
4279 #[inline(always)]
4280 fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4281 let (a0, a1) = self.split_f64x4(a);
4282 let (b0, b1) = self.split_f64x4(b);
4283 self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
4284 }
4285 #[inline(always)]
4286 fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4287 let (a0, a1) = self.split_f64x4(a);
4288 let (b0, b1) = self.split_f64x4(b);
4289 self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
4290 }
4291 #[inline(always)]
4292 fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4293 let (a0, a1) = self.split_f64x4(a);
4294 let (b0, b1) = self.split_f64x4(b);
4295 self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
4296 }
4297 #[inline(always)]
4298 fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4299 let (a0, a1) = self.split_f64x4(a);
4300 let (b0, b1) = self.split_f64x4(b);
4301 self.combine_f64x2(
4302 self.max_precise_f64x2(a0, b0),
4303 self.max_precise_f64x2(a1, b1),
4304 )
4305 }
4306 #[inline(always)]
4307 fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4308 let (a0, a1) = self.split_f64x4(a);
4309 let (b0, b1) = self.split_f64x4(b);
4310 self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
4311 }
4312 #[inline(always)]
4313 fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
4314 let (a0, a1) = self.split_f64x4(a);
4315 let (b0, b1) = self.split_f64x4(b);
4316 self.combine_f64x2(
4317 self.min_precise_f64x2(a0, b0),
4318 self.min_precise_f64x2(a1, b1),
4319 )
4320 }
4321 #[inline(always)]
4322 fn madd_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4323 let (a0, a1) = self.split_f64x4(a);
4324 let (b0, b1) = self.split_f64x4(b);
4325 let (c0, c1) = self.split_f64x4(c);
4326 self.combine_f64x2(self.madd_f64x2(a0, b0, c0), self.madd_f64x2(a1, b1, c1))
4327 }
4328 #[inline(always)]
4329 fn msub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4330 let (a0, a1) = self.split_f64x4(a);
4331 let (b0, b1) = self.split_f64x4(b);
4332 let (c0, c1) = self.split_f64x4(c);
4333 self.combine_f64x2(self.msub_f64x2(a0, b0, c0), self.msub_f64x2(a1, b1, c1))
4334 }
4335 #[inline(always)]
4336 fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4337 let (a0, a1) = self.split_f64x4(a);
4338 self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
4339 }
4340 #[inline(always)]
4341 fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4342 let (a0, a1) = self.split_f64x4(a);
4343 self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
4344 }
4345 #[inline(always)]
4346 fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
4347 let (a0, a1) = self.split_f64x4(a);
4348 self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
4349 }
4350 #[inline(always)]
4351 fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
4352 let (a0, a1) = self.split_mask64x4(a);
4353 let (b0, b1) = self.split_f64x4(b);
4354 let (c0, c1) = self.split_f64x4(c);
4355 self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
4356 }
4357 #[inline(always)]
4358 fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
4359 let mut result = [0.0; 8usize];
4360 result[0..4usize].copy_from_slice(&a.val);
4361 result[4usize..8usize].copy_from_slice(&b.val);
4362 result.simd_into(self)
4363 }
4364 #[inline(always)]
4365 fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
4366 let mut b0 = [0.0; 2usize];
4367 let mut b1 = [0.0; 2usize];
4368 b0.copy_from_slice(&a.val[0..2usize]);
4369 b1.copy_from_slice(&a.val[2usize..4usize]);
4370 (b0.simd_into(self), b1.simd_into(self))
4371 }
4372 #[inline(always)]
4373 fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
4374 let (a0, a1) = self.split_f64x4(a);
4375 self.combine_f32x4(
4376 self.reinterpret_f32_f64x2(a0),
4377 self.reinterpret_f32_f64x2(a1),
4378 )
4379 }
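// mask64x4: delegated to mask64x2 halves.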
4380 #[inline(always)]
4381 fn splat_mask64x4(self, a: i64) -> mask64x4<Self> {
4382 let half = self.splat_mask64x2(a);
4383 self.combine_mask64x2(half, half)
4384 }
4385 #[inline(always)]
4386 fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
4387 let (a0, a1) = self.split_mask64x4(a);
4388 self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
4389 }
4390 #[inline(always)]
4391 fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4392 let (a0, a1) = self.split_mask64x4(a);
4393 let (b0, b1) = self.split_mask64x4(b);
4394 self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
4395 }
4396 #[inline(always)]
4397 fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4398 let (a0, a1) = self.split_mask64x4(a);
4399 let (b0, b1) = self.split_mask64x4(b);
4400 self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
4401 }
4402 #[inline(always)]
4403 fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4404 let (a0, a1) = self.split_mask64x4(a);
4405 let (b0, b1) = self.split_mask64x4(b);
4406 self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
4407 }
4408 #[inline(always)]
4409 fn select_mask64x4(
4410 self,
4411 a: mask64x4<Self>,
4412 b: mask64x4<Self>,
4413 c: mask64x4<Self>,
4414 ) -> mask64x4<Self> {
4415 let (a0, a1) = self.split_mask64x4(a);
4416 let (b0, b1) = self.split_mask64x4(b);
4417 let (c0, c1) = self.split_mask64x4(c);
4418 self.combine_mask64x2(
4419 self.select_mask64x2(a0, b0, c0),
4420 self.select_mask64x2(a1, b1, c1),
4421 )
4422 }
4423 #[inline(always)]
4424 fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
4425 let (a0, a1) = self.split_mask64x4(a);
4426 let (b0, b1) = self.split_mask64x4(b);
4427 self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
4428 }
4429 #[inline(always)]
4430 fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
4431 let mut result = [0; 8usize];
4432 result[0..4usize].copy_from_slice(&a.val);
4433 result[4usize..8usize].copy_from_slice(&b.val);
4434 result.simd_into(self)
4435 }
4436 #[inline(always)]
4437 fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
4438 let mut b0 = [0; 2usize];
4439 let mut b1 = [0; 2usize];
4440 b0.copy_from_slice(&a.val[0..2usize]);
4441 b1.copy_from_slice(&a.val[2usize..4usize]);
4442 (b0.simd_into(self), b1.simd_into(self))
4443 }
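// f32x16: delegated to f32x8 halves; also provides 128-bit-lane interleaved
// load/store and conversions to/from i32/u32.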
4444 #[inline(always)]
4445 fn splat_f32x16(self, a: f32) -> f32x16<Self> {
4446 let half = self.splat_f32x8(a);
4447 self.combine_f32x8(half, half)
4448 }
4449 #[inline(always)]
4450 fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4451 let (a0, a1) = self.split_f32x16(a);
4452 self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
4453 }
4454 #[inline(always)]
4455 fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4456 let (a0, a1) = self.split_f32x16(a);
4457 self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
4458 }
4459 #[inline(always)]
4460 fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4461 let (a0, a1) = self.split_f32x16(a);
4462 self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
4463 }
4464 #[inline(always)]
4465 fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4466 let (a0, a1) = self.split_f32x16(a);
4467 let (b0, b1) = self.split_f32x16(b);
4468 self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
4469 }
4470 #[inline(always)]
4471 fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4472 let (a0, a1) = self.split_f32x16(a);
4473 let (b0, b1) = self.split_f32x16(b);
4474 self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
4475 }
4476 #[inline(always)]
4477 fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4478 let (a0, a1) = self.split_f32x16(a);
4479 let (b0, b1) = self.split_f32x16(b);
4480 self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
4481 }
4482 #[inline(always)]
4483 fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4484 let (a0, a1) = self.split_f32x16(a);
4485 let (b0, b1) = self.split_f32x16(b);
4486 self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
4487 }
4488 #[inline(always)]
4489 fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4490 let (a0, a1) = self.split_f32x16(a);
4491 let (b0, b1) = self.split_f32x16(b);
4492 self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
4493 }
4494 #[inline(always)]
4495 fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4496 let (a0, a1) = self.split_f32x16(a);
4497 let (b0, b1) = self.split_f32x16(b);
4498 self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
4499 }
4500 #[inline(always)]
4501 fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4502 let (a0, a1) = self.split_f32x16(a);
4503 let (b0, b1) = self.split_f32x16(b);
4504 self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
4505 }
4506 #[inline(always)]
4507 fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4508 let (a0, a1) = self.split_f32x16(a);
4509 let (b0, b1) = self.split_f32x16(b);
4510 self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
4511 }
4512 #[inline(always)]
4513 fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4514 let (a0, a1) = self.split_f32x16(a);
4515 let (b0, b1) = self.split_f32x16(b);
4516 self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
4517 }
4518 #[inline(always)]
4519 fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
4520 let (a0, a1) = self.split_f32x16(a);
4521 let (b0, b1) = self.split_f32x16(b);
4522 self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
4523 }
4524 #[inline(always)]
4525 fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4526 let (a0, _) = self.split_f32x16(a);
4527 let (b0, _) = self.split_f32x16(b);
4528 self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
4529 }
4530 #[inline(always)]
4531 fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4532 let (_, a1) = self.split_f32x16(a);
4533 let (_, b1) = self.split_f32x16(b);
4534 self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
4535 }
4536 #[inline(always)]
4537 fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4538 let (a0, a1) = self.split_f32x16(a);
4539 let (b0, b1) = self.split_f32x16(b);
4540 self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
4541 }
4542 #[inline(always)]
4543 fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4544 let (a0, a1) = self.split_f32x16(a);
4545 let (b0, b1) = self.split_f32x16(b);
4546 self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
4547 }
4548 #[inline(always)]
4549 fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4550 let (a0, a1) = self.split_f32x16(a);
4551 let (b0, b1) = self.split_f32x16(b);
4552 self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
4553 }
4554 #[inline(always)]
4555 fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4556 let (a0, a1) = self.split_f32x16(a);
4557 let (b0, b1) = self.split_f32x16(b);
4558 self.combine_f32x8(
4559 self.max_precise_f32x8(a0, b0),
4560 self.max_precise_f32x8(a1, b1),
4561 )
4562 }
4563 #[inline(always)]
4564 fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4565 let (a0, a1) = self.split_f32x16(a);
4566 let (b0, b1) = self.split_f32x16(b);
4567 self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
4568 }
4569 #[inline(always)]
4570 fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
4571 let (a0, a1) = self.split_f32x16(a);
4572 let (b0, b1) = self.split_f32x16(b);
4573 self.combine_f32x8(
4574 self.min_precise_f32x8(a0, b0),
4575 self.min_precise_f32x8(a1, b1),
4576 )
4577 }
4578 #[inline(always)]
4579 fn madd_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4580 let (a0, a1) = self.split_f32x16(a);
4581 let (b0, b1) = self.split_f32x16(b);
4582 let (c0, c1) = self.split_f32x16(c);
4583 self.combine_f32x8(self.madd_f32x8(a0, b0, c0), self.madd_f32x8(a1, b1, c1))
4584 }
4585 #[inline(always)]
4586 fn msub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4587 let (a0, a1) = self.split_f32x16(a);
4588 let (b0, b1) = self.split_f32x16(b);
4589 let (c0, c1) = self.split_f32x16(c);
4590 self.combine_f32x8(self.msub_f32x8(a0, b0, c0), self.msub_f32x8(a1, b1, c1))
4591 }
4592 #[inline(always)]
4593 fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4594 let (a0, a1) = self.split_f32x16(a);
4595 self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
4596 }
4597 #[inline(always)]
4598 fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4599 let (a0, a1) = self.split_f32x16(a);
4600 self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
4601 }
4602 #[inline(always)]
4603 fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
4604 let (a0, a1) = self.split_f32x16(a);
4605 self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
4606 }
4607 #[inline(always)]
4608 fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
4609 let (a0, a1) = self.split_mask32x16(a);
4610 let (b0, b1) = self.split_f32x16(b);
4611 let (c0, c1) = self.split_f32x16(c);
4612 self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
4613 }
4614 #[inline(always)]
4615 fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
4616 let mut b0 = [0.0; 8usize];
4617 let mut b1 = [0.0; 8usize];
4618 b0.copy_from_slice(&a.val[0..8usize]);
4619 b1.copy_from_slice(&a.val[8usize..16usize]);
4620 (b0.simd_into(self), b1.simd_into(self))
4621 }
4622 #[inline(always)]
4623 fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
4624 let (a0, a1) = self.split_f32x16(a);
4625 self.combine_f64x4(
4626 self.reinterpret_f64_f32x8(a0),
4627 self.reinterpret_f64_f32x8(a1),
4628 )
4629 }
4630 #[inline(always)]
4631 fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4632 let (a0, a1) = self.split_f32x16(a);
4633 self.combine_i32x8(
4634 self.reinterpret_i32_f32x8(a0),
4635 self.reinterpret_i32_f32x8(a1),
4636 )
4637 }
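// load_interleaved_128 gathers lane i from src[(i % 4) * 4 + i / 4]: a 4x4
// transpose that de-interleaves four 4-float (128-bit) groups.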
4638 #[inline(always)]
4639 fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
4640 [
4641 src[0usize],
4642 src[4usize],
4643 src[8usize],
4644 src[12usize],
4645 src[1usize],
4646 src[5usize],
4647 src[9usize],
4648 src[13usize],
4649 src[2usize],
4650 src[6usize],
4651 src[10usize],
4652 src[14usize],
4653 src[3usize],
4654 src[7usize],
4655 src[11usize],
4656 src[15usize],
4657 ]
4658 .simd_into(self)
4659 }
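// The 4x4 transpose permutation is its own inverse, so the store writes with
// the same index pattern that the load reads with.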
4660 #[inline(always)]
4661 fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
4662 *dest = [
4663 a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
4664 a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
4665 a[11usize], a[15usize],
4666 ];
4667 }
4668 #[inline(always)]
4669 fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
4670 let (a0, a1) = self.split_f32x16(a);
4671 self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
4672 }
4673 #[inline(always)]
4674 fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4675 let (a0, a1) = self.split_f32x16(a);
4676 self.combine_u32x8(
4677 self.reinterpret_u32_f32x8(a0),
4678 self.reinterpret_u32_f32x8(a1),
4679 )
4680 }
4681 #[inline(always)]
4682 fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
4683 let (a0, a1) = self.split_f32x16(a);
4684 self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
4685 }
4686 #[inline(always)]
4687 fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
4688 let (a0, a1) = self.split_f32x16(a);
4689 self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
4690 }
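// i8x64: delegated to i8x32 halves.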
4691 #[inline(always)]
4692 fn splat_i8x64(self, a: i8) -> i8x64<Self> {
4693 let half = self.splat_i8x32(a);
4694 self.combine_i8x32(half, half)
4695 }
4696 #[inline(always)]
4697 fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
4698 let (a0, a1) = self.split_i8x64(a);
4699 self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
4700 }
4701 #[inline(always)]
4702 fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4703 let (a0, a1) = self.split_i8x64(a);
4704 let (b0, b1) = self.split_i8x64(b);
4705 self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
4706 }
4707 #[inline(always)]
4708 fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4709 let (a0, a1) = self.split_i8x64(a);
4710 let (b0, b1) = self.split_i8x64(b);
4711 self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
4712 }
4713 #[inline(always)]
4714 fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4715 let (a0, a1) = self.split_i8x64(a);
4716 let (b0, b1) = self.split_i8x64(b);
4717 self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
4718 }
4719 #[inline(always)]
4720 fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4721 let (a0, a1) = self.split_i8x64(a);
4722 let (b0, b1) = self.split_i8x64(b);
4723 self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
4724 }
4725 #[inline(always)]
4726 fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4727 let (a0, a1) = self.split_i8x64(a);
4728 let (b0, b1) = self.split_i8x64(b);
4729 self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
4730 }
4731 #[inline(always)]
4732 fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4733 let (a0, a1) = self.split_i8x64(a);
4734 let (b0, b1) = self.split_i8x64(b);
4735 self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
4736 }
4737 #[inline(always)]
4738 fn shr_i8x64(self, a: i8x64<Self>, b: u32) -> i8x64<Self> {
4739 let (a0, a1) = self.split_i8x64(a);
4740 self.combine_i8x32(self.shr_i8x32(a0, b), self.shr_i8x32(a1, b))
4741 }
4742 #[inline(always)]
4743 fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4744 let (a0, a1) = self.split_i8x64(a);
4745 let (b0, b1) = self.split_i8x64(b);
4746 self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
4747 }
4748 #[inline(always)]
4749 fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4750 let (a0, a1) = self.split_i8x64(a);
4751 let (b0, b1) = self.split_i8x64(b);
4752 self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
4753 }
4754 #[inline(always)]
4755 fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4756 let (a0, a1) = self.split_i8x64(a);
4757 let (b0, b1) = self.split_i8x64(b);
4758 self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
4759 }
4760 #[inline(always)]
4761 fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4762 let (a0, a1) = self.split_i8x64(a);
4763 let (b0, b1) = self.split_i8x64(b);
4764 self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
4765 }
4766 #[inline(always)]
4767 fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
4768 let (a0, a1) = self.split_i8x64(a);
4769 let (b0, b1) = self.split_i8x64(b);
4770 self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
4771 }
4772 #[inline(always)]
4773 fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4774 let (a0, _) = self.split_i8x64(a);
4775 let (b0, _) = self.split_i8x64(b);
4776 self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
4777 }
4778 #[inline(always)]
4779 fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4780 let (_, a1) = self.split_i8x64(a);
4781 let (_, b1) = self.split_i8x64(b);
4782 self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
4783 }
4784 #[inline(always)]
4785 fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4786 let (a0, a1) = self.split_i8x64(a);
4787 let (b0, b1) = self.split_i8x64(b);
4788 self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
4789 }
4790 #[inline(always)]
4791 fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4792 let (a0, a1) = self.split_i8x64(a);
4793 let (b0, b1) = self.split_i8x64(b);
4794 self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
4795 }
4796 #[inline(always)]
4797 fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
4798 let (a0, a1) = self.split_mask8x64(a);
4799 let (b0, b1) = self.split_i8x64(b);
4800 let (c0, c1) = self.split_i8x64(c);
4801 self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
4802 }
4803 #[inline(always)]
4804 fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4805 let (a0, a1) = self.split_i8x64(a);
4806 let (b0, b1) = self.split_i8x64(b);
4807 self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
4808 }
4809 #[inline(always)]
4810 fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
4811 let (a0, a1) = self.split_i8x64(a);
4812 let (b0, b1) = self.split_i8x64(b);
4813 self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
4814 }
4815 #[inline(always)]
4816 fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
4817 let mut b0 = [0; 32usize];
4818 let mut b1 = [0; 32usize];
4819 b0.copy_from_slice(&a.val[0..32usize]);
4820 b1.copy_from_slice(&a.val[32usize..64usize]);
4821 (b0.simd_into(self), b1.simd_into(self))
4822 }
4823 #[inline(always)]
4824 fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
4825 let (a0, a1) = self.split_i8x64(a);
4826 self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
4827 }
4828 #[inline(always)]
4829 fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
4830 let (a0, a1) = self.split_i8x64(a);
4831 self.combine_u32x8(
4832 self.reinterpret_u32_i8x32(a0),
4833 self.reinterpret_u32_i8x32(a1),
4834 )
4835 }
4836 #[inline(always)]
4837 fn splat_u8x64(self, a: u8) -> u8x64<Self> {
4838 let half = self.splat_u8x32(a);
4839 self.combine_u8x32(half, half)
4840 }
4841 #[inline(always)]
4842 fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
4843 let (a0, a1) = self.split_u8x64(a);
4844 self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
4845 }
4846 #[inline(always)]
4847 fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4848 let (a0, a1) = self.split_u8x64(a);
4849 let (b0, b1) = self.split_u8x64(b);
4850 self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
4851 }
4852 #[inline(always)]
4853 fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4854 let (a0, a1) = self.split_u8x64(a);
4855 let (b0, b1) = self.split_u8x64(b);
4856 self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
4857 }
4858 #[inline(always)]
4859 fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4860 let (a0, a1) = self.split_u8x64(a);
4861 let (b0, b1) = self.split_u8x64(b);
4862 self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
4863 }
4864 #[inline(always)]
4865 fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4866 let (a0, a1) = self.split_u8x64(a);
4867 let (b0, b1) = self.split_u8x64(b);
4868 self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
4869 }
4870 #[inline(always)]
4871 fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4872 let (a0, a1) = self.split_u8x64(a);
4873 let (b0, b1) = self.split_u8x64(b);
4874 self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
4875 }
4876 #[inline(always)]
4877 fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4878 let (a0, a1) = self.split_u8x64(a);
4879 let (b0, b1) = self.split_u8x64(b);
4880 self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
4881 }
4882 #[inline(always)]
4883 fn shr_u8x64(self, a: u8x64<Self>, b: u32) -> u8x64<Self> {
4884 let (a0, a1) = self.split_u8x64(a);
4885 self.combine_u8x32(self.shr_u8x32(a0, b), self.shr_u8x32(a1, b))
4886 }
4887 #[inline(always)]
4888 fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4889 let (a0, a1) = self.split_u8x64(a);
4890 let (b0, b1) = self.split_u8x64(b);
4891 self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
4892 }
4893 #[inline(always)]
4894 fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4895 let (a0, a1) = self.split_u8x64(a);
4896 let (b0, b1) = self.split_u8x64(b);
4897 self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
4898 }
4899 #[inline(always)]
4900 fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4901 let (a0, a1) = self.split_u8x64(a);
4902 let (b0, b1) = self.split_u8x64(b);
4903 self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
4904 }
4905 #[inline(always)]
4906 fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4907 let (a0, a1) = self.split_u8x64(a);
4908 let (b0, b1) = self.split_u8x64(b);
4909 self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
4910 }
4911 #[inline(always)]
4912 fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
4913 let (a0, a1) = self.split_u8x64(a);
4914 let (b0, b1) = self.split_u8x64(b);
4915 self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
4916 }
4917 #[inline(always)]
4918 fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4919 let (a0, _) = self.split_u8x64(a);
4920 let (b0, _) = self.split_u8x64(b);
4921 self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
4922 }
4923 #[inline(always)]
4924 fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4925 let (_, a1) = self.split_u8x64(a);
4926 let (_, b1) = self.split_u8x64(b);
4927 self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
4928 }
4929 #[inline(always)]
4930 fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4931 let (a0, a1) = self.split_u8x64(a);
4932 let (b0, b1) = self.split_u8x64(b);
4933 self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
4934 }
4935 #[inline(always)]
4936 fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4937 let (a0, a1) = self.split_u8x64(a);
4938 let (b0, b1) = self.split_u8x64(b);
4939 self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
4940 }
4941 #[inline(always)]
4942 fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
4943 let (a0, a1) = self.split_mask8x64(a);
4944 let (b0, b1) = self.split_u8x64(b);
4945 let (c0, c1) = self.split_u8x64(c);
4946 self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
4947 }
4948 #[inline(always)]
4949 fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4950 let (a0, a1) = self.split_u8x64(a);
4951 let (b0, b1) = self.split_u8x64(b);
4952 self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
4953 }
4954 #[inline(always)]
4955 fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
4956 let (a0, a1) = self.split_u8x64(a);
4957 let (b0, b1) = self.split_u8x64(b);
4958 self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
4959 }
4960 #[inline(always)]
4961 fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
4962 let mut b0 = [0; 32usize];
4963 let mut b1 = [0; 32usize];
4964 b0.copy_from_slice(&a.val[0..32usize]);
4965 b1.copy_from_slice(&a.val[32usize..64usize]);
4966 (b0.simd_into(self), b1.simd_into(self))
4967 }
4968 #[inline(always)]
4969 fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
4970 [
4971 src[0usize],
4972 src[4usize],
4973 src[8usize],
4974 src[12usize],
4975 src[16usize],
4976 src[20usize],
4977 src[24usize],
4978 src[28usize],
4979 src[32usize],
4980 src[36usize],
4981 src[40usize],
4982 src[44usize],
4983 src[48usize],
4984 src[52usize],
4985 src[56usize],
4986 src[60usize],
4987 src[1usize],
4988 src[5usize],
4989 src[9usize],
4990 src[13usize],
4991 src[17usize],
4992 src[21usize],
4993 src[25usize],
4994 src[29usize],
4995 src[33usize],
4996 src[37usize],
4997 src[41usize],
4998 src[45usize],
4999 src[49usize],
5000 src[53usize],
5001 src[57usize],
5002 src[61usize],
5003 src[2usize],
5004 src[6usize],
5005 src[10usize],
5006 src[14usize],
5007 src[18usize],
5008 src[22usize],
5009 src[26usize],
5010 src[30usize],
5011 src[34usize],
5012 src[38usize],
5013 src[42usize],
5014 src[46usize],
5015 src[50usize],
5016 src[54usize],
5017 src[58usize],
5018 src[62usize],
5019 src[3usize],
5020 src[7usize],
5021 src[11usize],
5022 src[15usize],
5023 src[19usize],
5024 src[23usize],
5025 src[27usize],
5026 src[31usize],
5027 src[35usize],
5028 src[39usize],
5029 src[43usize],
5030 src[47usize],
5031 src[51usize],
5032 src[55usize],
5033 src[59usize],
5034 src[63usize],
5035 ]
5036 .simd_into(self)
5037 }
5038 #[inline(always)]
5039 fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
5040 *dest = [
5041 a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
5042 a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
5043 a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
5044 a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
5045 a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
5046 a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
5047 a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
5048 a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
5049 a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
5050 a[63usize],
5051 ];
5052 }
5053 #[inline(always)]
5054 fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
5055 let (a0, a1) = self.split_u8x64(a);
5056 self.combine_u32x8(
5057 self.reinterpret_u32_u8x32(a0),
5058 self.reinterpret_u32_u8x32(a1),
5059 )
5060 }
5061 #[inline(always)]
5062 fn splat_mask8x64(self, a: i8) -> mask8x64<Self> {
5063 let half = self.splat_mask8x32(a);
5064 self.combine_mask8x32(half, half)
5065 }
5066 #[inline(always)]
5067 fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
5068 let (a0, a1) = self.split_mask8x64(a);
5069 self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
5070 }
5071 #[inline(always)]
5072 fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5073 let (a0, a1) = self.split_mask8x64(a);
5074 let (b0, b1) = self.split_mask8x64(b);
5075 self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
5076 }
5077 #[inline(always)]
5078 fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5079 let (a0, a1) = self.split_mask8x64(a);
5080 let (b0, b1) = self.split_mask8x64(b);
5081 self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
5082 }
5083 #[inline(always)]
5084 fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5085 let (a0, a1) = self.split_mask8x64(a);
5086 let (b0, b1) = self.split_mask8x64(b);
5087 self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
5088 }
5089 #[inline(always)]
5090 fn select_mask8x64(
5091 self,
5092 a: mask8x64<Self>,
5093 b: mask8x64<Self>,
5094 c: mask8x64<Self>,
5095 ) -> mask8x64<Self> {
5096 let (a0, a1) = self.split_mask8x64(a);
5097 let (b0, b1) = self.split_mask8x64(b);
5098 let (c0, c1) = self.split_mask8x64(c);
5099 self.combine_mask8x32(
5100 self.select_mask8x32(a0, b0, c0),
5101 self.select_mask8x32(a1, b1, c1),
5102 )
5103 }
5104 #[inline(always)]
5105 fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
5106 let (a0, a1) = self.split_mask8x64(a);
5107 let (b0, b1) = self.split_mask8x64(b);
5108 self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
5109 }
5110 #[inline(always)]
5111 fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
5112 let mut b0 = [0; 32usize];
5113 let mut b1 = [0; 32usize];
5114 b0.copy_from_slice(&a.val[0..32usize]);
5115 b1.copy_from_slice(&a.val[32usize..64usize]);
5116 (b0.simd_into(self), b1.simd_into(self))
5117 }
5118 #[inline(always)]
5119 fn splat_i16x32(self, a: i16) -> i16x32<Self> {
5120 let half = self.splat_i16x16(a);
5121 self.combine_i16x16(half, half)
5122 }
5123 #[inline(always)]
5124 fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
5125 let (a0, a1) = self.split_i16x32(a);
5126 self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
5127 }
5128 #[inline(always)]
5129 fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5130 let (a0, a1) = self.split_i16x32(a);
5131 let (b0, b1) = self.split_i16x32(b);
5132 self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
5133 }
5134 #[inline(always)]
5135 fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5136 let (a0, a1) = self.split_i16x32(a);
5137 let (b0, b1) = self.split_i16x32(b);
5138 self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
5139 }
5140 #[inline(always)]
5141 fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5142 let (a0, a1) = self.split_i16x32(a);
5143 let (b0, b1) = self.split_i16x32(b);
5144 self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
5145 }
5146 #[inline(always)]
5147 fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5148 let (a0, a1) = self.split_i16x32(a);
5149 let (b0, b1) = self.split_i16x32(b);
5150 self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
5151 }
5152 #[inline(always)]
5153 fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5154 let (a0, a1) = self.split_i16x32(a);
5155 let (b0, b1) = self.split_i16x32(b);
5156 self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
5157 }
5158 #[inline(always)]
5159 fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5160 let (a0, a1) = self.split_i16x32(a);
5161 let (b0, b1) = self.split_i16x32(b);
5162 self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
5163 }
5164 #[inline(always)]
5165 fn shr_i16x32(self, a: i16x32<Self>, b: u32) -> i16x32<Self> {
5166 let (a0, a1) = self.split_i16x32(a);
5167 self.combine_i16x16(self.shr_i16x16(a0, b), self.shr_i16x16(a1, b))
5168 }
5169 #[inline(always)]
5170 fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5171 let (a0, a1) = self.split_i16x32(a);
5172 let (b0, b1) = self.split_i16x32(b);
5173 self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
5174 }
5175 #[inline(always)]
5176 fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5177 let (a0, a1) = self.split_i16x32(a);
5178 let (b0, b1) = self.split_i16x32(b);
5179 self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
5180 }
5181 #[inline(always)]
5182 fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5183 let (a0, a1) = self.split_i16x32(a);
5184 let (b0, b1) = self.split_i16x32(b);
5185 self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
5186 }
5187 #[inline(always)]
5188 fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5189 let (a0, a1) = self.split_i16x32(a);
5190 let (b0, b1) = self.split_i16x32(b);
5191 self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
5192 }
5193 #[inline(always)]
5194 fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
5195 let (a0, a1) = self.split_i16x32(a);
5196 let (b0, b1) = self.split_i16x32(b);
5197 self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
5198 }
5199 #[inline(always)]
5200 fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5201 let (a0, _) = self.split_i16x32(a);
5202 let (b0, _) = self.split_i16x32(b);
5203 self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
5204 }
5205 #[inline(always)]
5206 fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5207 let (_, a1) = self.split_i16x32(a);
5208 let (_, b1) = self.split_i16x32(b);
5209 self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
5210 }
5211 #[inline(always)]
5212 fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5213 let (a0, a1) = self.split_i16x32(a);
5214 let (b0, b1) = self.split_i16x32(b);
5215 self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
5216 }
5217 #[inline(always)]
5218 fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5219 let (a0, a1) = self.split_i16x32(a);
5220 let (b0, b1) = self.split_i16x32(b);
5221 self.combine_i16x16(
5222 self.unzip_high_i16x16(a0, a1),
5223 self.unzip_high_i16x16(b0, b1),
5224 )
5225 }
5226 #[inline(always)]
5227 fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
5228 let (a0, a1) = self.split_mask16x32(a);
5229 let (b0, b1) = self.split_i16x32(b);
5230 let (c0, c1) = self.split_i16x32(c);
5231 self.combine_i16x16(
5232 self.select_i16x16(a0, b0, c0),
5233 self.select_i16x16(a1, b1, c1),
5234 )
5235 }
5236 #[inline(always)]
5237 fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5238 let (a0, a1) = self.split_i16x32(a);
5239 let (b0, b1) = self.split_i16x32(b);
5240 self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
5241 }
5242 #[inline(always)]
5243 fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
5244 let (a0, a1) = self.split_i16x32(a);
5245 let (b0, b1) = self.split_i16x32(b);
5246 self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
5247 }
5248 #[inline(always)]
5249 fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
5250 let mut b0 = [0; 16usize];
5251 let mut b1 = [0; 16usize];
5252 b0.copy_from_slice(&a.val[0..16usize]);
5253 b1.copy_from_slice(&a.val[16usize..32usize]);
5254 (b0.simd_into(self), b1.simd_into(self))
5255 }
5256 #[inline(always)]
5257 fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
5258 let (a0, a1) = self.split_i16x32(a);
5259 self.combine_u8x32(
5260 self.reinterpret_u8_i16x16(a0),
5261 self.reinterpret_u8_i16x16(a1),
5262 )
5263 }
5264 #[inline(always)]
5265 fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
5266 let (a0, a1) = self.split_i16x32(a);
5267 self.combine_u32x8(
5268 self.reinterpret_u32_i16x16(a0),
5269 self.reinterpret_u32_i16x16(a1),
5270 )
5271 }
5272 #[inline(always)]
5273 fn splat_u16x32(self, a: u16) -> u16x32<Self> {
5274 let half = self.splat_u16x16(a);
5275 self.combine_u16x16(half, half)
5276 }
5277 #[inline(always)]
5278 fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
5279 let (a0, a1) = self.split_u16x32(a);
5280 self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
5281 }
5282 #[inline(always)]
5283 fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5284 let (a0, a1) = self.split_u16x32(a);
5285 let (b0, b1) = self.split_u16x32(b);
5286 self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
5287 }
5288 #[inline(always)]
5289 fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5290 let (a0, a1) = self.split_u16x32(a);
5291 let (b0, b1) = self.split_u16x32(b);
5292 self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
5293 }
5294 #[inline(always)]
5295 fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5296 let (a0, a1) = self.split_u16x32(a);
5297 let (b0, b1) = self.split_u16x32(b);
5298 self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
5299 }
5300 #[inline(always)]
5301 fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5302 let (a0, a1) = self.split_u16x32(a);
5303 let (b0, b1) = self.split_u16x32(b);
5304 self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
5305 }
5306 #[inline(always)]
5307 fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5308 let (a0, a1) = self.split_u16x32(a);
5309 let (b0, b1) = self.split_u16x32(b);
5310 self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
5311 }
5312 #[inline(always)]
5313 fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5314 let (a0, a1) = self.split_u16x32(a);
5315 let (b0, b1) = self.split_u16x32(b);
5316 self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
5317 }
5318 #[inline(always)]
5319 fn shr_u16x32(self, a: u16x32<Self>, b: u32) -> u16x32<Self> {
5320 let (a0, a1) = self.split_u16x32(a);
5321 self.combine_u16x16(self.shr_u16x16(a0, b), self.shr_u16x16(a1, b))
5322 }
5323 #[inline(always)]
5324 fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5325 let (a0, a1) = self.split_u16x32(a);
5326 let (b0, b1) = self.split_u16x32(b);
5327 self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
5328 }
5329 #[inline(always)]
5330 fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5331 let (a0, a1) = self.split_u16x32(a);
5332 let (b0, b1) = self.split_u16x32(b);
5333 self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
5334 }
5335 #[inline(always)]
5336 fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5337 let (a0, a1) = self.split_u16x32(a);
5338 let (b0, b1) = self.split_u16x32(b);
5339 self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
5340 }
5341 #[inline(always)]
5342 fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5343 let (a0, a1) = self.split_u16x32(a);
5344 let (b0, b1) = self.split_u16x32(b);
5345 self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
5346 }
5347 #[inline(always)]
5348 fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
5349 let (a0, a1) = self.split_u16x32(a);
5350 let (b0, b1) = self.split_u16x32(b);
5351 self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
5352 }
5353 #[inline(always)]
5354 fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5355 let (a0, _) = self.split_u16x32(a);
5356 let (b0, _) = self.split_u16x32(b);
5357 self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
5358 }
5359 #[inline(always)]
5360 fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5361 let (_, a1) = self.split_u16x32(a);
5362 let (_, b1) = self.split_u16x32(b);
5363 self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
5364 }
5365 #[inline(always)]
5366 fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5367 let (a0, a1) = self.split_u16x32(a);
5368 let (b0, b1) = self.split_u16x32(b);
5369 self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
5370 }
5371 #[inline(always)]
5372 fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5373 let (a0, a1) = self.split_u16x32(a);
5374 let (b0, b1) = self.split_u16x32(b);
5375 self.combine_u16x16(
5376 self.unzip_high_u16x16(a0, a1),
5377 self.unzip_high_u16x16(b0, b1),
5378 )
5379 }
5380 #[inline(always)]
5381 fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
5382 let (a0, a1) = self.split_mask16x32(a);
5383 let (b0, b1) = self.split_u16x32(b);
5384 let (c0, c1) = self.split_u16x32(c);
5385 self.combine_u16x16(
5386 self.select_u16x16(a0, b0, c0),
5387 self.select_u16x16(a1, b1, c1),
5388 )
5389 }
5390 #[inline(always)]
5391 fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5392 let (a0, a1) = self.split_u16x32(a);
5393 let (b0, b1) = self.split_u16x32(b);
5394 self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
5395 }
5396 #[inline(always)]
5397 fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
5398 let (a0, a1) = self.split_u16x32(a);
5399 let (b0, b1) = self.split_u16x32(b);
5400 self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
5401 }
5402 #[inline(always)]
5403 fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
5404 let mut b0 = [0; 16usize];
5405 let mut b1 = [0; 16usize];
5406 b0.copy_from_slice(&a.val[0..16usize]);
5407 b1.copy_from_slice(&a.val[16usize..32usize]);
5408 (b0.simd_into(self), b1.simd_into(self))
5409 }
5410 #[inline(always)]
5411 fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
5412 [
5413 src[0usize],
5414 src[4usize],
5415 src[8usize],
5416 src[12usize],
5417 src[16usize],
5418 src[20usize],
5419 src[24usize],
5420 src[28usize],
5421 src[1usize],
5422 src[5usize],
5423 src[9usize],
5424 src[13usize],
5425 src[17usize],
5426 src[21usize],
5427 src[25usize],
5428 src[29usize],
5429 src[2usize],
5430 src[6usize],
5431 src[10usize],
5432 src[14usize],
5433 src[18usize],
5434 src[22usize],
5435 src[26usize],
5436 src[30usize],
5437 src[3usize],
5438 src[7usize],
5439 src[11usize],
5440 src[15usize],
5441 src[19usize],
5442 src[23usize],
5443 src[27usize],
5444 src[31usize],
5445 ]
5446 .simd_into(self)
5447 }
5448 #[inline(always)]
5449 fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
5450 *dest = [
5451 a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
5452 a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
5453 a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
5454 a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
5455 a[7usize], a[15usize], a[23usize], a[31usize],
5456 ];
5457 }
5458 #[inline(always)]
5459 fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
5460 let (a0, a1) = self.split_u16x32(a);
5461 self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
5462 }
5463 #[inline(always)]
5464 fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
5465 let (a0, a1) = self.split_u16x32(a);
5466 self.combine_u8x32(
5467 self.reinterpret_u8_u16x16(a0),
5468 self.reinterpret_u8_u16x16(a1),
5469 )
5470 }
5471 #[inline(always)]
5472 fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
5473 let (a0, a1) = self.split_u16x32(a);
5474 self.combine_u32x8(
5475 self.reinterpret_u32_u16x16(a0),
5476 self.reinterpret_u32_u16x16(a1),
5477 )
5478 }
5479 #[inline(always)]
5480 fn splat_mask16x32(self, a: i16) -> mask16x32<Self> {
5481 let half = self.splat_mask16x16(a);
5482 self.combine_mask16x16(half, half)
5483 }
5484 #[inline(always)]
5485 fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
5486 let (a0, a1) = self.split_mask16x32(a);
5487 self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
5488 }
5489 #[inline(always)]
5490 fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5491 let (a0, a1) = self.split_mask16x32(a);
5492 let (b0, b1) = self.split_mask16x32(b);
5493 self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
5494 }
5495 #[inline(always)]
5496 fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5497 let (a0, a1) = self.split_mask16x32(a);
5498 let (b0, b1) = self.split_mask16x32(b);
5499 self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
5500 }
5501 #[inline(always)]
5502 fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5503 let (a0, a1) = self.split_mask16x32(a);
5504 let (b0, b1) = self.split_mask16x32(b);
5505 self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
5506 }
5507 #[inline(always)]
5508 fn select_mask16x32(
5509 self,
5510 a: mask16x32<Self>,
5511 b: mask16x32<Self>,
5512 c: mask16x32<Self>,
5513 ) -> mask16x32<Self> {
5514 let (a0, a1) = self.split_mask16x32(a);
5515 let (b0, b1) = self.split_mask16x32(b);
5516 let (c0, c1) = self.split_mask16x32(c);
5517 self.combine_mask16x16(
5518 self.select_mask16x16(a0, b0, c0),
5519 self.select_mask16x16(a1, b1, c1),
5520 )
5521 }
5522 #[inline(always)]
5523 fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
5524 let (a0, a1) = self.split_mask16x32(a);
5525 let (b0, b1) = self.split_mask16x32(b);
5526 self.combine_mask16x16(
5527 self.simd_eq_mask16x16(a0, b0),
5528 self.simd_eq_mask16x16(a1, b1),
5529 )
5530 }
5531 #[inline(always)]
5532 fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
5533 let mut b0 = [0; 16usize];
5534 let mut b1 = [0; 16usize];
5535 b0.copy_from_slice(&a.val[0..16usize]);
5536 b1.copy_from_slice(&a.val[16usize..32usize]);
5537 (b0.simd_into(self), b1.simd_into(self))
5538 }
5539 #[inline(always)]
5540 fn splat_i32x16(self, a: i32) -> i32x16<Self> {
5541 let half = self.splat_i32x8(a);
5542 self.combine_i32x8(half, half)
5543 }
5544 #[inline(always)]
5545 fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
5546 let (a0, a1) = self.split_i32x16(a);
5547 self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
5548 }
5549 #[inline(always)]
5550 fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5551 let (a0, a1) = self.split_i32x16(a);
5552 let (b0, b1) = self.split_i32x16(b);
5553 self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
5554 }
5555 #[inline(always)]
5556 fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5557 let (a0, a1) = self.split_i32x16(a);
5558 let (b0, b1) = self.split_i32x16(b);
5559 self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
5560 }
5561 #[inline(always)]
5562 fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5563 let (a0, a1) = self.split_i32x16(a);
5564 let (b0, b1) = self.split_i32x16(b);
5565 self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
5566 }
5567 #[inline(always)]
5568 fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5569 let (a0, a1) = self.split_i32x16(a);
5570 let (b0, b1) = self.split_i32x16(b);
5571 self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
5572 }
5573 #[inline(always)]
5574 fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5575 let (a0, a1) = self.split_i32x16(a);
5576 let (b0, b1) = self.split_i32x16(b);
5577 self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
5578 }
5579 #[inline(always)]
5580 fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5581 let (a0, a1) = self.split_i32x16(a);
5582 let (b0, b1) = self.split_i32x16(b);
5583 self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
5584 }
5585 #[inline(always)]
5586 fn shr_i32x16(self, a: i32x16<Self>, b: u32) -> i32x16<Self> {
5587 let (a0, a1) = self.split_i32x16(a);
5588 self.combine_i32x8(self.shr_i32x8(a0, b), self.shr_i32x8(a1, b))
5589 }
5590 #[inline(always)]
5591 fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5592 let (a0, a1) = self.split_i32x16(a);
5593 let (b0, b1) = self.split_i32x16(b);
5594 self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
5595 }
5596 #[inline(always)]
5597 fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5598 let (a0, a1) = self.split_i32x16(a);
5599 let (b0, b1) = self.split_i32x16(b);
5600 self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
5601 }
5602 #[inline(always)]
5603 fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5604 let (a0, a1) = self.split_i32x16(a);
5605 let (b0, b1) = self.split_i32x16(b);
5606 self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
5607 }
5608 #[inline(always)]
5609 fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5610 let (a0, a1) = self.split_i32x16(a);
5611 let (b0, b1) = self.split_i32x16(b);
5612 self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
5613 }
5614 #[inline(always)]
5615 fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
5616 let (a0, a1) = self.split_i32x16(a);
5617 let (b0, b1) = self.split_i32x16(b);
5618 self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
5619 }
5620 #[inline(always)]
5621 fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5622 let (a0, _) = self.split_i32x16(a);
5623 let (b0, _) = self.split_i32x16(b);
5624 self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
5625 }
5626 #[inline(always)]
5627 fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5628 let (_, a1) = self.split_i32x16(a);
5629 let (_, b1) = self.split_i32x16(b);
5630 self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
5631 }
5632 #[inline(always)]
5633 fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5634 let (a0, a1) = self.split_i32x16(a);
5635 let (b0, b1) = self.split_i32x16(b);
5636 self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
5637 }
5638 #[inline(always)]
5639 fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5640 let (a0, a1) = self.split_i32x16(a);
5641 let (b0, b1) = self.split_i32x16(b);
5642 self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
5643 }
5644 #[inline(always)]
5645 fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
5646 let (a0, a1) = self.split_mask32x16(a);
5647 let (b0, b1) = self.split_i32x16(b);
5648 let (c0, c1) = self.split_i32x16(c);
5649 self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
5650 }
5651 #[inline(always)]
5652 fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5653 let (a0, a1) = self.split_i32x16(a);
5654 let (b0, b1) = self.split_i32x16(b);
5655 self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
5656 }
5657 #[inline(always)]
5658 fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
5659 let (a0, a1) = self.split_i32x16(a);
5660 let (b0, b1) = self.split_i32x16(b);
5661 self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
5662 }
5663 #[inline(always)]
5664 fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
5665 let mut b0 = [0; 8usize];
5666 let mut b1 = [0; 8usize];
5667 b0.copy_from_slice(&a.val[0..8usize]);
5668 b1.copy_from_slice(&a.val[8usize..16usize]);
5669 (b0.simd_into(self), b1.simd_into(self))
5670 }
5671 #[inline(always)]
5672 fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
5673 let (a0, a1) = self.split_i32x16(a);
5674 self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
5675 }
5676 #[inline(always)]
5677 fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
5678 let (a0, a1) = self.split_i32x16(a);
5679 self.combine_u32x8(
5680 self.reinterpret_u32_i32x8(a0),
5681 self.reinterpret_u32_i32x8(a1),
5682 )
5683 }
5684 #[inline(always)]
5685 fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
5686 let (a0, a1) = self.split_i32x16(a);
5687 self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
5688 }
5689 #[inline(always)]
5690 fn splat_u32x16(self, a: u32) -> u32x16<Self> {
5691 let half = self.splat_u32x8(a);
5692 self.combine_u32x8(half, half)
5693 }
5694 #[inline(always)]
5695 fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
5696 let (a0, a1) = self.split_u32x16(a);
5697 self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
5698 }
5699 #[inline(always)]
5700 fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5701 let (a0, a1) = self.split_u32x16(a);
5702 let (b0, b1) = self.split_u32x16(b);
5703 self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
5704 }
5705 #[inline(always)]
5706 fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5707 let (a0, a1) = self.split_u32x16(a);
5708 let (b0, b1) = self.split_u32x16(b);
5709 self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
5710 }
5711 #[inline(always)]
5712 fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5713 let (a0, a1) = self.split_u32x16(a);
5714 let (b0, b1) = self.split_u32x16(b);
5715 self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
5716 }
5717 #[inline(always)]
5718 fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5719 let (a0, a1) = self.split_u32x16(a);
5720 let (b0, b1) = self.split_u32x16(b);
5721 self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
5722 }
5723 #[inline(always)]
5724 fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5725 let (a0, a1) = self.split_u32x16(a);
5726 let (b0, b1) = self.split_u32x16(b);
5727 self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
5728 }
5729 #[inline(always)]
5730 fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5731 let (a0, a1) = self.split_u32x16(a);
5732 let (b0, b1) = self.split_u32x16(b);
5733 self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
5734 }
5735 #[inline(always)]
5736 fn shr_u32x16(self, a: u32x16<Self>, b: u32) -> u32x16<Self> {
5737 let (a0, a1) = self.split_u32x16(a);
5738 self.combine_u32x8(self.shr_u32x8(a0, b), self.shr_u32x8(a1, b))
5739 }
5740 #[inline(always)]
5741 fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
5742 let (a0, a1) = self.split_u32x16(a);
5743 let (b0, b1) = self.split_u32x16(b);
5744 self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
5745 }
5746 #[inline(always)]
5747 fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
5748 let (a0, a1) = self.split_u32x16(a);
5749 let (b0, b1) = self.split_u32x16(b);
5750 self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
5751 }
5752 #[inline(always)]
5753 fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
5754 let (a0, a1) = self.split_u32x16(a);
5755 let (b0, b1) = self.split_u32x16(b);
5756 self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
5757 }
5758 #[inline(always)]
5759 fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
5760 let (a0, a1) = self.split_u32x16(a);
5761 let (b0, b1) = self.split_u32x16(b);
5762 self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
5763 }
5764 #[inline(always)]
5765 fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
5766 let (a0, a1) = self.split_u32x16(a);
5767 let (b0, b1) = self.split_u32x16(b);
5768 self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
5769 }
5770 #[inline(always)]
5771 fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5772 let (a0, _) = self.split_u32x16(a);
5773 let (b0, _) = self.split_u32x16(b);
5774 self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
5775 }
5776 #[inline(always)]
5777 fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5778 let (_, a1) = self.split_u32x16(a);
5779 let (_, b1) = self.split_u32x16(b);
5780 self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
5781 }
5782 #[inline(always)]
5783 fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5784 let (a0, a1) = self.split_u32x16(a);
5785 let (b0, b1) = self.split_u32x16(b);
5786 self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
5787 }
5788 #[inline(always)]
5789 fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5790 let (a0, a1) = self.split_u32x16(a);
5791 let (b0, b1) = self.split_u32x16(b);
5792 self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
5793 }
5794 #[inline(always)]
5795 fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
5796 let (a0, a1) = self.split_mask32x16(a);
5797 let (b0, b1) = self.split_u32x16(b);
5798 let (c0, c1) = self.split_u32x16(c);
5799 self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
5800 }
5801 #[inline(always)]
5802 fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5803 let (a0, a1) = self.split_u32x16(a);
5804 let (b0, b1) = self.split_u32x16(b);
5805 self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
5806 }
5807 #[inline(always)]
5808 fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
5809 let (a0, a1) = self.split_u32x16(a);
5810 let (b0, b1) = self.split_u32x16(b);
5811 self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
5812 }
5813 #[inline(always)]
5814 fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
5815 let mut b0 = [0; 8usize];
5816 let mut b1 = [0; 8usize];
5817 b0.copy_from_slice(&a.val[0..8usize]);
5818 b1.copy_from_slice(&a.val[8usize..16usize]);
5819 (b0.simd_into(self), b1.simd_into(self))
5820 }
5821 #[inline(always)]
5822 fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
5823 [
5824 src[0usize],
5825 src[4usize],
5826 src[8usize],
5827 src[12usize],
5828 src[1usize],
5829 src[5usize],
5830 src[9usize],
5831 src[13usize],
5832 src[2usize],
5833 src[6usize],
5834 src[10usize],
5835 src[14usize],
5836 src[3usize],
5837 src[7usize],
5838 src[11usize],
5839 src[15usize],
5840 ]
5841 .simd_into(self)
5842 }
5843 #[inline(always)]
5844 fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
5845 *dest = [
5846 a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
5847 a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
5848 a[11usize], a[15usize],
5849 ];
5850 }
5851 #[inline(always)]
5852 fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
5853 let (a0, a1) = self.split_u32x16(a);
5854 self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
5855 }
5856 #[inline(always)]
5857 fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
5858 let (a0, a1) = self.split_u32x16(a);
5859 self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
5860 }
5861 #[inline(always)]
5862 fn splat_mask32x16(self, a: i32) -> mask32x16<Self> {
5863 let half = self.splat_mask32x8(a);
5864 self.combine_mask32x8(half, half)
5865 }
5866 #[inline(always)]
5867 fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
5868 let (a0, a1) = self.split_mask32x16(a);
5869 self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
5870 }
5871 #[inline(always)]
5872 fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
5873 let (a0, a1) = self.split_mask32x16(a);
5874 let (b0, b1) = self.split_mask32x16(b);
5875 self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
5876 }
5877 #[inline(always)]
5878 fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
5879 let (a0, a1) = self.split_mask32x16(a);
5880 let (b0, b1) = self.split_mask32x16(b);
5881 self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
5882 }
5883 #[inline(always)]
5884 fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
5885 let (a0, a1) = self.split_mask32x16(a);
5886 let (b0, b1) = self.split_mask32x16(b);
5887 self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
5888 }
5889 #[inline(always)]
5890 fn select_mask32x16(
5891 self,
5892 a: mask32x16<Self>,
5893 b: mask32x16<Self>,
5894 c: mask32x16<Self>,
5895 ) -> mask32x16<Self> {
5896 let (a0, a1) = self.split_mask32x16(a);
5897 let (b0, b1) = self.split_mask32x16(b);
5898 let (c0, c1) = self.split_mask32x16(c);
5899 self.combine_mask32x8(
5900 self.select_mask32x8(a0, b0, c0),
5901 self.select_mask32x8(a1, b1, c1),
5902 )
5903 }
5904 #[inline(always)]
5905 fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
5906 let (a0, a1) = self.split_mask32x16(a);
5907 let (b0, b1) = self.split_mask32x16(b);
5908 self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
5909 }
5910 #[inline(always)]
5911 fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
5912 let mut b0 = [0; 8usize];
5913 let mut b1 = [0; 8usize];
5914 b0.copy_from_slice(&a.val[0..8usize]);
5915 b1.copy_from_slice(&a.val[8usize..16usize]);
5916 (b0.simd_into(self), b1.simd_into(self))
5917 }
5918 #[inline(always)]
5919 fn splat_f64x8(self, a: f64) -> f64x8<Self> {
5920 let half = self.splat_f64x4(a);
5921 self.combine_f64x4(half, half)
5922 }
5923 #[inline(always)]
5924 fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
5925 let (a0, a1) = self.split_f64x8(a);
5926 self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
5927 }
5928 #[inline(always)]
5929 fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
5930 let (a0, a1) = self.split_f64x8(a);
5931 self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
5932 }
5933 #[inline(always)]
5934 fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
5935 let (a0, a1) = self.split_f64x8(a);
5936 self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
5937 }
5938 #[inline(always)]
5939 fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
5940 let (a0, a1) = self.split_f64x8(a);
5941 let (b0, b1) = self.split_f64x8(b);
5942 self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
5943 }
5944 #[inline(always)]
5945 fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
5946 let (a0, a1) = self.split_f64x8(a);
5947 let (b0, b1) = self.split_f64x8(b);
5948 self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
5949 }
5950 #[inline(always)]
5951 fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
5952 let (a0, a1) = self.split_f64x8(a);
5953 let (b0, b1) = self.split_f64x8(b);
5954 self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
5955 }
5956 #[inline(always)]
5957 fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
5958 let (a0, a1) = self.split_f64x8(a);
5959 let (b0, b1) = self.split_f64x8(b);
5960 self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
5961 }
5962 #[inline(always)]
5963 fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
5964 let (a0, a1) = self.split_f64x8(a);
5965 let (b0, b1) = self.split_f64x8(b);
5966 self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
5967 }
5968 #[inline(always)]
5969 fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
5970 let (a0, a1) = self.split_f64x8(a);
5971 let (b0, b1) = self.split_f64x8(b);
5972 self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
5973 }
5974 #[inline(always)]
5975 fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
5976 let (a0, a1) = self.split_f64x8(a);
5977 let (b0, b1) = self.split_f64x8(b);
5978 self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
5979 }
5980 #[inline(always)]
5981 fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
5982 let (a0, a1) = self.split_f64x8(a);
5983 let (b0, b1) = self.split_f64x8(b);
5984 self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
5985 }
5986 #[inline(always)]
5987 fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
5988 let (a0, a1) = self.split_f64x8(a);
5989 let (b0, b1) = self.split_f64x8(b);
5990 self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
5991 }
5992 #[inline(always)]
5993 fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
5994 let (a0, a1) = self.split_f64x8(a);
5995 let (b0, b1) = self.split_f64x8(b);
5996 self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
5997 }
5998 #[inline(always)]
5999 fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6000 let (a0, _) = self.split_f64x8(a);
6001 let (b0, _) = self.split_f64x8(b);
6002 self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
6003 }
6004 #[inline(always)]
6005 fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6006 let (_, a1) = self.split_f64x8(a);
6007 let (_, b1) = self.split_f64x8(b);
6008 self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
6009 }
6010 #[inline(always)]
6011 fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6012 let (a0, a1) = self.split_f64x8(a);
6013 let (b0, b1) = self.split_f64x8(b);
6014 self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
6015 }
6016 #[inline(always)]
6017 fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6018 let (a0, a1) = self.split_f64x8(a);
6019 let (b0, b1) = self.split_f64x8(b);
6020 self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
6021 }
6022 #[inline(always)]
6023 fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6024 let (a0, a1) = self.split_f64x8(a);
6025 let (b0, b1) = self.split_f64x8(b);
6026 self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
6027 }
6028 #[inline(always)]
6029 fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6030 let (a0, a1) = self.split_f64x8(a);
6031 let (b0, b1) = self.split_f64x8(b);
6032 self.combine_f64x4(
6033 self.max_precise_f64x4(a0, b0),
6034 self.max_precise_f64x4(a1, b1),
6035 )
6036 }
6037 #[inline(always)]
6038 fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6039 let (a0, a1) = self.split_f64x8(a);
6040 let (b0, b1) = self.split_f64x8(b);
6041 self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
6042 }
6043 #[inline(always)]
6044 fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
6045 let (a0, a1) = self.split_f64x8(a);
6046 let (b0, b1) = self.split_f64x8(b);
6047 self.combine_f64x4(
6048 self.min_precise_f64x4(a0, b0),
6049 self.min_precise_f64x4(a1, b1),
6050 )
6051 }
6052 #[inline(always)]
6053 fn madd_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6054 let (a0, a1) = self.split_f64x8(a);
6055 let (b0, b1) = self.split_f64x8(b);
6056 let (c0, c1) = self.split_f64x8(c);
6057 self.combine_f64x4(self.madd_f64x4(a0, b0, c0), self.madd_f64x4(a1, b1, c1))
6058 }
6059 #[inline(always)]
6060 fn msub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6061 let (a0, a1) = self.split_f64x8(a);
6062 let (b0, b1) = self.split_f64x8(b);
6063 let (c0, c1) = self.split_f64x8(c);
6064 self.combine_f64x4(self.msub_f64x4(a0, b0, c0), self.msub_f64x4(a1, b1, c1))
6065 }
6066 #[inline(always)]
6067 fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6068 let (a0, a1) = self.split_f64x8(a);
6069 self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
6070 }
6071 #[inline(always)]
6072 fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6073 let (a0, a1) = self.split_f64x8(a);
6074 self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
6075 }
6076 #[inline(always)]
6077 fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
6078 let (a0, a1) = self.split_f64x8(a);
6079 self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
6080 }
6081 #[inline(always)]
6082 fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
6083 let (a0, a1) = self.split_mask64x8(a);
6084 let (b0, b1) = self.split_f64x8(b);
6085 let (c0, c1) = self.split_f64x8(c);
6086 self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
6087 }
6088 #[inline(always)]
6089 fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
6090 let mut b0 = [0.0; 4usize];
6091 let mut b1 = [0.0; 4usize];
6092 b0.copy_from_slice(&a.val[0..4usize]);
6093 b1.copy_from_slice(&a.val[4usize..8usize]);
6094 (b0.simd_into(self), b1.simd_into(self))
6095 }
6096 #[inline(always)]
6097 fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
6098 let (a0, a1) = self.split_f64x8(a);
6099 self.combine_f32x8(
6100 self.reinterpret_f32_f64x4(a0),
6101 self.reinterpret_f32_f64x4(a1),
6102 )
6103 }
6104 #[inline(always)]
6105 fn splat_mask64x8(self, a: i64) -> mask64x8<Self> {
6106 let half = self.splat_mask64x4(a);
6107 self.combine_mask64x4(half, half)
6108 }
6109 #[inline(always)]
6110 fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
6111 let (a0, a1) = self.split_mask64x8(a);
6112 self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
6113 }
6114 #[inline(always)]
6115 fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6116 let (a0, a1) = self.split_mask64x8(a);
6117 let (b0, b1) = self.split_mask64x8(b);
6118 self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
6119 }
6120 #[inline(always)]
6121 fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6122 let (a0, a1) = self.split_mask64x8(a);
6123 let (b0, b1) = self.split_mask64x8(b);
6124 self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
6125 }
6126 #[inline(always)]
6127 fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6128 let (a0, a1) = self.split_mask64x8(a);
6129 let (b0, b1) = self.split_mask64x8(b);
6130 self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
6131 }
6132 #[inline(always)]
6133 fn select_mask64x8(
6134 self,
6135 a: mask64x8<Self>,
6136 b: mask64x8<Self>,
6137 c: mask64x8<Self>,
6138 ) -> mask64x8<Self> {
6139 let (a0, a1) = self.split_mask64x8(a);
6140 let (b0, b1) = self.split_mask64x8(b);
6141 let (c0, c1) = self.split_mask64x8(c);
6142 self.combine_mask64x4(
6143 self.select_mask64x4(a0, b0, c0),
6144 self.select_mask64x4(a1, b1, c1),
6145 )
6146 }
6147 #[inline(always)]
6148 fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
6149 let (a0, a1) = self.split_mask64x8(a);
6150 let (b0, b1) = self.split_mask64x8(b);
6151 self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
6152 }
6153 #[inline(always)]
6154 fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
6155 let mut b0 = [0; 4usize];
6156 let mut b1 = [0; 4usize];
6157 b0.copy_from_slice(&a.val[0..4usize]);
6158 b1.copy_from_slice(&a.val[4usize..8usize]);
6159 (b0.simd_into(self), b1.simd_into(self))
6160 }
6161}