#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))]
use bytemuck::cast;
#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))]
use core::arch::aarch64::uint16x8_t;
// These trait imports let `impl_u16x16_op!` invoke the operator methods
// (`add`, `div`, `shr`, ...) by name on `u16`. On NEON builds most of them
// go unused, hence the `allow`.
#[allow(unused_imports)]
use core::ops::{Add, BitAnd, BitOr, Div, Mul, Shr, Sub};
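
/// A 16-lane vector of `u16` values.
///
/// On AArch64 with the `simd` feature and NEON enabled, most binary ops
/// run as a pair of `uint16x8_t` intrinsic calls; everywhere else each
/// lane is computed with scalar code.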
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, PartialEq, Default, Debug)]
pub struct u16x16(pub [u16; 16]);
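
// Portable fallback: applies the scalar method `$op` (`add`, `min`,
// `shr`, ...) to every lane in turn.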
macro_rules! impl_u16x16_op {
    ($a:expr, $op:ident, $b:expr) => {
        u16x16([
            $a.0[0].$op($b.0[0]),
            $a.0[1].$op($b.0[1]),
            $a.0[2].$op($b.0[2]),
            $a.0[3].$op($b.0[3]),
            $a.0[4].$op($b.0[4]),
            $a.0[5].$op($b.0[5]),
            $a.0[6].$op($b.0[6]),
            $a.0[7].$op($b.0[7]),
            $a.0[8].$op($b.0[8]),
            $a.0[9].$op($b.0[9]),
            $a.0[10].$op($b.0[10]),
            $a.0[11].$op($b.0[11]),
            $a.0[12].$op($b.0[12]),
            $a.0[13].$op($b.0[13]),
            $a.0[14].$op($b.0[14]),
            $a.0[15].$op($b.0[15]),
        ])
    };
}

// Splits each operand into two `uint16x8_t` halves, applies the NEON
// intrinsic `$f` to each half, and reassembles the lanes. The doubled
// braces make the expansion a single block expression, as required where
// this macro forms the value of a `cfg_if!` branch.
#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))]
macro_rules! impl_aarch64_call {
    ($f:ident, $a:expr, $b:expr) => {{
        let a = $a.split();
        let b = $b.split();
        Self(bytemuck::cast([
            unsafe { core::arch::aarch64::$f(a.0, b.0) },
            unsafe { core::arch::aarch64::$f(a.1, b.1) },
        ]))
    }};
}

impl u16x16 {
    #[inline]
    pub fn splat(n: u16) -> Self {
        Self([n; 16])
    }

    #[inline]
    pub fn as_slice(&self) -> &[u16; 16] {
        &self.0
    }

    #[inline]
    pub fn min(&self, rhs: &Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vminq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, min, rhs)
            }
        }
    }

    #[inline]
    pub fn max(&self, rhs: &Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vmaxq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, max, rhs)
            }
        }
    }
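
    /// Lane-wise `self <= rhs`: true lanes become `!0` (all bits set) and
    /// false lanes become `0`, yielding a mask suitable for `blend`.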
    #[inline]
    pub fn cmp_le(&self, rhs: &Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vcleq_u16, self, rhs)
            } else {
                Self([
                    if self.0[ 0] <= rhs.0[ 0] { !0 } else { 0 },
                    if self.0[ 1] <= rhs.0[ 1] { !0 } else { 0 },
                    if self.0[ 2] <= rhs.0[ 2] { !0 } else { 0 },
                    if self.0[ 3] <= rhs.0[ 3] { !0 } else { 0 },
                    if self.0[ 4] <= rhs.0[ 4] { !0 } else { 0 },
                    if self.0[ 5] <= rhs.0[ 5] { !0 } else { 0 },
                    if self.0[ 6] <= rhs.0[ 6] { !0 } else { 0 },
                    if self.0[ 7] <= rhs.0[ 7] { !0 } else { 0 },
                    if self.0[ 8] <= rhs.0[ 8] { !0 } else { 0 },
                    if self.0[ 9] <= rhs.0[ 9] { !0 } else { 0 },
                    if self.0[10] <= rhs.0[10] { !0 } else { 0 },
                    if self.0[11] <= rhs.0[11] { !0 } else { 0 },
                    if self.0[12] <= rhs.0[12] { !0 } else { 0 },
                    if self.0[13] <= rhs.0[13] { !0 } else { 0 },
                    if self.0[14] <= rhs.0[14] { !0 } else { 0 },
                    if self.0[15] <= rhs.0[15] { !0 } else { 0 },
                ])
            }
        }
    }
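
    /// Lane-wise select: where the mask (`self`) lanes are all ones the
    /// result is taken from `t`, where they are zero it is taken from `e`.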
    #[inline]
    pub fn blend(self, t: Self, e: Self) -> Self {
        (t & self) | (e & !self)
    }
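
    /// Reinterprets the 16 lanes as two NEON `uint16x8_t` halves. The
    /// `cast` relies on `bytemuck` providing `Pod` impls for NEON vector
    /// types, which sit behind a `bytemuck` feature gate.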
    #[inline]
    #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))]
    pub fn split(self) -> (uint16x8_t, uint16x8_t) {
        let pair: [uint16x8_t; 2] = cast(self.0);
        (pair[0], pair[1])
    }
}

impl core::ops::Add<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vaddq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, add, rhs)
            }
        }
    }
}

impl core::ops::Sub<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vsubq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, sub, rhs)
            }
        }
    }
}

impl core::ops::Mul<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vmulq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, mul, rhs)
            }
        }
    }
}
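
// There is no NEON instruction for integer division, so `Div` always
// takes the scalar path.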
impl core::ops::Div<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn div(self, rhs: Self) -> Self::Output {
        impl_u16x16_op!(self, div, rhs)
    }
}

impl core::ops::BitAnd<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vandq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, bitand, rhs)
            }
        }
    }
}

impl core::ops::BitOr<u16x16> for u16x16 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                impl_aarch64_call!(vorrq_u16, self, rhs)
            } else {
                impl_u16x16_op!(self, bitor, rhs)
            }
        }
    }
}

impl core::ops::Not for u16x16 {
    type Output = Self;

    #[inline]
    fn not(self) -> Self::Output {
        u16x16([
            !self.0[0],
            !self.0[1],
            !self.0[2],
            !self.0[3],
            !self.0[4],
            !self.0[5],
            !self.0[6],
            !self.0[7],
            !self.0[8],
            !self.0[9],
            !self.0[10],
            !self.0[11],
            !self.0[12],
            !self.0[13],
            !self.0[14],
            !self.0[15],
        ])
    }
}
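
// Shifts each lane right by the matching lane of `rhs`; always scalar.
// As with `u16 >> u16`, a shift amount of 16 or more is an overflow and
// panics when debug assertions are enabled.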
impl core::ops::Shr for u16x16 {
    type Output = Self;

    #[inline]
    fn shr(self, rhs: Self) -> Self::Output {
        impl_u16x16_op!(self, shr, rhs)
    }
}
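
// A minimal sanity-check sketch, not part of the original file: these tests
// exercise the public API through whichever backend the build selects
// (NEON intrinsics on AArch64 with `simd`, the scalar fallback elsewhere).
// Test names and values are illustrative assumptions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lane_wise_arithmetic() {
        let a = u16x16::splat(6);
        let b = u16x16::splat(3);
        assert_eq!(a + b, u16x16::splat(9));
        assert_eq!(a - b, u16x16::splat(3));
        assert_eq!(a * b, u16x16::splat(18));
        assert_eq!(a / b, u16x16::splat(2));
        assert_eq!(a >> u16x16::splat(1), u16x16::splat(3));
        assert_eq!(a.min(&b), b);
        assert_eq!(a.max(&b), a);
    }

    #[test]
    fn cmp_le_mask_drives_blend() {
        let mut a = u16x16::splat(3);
        let b = u16x16::splat(4);
        a.0[0] = 9; // lane 0: 9 <= 4 is false, so `blend` picks from `e`
        let mask = a.cmp_le(&b);
        let t = u16x16::splat(111);
        let e = u16x16::splat(222);
        let picked = mask.blend(t, e);
        assert_eq!(picked.0[0], 222);
        assert_eq!(picked.as_slice()[1], 111);
    }
}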