1use fearless_simd::{
7 Bytes, Simd, SimdBase, SimdFloat, f32x16, u8x16, u8x32, u16x16, u16x32, u32x16,
8};
9use peniko::kurbo::Affine;
10#[cfg(not(feature = "std"))]
11use peniko::kurbo::common::FloatFuncs as _;
12
13#[inline(always)]
15pub fn f32_to_u8<S: Simd>(val: f32x16<S>) -> u8x16<S> {
16 let simd = val.simd;
17 let converted = val.to_int::<u32x16<S>>().to_bytes();
18
19 let (x8_1, x8_2) = simd.split_u8x64(converted);
20 let (p1, p2) = simd.split_u8x32(x8_1);
21 let (p3, p4) = simd.split_u8x32(x8_2);
22
23 let uzp1 = simd.unzip_low_u8x16(p1, p2);
24 let uzp2 = simd.unzip_low_u8x16(p3, p4);
25 simd.unzip_low_u8x16(uzp1, uzp2)
26}
27
/// Extension trait that adds a "divide by 255" operation to a value.
///
/// Implementors decide how the division is carried out; the trait itself
/// only fixes the signature.
pub trait Div255Ext {
    /// Consumes `self` and returns it divided by 255.
    fn div_255(self) -> Self;
}
33
34impl<S: Simd> Div255Ext for u16x32<S> {
35 #[inline(always)]
36 fn div_255(self) -> Self {
37 let p1 = Self::splat(self.simd, 255);
38 let p2 = self + p1;
39 p2 >> 8
40 }
41}
42
43impl<S: Simd> Div255Ext for u16x16<S> {
44 #[inline(always)]
45 fn div_255(self) -> Self {
46 let p1 = Self::splat(self.simd, 255);
47 let p2 = self + p1;
48 p2 >> 8
49 }
50}
51
52#[inline(always)]
54pub fn normalized_mul_u8x32<S: Simd>(a: u8x32<S>, b: u8x32<S>) -> u16x32<S> {
55 (S::widen_u8x32(a.simd, a) * S::widen_u8x32(b.simd, b)).div_255()
56}
57
58#[inline(always)]
60pub fn normalized_mul_u8x16<S: Simd>(a: u8x16<S>, b: u8x16<S>) -> u16x16<S> {
61 (S::widen_u8x16(a.simd, a) * S::widen_u8x16(b.simd, b)).div_255()
62}
63
64#[inline]
80pub fn extract_scales(transform: &Affine) -> (f32, f32) {
81 let [a, b, c, d, _, _] = transform.as_coeffs();
82 let a = a as f32;
83 let b = b as f32;
84 let c = c as f32;
85 let d = d as f32;
86
87 let a2 = a * a;
89 let b2 = b * b;
90 let c2 = c * c;
91 let d2 = d * d;
92 let s1 = a2 + b2 + c2 + d2;
93 let s2 = ((a2 - b2 + c2 - d2).powi(2) + 4.0 * (a * b + c * d).powi(2)).sqrt();
94
95 let scale_x = (0.5 * (s1 + s2)).sqrt();
96 let scale_y = (0.5 * (s1 - s2)).sqrt();
97
98 (scale_x.max(1e-6), scale_y.max(1e-6))
99}